Unverified Commit b180d3b1 authored by Vít Starý Novotný's avatar Vít Starý Novotný
Browse files

Fix syntax error in script/download_datasets.py

parent 83df2faf
Loading
Loading
Loading
Loading
+21 −7
Original line number Diff line number Diff line
from pathlib import Path

from pv211_utils.trec.loader import load_documents as trec_load_documents
from pv211_utils.arqmath.loader import load_answers as arqmath_load_answers, TEXT_FORMATS

def download_trec(root_directory: Path) -> None:
    """Load the TREC documents, caching them under *root_directory*.

    Calls ``pv211_utils.trec.loader.load_documents`` with ``cache_download``
    set to ``<root_directory>/trec_documents.json.gz``.

    Args:
        root_directory: Directory in which the cache file path is built.
            This function does not create the directory itself.
    """
    # Function-scope import: the loader is only imported when this function
    # is actually called, not when the module is imported.
    from pv211_utils.trec.loader import load_documents

    pathname = str(root_directory/'trec_documents.json.gz')
    load_documents(cache_download=pathname)


def download_arqmath(root_directory: Path) -> None:
    """Load the ARQMath 2020 answers once per text format, with caching.

    For every entry of ``pv211_utils.arqmath.loader.TEXT_FORMATS``, calls
    ``load_answers`` with ``cache_download`` set to
    ``<root_directory>/arqmath2020_answers_<text_format>.json.gz``.

    Args:
        root_directory: Directory in which the cache file paths are built.
            This function does not create the directory itself.
    """
    # Function-scope import: the loader is only imported when this function
    # is actually called, not when the module is imported.
    from pv211_utils.arqmath.loader import load_answers, TEXT_FORMATS

    for fmt in TEXT_FORMATS:
        cache_path = root_directory / f'arqmath2020_answers_{fmt}.json.gz'
        load_answers(fmt, cache_download=str(cache_path))


def main() -> None:
    """Create the cache directory and run both dataset download helpers.

    The cache directory is ``/var/tmp/pv211``; it is created together with
    any missing parents, and an already existing directory is not an error.
    """
    root_directory = Path('/var') / 'tmp' / 'pv211'
    root_directory.mkdir(parents=True, exist_ok=True)
    # Each helper builds its own cache file name(s) inside root_directory.
    download_trec(root_directory)
    download_arqmath(root_directory)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()