"""Pre-download the TREC and ARQMath datasets into a local cache directory.

The loaders from ``pv211_utils`` are imported inside the individual download
functions rather than at module level, so importing this script does not
require (or trigger the import of) either loader package.
"""

from pathlib import Path


def download_trec(root_directory: Path) -> None:
    """Call the TREC document loader with a cache file under *root_directory*.

    Args:
        root_directory: Directory in which ``trec_documents.json.gz`` is
            placed. The path is passed to the loader as ``cache_download``.
    """
    # Local import: only needed when the TREC dataset is actually requested.
    from pv211_utils.trec.loader import load_documents

    pathname = str(root_directory / 'trec_documents.json.gz')
    load_documents(cache_download=pathname)


def download_arqmath(root_directory: Path) -> None:
    """Call the ARQMath answer loader once for every known text format.

    Args:
        root_directory: Directory in which one
            ``arqmath2020_answers_<text_format>.json.gz`` file per entry of
            ``TEXT_FORMATS`` is placed. Each path is passed to the loader as
            ``cache_download``.
    """
    # Local import: only needed when the ARQMath dataset is actually requested.
    from pv211_utils.arqmath.loader import load_answers, TEXT_FORMATS

    for text_format in TEXT_FORMATS:
        filename = str(root_directory / f'arqmath2020_answers_{text_format}.json.gz')
        load_answers(text_format, cache_download=filename)


def main() -> None:
    """Create the cache directory ``/var/tmp/pv211`` and fill it.

    The directory (including missing parents) is created if it does not exist
    yet; an already existing directory is not an error.
    """
    root_directory = Path('/var') / 'tmp' / 'pv211'
    root_directory.mkdir(parents=True, exist_ok=True)

    download_trec(root_directory)
    download_arqmath(root_directory)


if __name__ == '__main__':
    main()