Unverified commit de816176, authored by Vít Starý Novotný
Browse files

Add ARQMath 2021 queries

parent 8e59c155
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ from ..entities import DocumentBase, QueryBase


class ArqmathAnswerBase(DocumentBase):
    """An answer from the ARQMath 2020 collection.
    """An answer from the ARQMath collection.

    Parameters
    ----------
@@ -32,7 +32,7 @@ class ArqmathAnswerBase(DocumentBase):


class ArqmathQuestionBase(DocumentBase):
    """A question from the ARQMath 2020 collection.
    """A question from the ARQMath collection.

    Parameters
    ----------
@@ -76,7 +76,7 @@ class ArqmathQuestionBase(DocumentBase):


class ArqmathQueryBase(QueryBase):
    """A query from the answer retrieval task of ARQMath 2020.
    """A query from the answer retrieval task of ARQMath.

    Parameters
    ----------
+18 −15
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ TEXT_FORMATS = (
)

QUERY_SUBSETS = {
    2020: {
        'train': set([
            1, 3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 17, 19, 20, 21, 23, 26, 28, 30,
            33, 35, 36, 37, 38, 39, 41, 43, 45, 47, 50, 52, 55, 56, 58, 59, 60, 61,
@@ -34,6 +35,7 @@ QUERY_SUBSETS = {
        'test': set([
            8, 16, 27, 29, 42, 44, 49, 51, 53, 80, 99
        ]),
    },
}


@@ -61,15 +63,16 @@ def _resolve_query_id(raw_query_id: str) -> int:


def load_queries(text_format: str, query_class=ArqmathQueryBase,
                 subset: Optional[str] = 'validation') -> OrderedDict:
                 subset: Optional[str] = 'validation',
                 year: int = 2020) -> OrderedDict:
    _check_text_format(text_format)
    queries = OrderedDict()

    filename = 'data/arqmath2020_queries_{}.json'.format(text_format)
    filename = 'data/arqmath{}_queries_{}.json'.format(year, text_format)
    with open(pkg_resources.resource_filename('pv211_utils', filename), 'rt') as f:
        for raw_query in json.load(f):
            query_id = _resolve_query_id(raw_query['query_id'])
            if subset is not None and query_id not in QUERY_SUBSETS[subset]:
            if subset is not None and query_id not in QUERY_SUBSETS[year][subset]:
                continue
            query = query_class(
                query_id=query_id,
+956 −0

File added.

Preview size limit exceeded, changes collapsed.

+956 −0

File added.

Preview size limit exceeded, changes collapsed.

+956 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading