Commit 9273b3ea authored by Vít Novotný's avatar Vít Novotný
Browse files

Add ARQMath task 1 and 2 relevance judgements

parent dcc5ddb0
......@@ -14,3 +14,11 @@ include scripts/votes-qrels-train.V1.0.tsv
include scripts/votes-qrels-small-validation.V1.0.tsv
include scripts/votes-qrels-validation.V1.0.tsv
include scripts/votes-qrels-test.V1.0.tsv
include scripts/qrel_task1-test.tsv
include scripts/qrel_task1-train.tsv
include scripts/qrel_task1.tsv
include scripts/qrel_task1-validation.tsv
include scripts/qrel_task2-test.tsv
include scripts/qrel_task2-train.tsv
include scripts/qrel_task2.tsv
include scripts/qrel_task2-validation.tsv
......@@ -3,16 +3,18 @@
This repository evaluates the performance of your information retrieval system
on a number of *tasks*:
- `task1` – [ARQMath Task1][arqmath-task1] validation dataset,
- `task1-example` – [ARQMath Task1][arqmath-task1] example dataset,
- `task1-votes` – [ARQMath Task1][arqmath-task1] Math StackExchange [user votes][],
- `ntcir-11-math-2-main` – [NTCIR-11 Math-2 Task Main Subtask][ntcir-11-math-2], and
- `ntcir-12-mathir-arxiv-main` – [NTCIR-12 MathIR Task ArXiv Main Subtask][ntcir-12-mathir].
- `task1` – [ARQMath Task1][arqmath-task1] final dataset,
- `ntcir-11-math-2-main` – [NTCIR-11 Math-2 Task Main Subtask][ntcir-11-math-2],
- `ntcir-12-mathir-arxiv-main` – [NTCIR-12 MathIR Task ArXiv Main Subtask][ntcir-12-mathir], and
- `ntcir-12-mathir-math-wiki-formula` – [NTCIR-12 MathIR Task MathWikiFormula Subtask][ntcir-12-mathir].
- `task2` – [ARQMath Task2][arqmath-task2] final dataset,
The main tasks are:
- `task1-votes` – Use this task to evaluate your ARQMath task 1 system.
- `ntcir-12-mathir-math-wiki-formula` – Use this task to evaluate your ARQMath task 2 system.
- `task1` – Use this task to evaluate your ARQMath task 1 system.
- `task2` – Use this task to evaluate your ARQMath task 2 system.
#### Subsets
Each task comes with three *subsets*:
......@@ -26,7 +28,11 @@ Each task comes with three *subsets*:
used at the end to compare the systems that performed best on the
validation set.
### Usage
The `task1` and `task2` tasks also come with the `all` subset, which contains
all relevance judgements. Use this subset to evaluate a system that has not been
trained using subsets of the `task1` and `task2` tasks.
### Examples
#### Using the `train` set to train your supervised system
``` sh
......@@ -34,7 +40,7 @@ $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-e
$ python
>>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
>>>
>>> task = 'task1-votes'
>>> task = 'task1'
>>> subset = 'train'
>>> results = {}
>>> for topic in get_topics(task=task, subset=subset):
......@@ -62,7 +68,7 @@ $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-e
$ python
>>> from arqmath_eval import get_topics, get_judged_documents
>>>
>>> task = 'task1-votes'
>>> task = 'task1'
>>> subset = 'validation'
>>> results = {}
>>> for topic in get_topics(task=task, subset=subset):
......@@ -90,6 +96,7 @@ $ git push # publish your new result and the upd
```
[arqmath-task1]: https://www.cs.rit.edu/~dprl/ARQMath/Task1-answers.html (Task 1: Find Answers)
[arqmath-task2]: https://www.cs.rit.edu/~dprl/ARQMath/task2-formulas.html (Task 2: Formula Search)
[get_judged_documents]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L61
[get_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L94
[get_random_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L129
......
......@@ -19,7 +19,9 @@ underscores (`_`) replaced with a comma and a space for improved readability.
'''.strip()
RELEVANCE_JUDGEMENTS = {
'train': {
'task1': 'qrel.V1.0-train.tsv',
'task1': 'qrel_task1-train.tsv',
'task2': 'qrel_task2-train.tsv',
'task1-example': 'qrel.V1.0-train.tsv',
'task1-votes': 'votes-qrels-train.V1.0.tsv',
'ntcir-11-math-2-main': 'NTCIR11_Math-qrels-train.dat',
'ntcir-12-mathir-arxiv-main': 'NTCIR12_Math-qrels_agg-train.dat',
......@@ -29,20 +31,26 @@ RELEVANCE_JUDGEMENTS = {
'task1-votes': 'votes-qrels-small-validation.V1.0.tsv',
},
'validation': {
'task1': 'qrel.V1.0-validation.tsv',
'task1': 'qrel_task1-validation.tsv',
'task2': 'qrel_task2-validation.tsv',
'task1-example': 'qrel.V1.0-validation.tsv',
'task1-votes': 'votes-qrels-validation.V1.0.tsv',
'ntcir-11-math-2-main': 'NTCIR11_Math-qrels-validation.dat',
'ntcir-12-mathir-arxiv-main': 'NTCIR12_Math-qrels_agg-validation.dat',
'ntcir-12-mathir-math-wiki-formula': 'NTCIR12_MathWikiFrm-qrels_agg-validation.dat',
},
'test': {
'task1': 'qrel.V1.0-test.tsv',
'task1': 'qrel_task1-test.tsv',
'task2': 'qrel_task2-test.tsv',
'task1-example': 'qrel.V1.0-test.tsv',
'task1-votes': 'votes-qrels-test.V1.0.tsv',
'ntcir-11-math-2-main': 'NTCIR11_Math-qrels-test.dat',
'ntcir-12-mathir-arxiv-main': 'NTCIR12_Math-qrels_agg-test.dat',
'ntcir-12-mathir-math-wiki-formula': 'NTCIR12_MathWikiFrm-qrels_agg-test.dat',
},
'all': {
'task1': 'qrel_task1.tsv',
'task2': 'qrel_task2.tsv',
'task1-votes.V1.2': 'votes-qrels.V1.2.tsv',
'task2-topics-formula_ids.V.1.1': 'topics-formula_ids-qrels.V1.1.tsv',
}
......
......@@ -22,6 +22,8 @@ def evaluate_worker(result_filename):
if __name__ == '__main__':
for task in TASKS:
if not os.path.exists(task):
continue
random_ndcg = get_random_ndcg(task, 'validation')
users = glob(os.path.join(task, '*', ''))
task_results = [(random_ndcg, 'random', 'xrando42')]
......
A.87 0 199152 0
A.87 0 1841 0
A.87 0 2213 0
A.87 0 7922 0
A.87 0 2171214 1
A.87 0 39394 1
A.87 0 59676 0
A.87 0 60064 0
A.87 0 61803 0
A.87 0 62210 0
A.87 0 64991 0
A.87 0 71592 0
A.87 0 488815 0
A.87 0 78035 0
A.87 0 78479 0
A.87 0 81445 0
A.87 0 84086 0
A.87 0 83998 0
A.87 0 85832 0
A.87 0 94804 0
A.87 0 102225 0
A.87 0 104612 0
A.87 0 118612 0
A.87 0 122979 0
A.87 0 128206 0
A.87 0 130805 0
A.87 0 130891 0
A.87 0 133179 0
A.87 0 137043 0
A.87 0 148011 0
A.87 0 148010 0
A.87 0 159566 0
A.87 0 165024 0
A.87 0 166625 0
A.87 0 469406 0
A.87 0 183852 0
A.87 0 1897697 0
A.87 0 191737 0
A.87 0 192597 0
A.87 0 194425 0
A.87 0 194976 2
A.87 0 197737 0
A.87 0 204723 0
A.87 0 238865 0
A.87 0 234844 0
A.87 0 240029 0
A.87 0 241212 0
A.87 0 256104 0
A.87 0 256116 0
A.87 0 256336 0
A.87 0 325374 0
A.87 0 261185 0
A.87 0 264921 0
A.87 0 267965 0
A.87 0 286318 0
A.87 0 286362 0
A.87 0 291735 1
A.87 0 2297353 0
A.87 0 300403 0
A.87 0 301082 0
A.87 0 302478 0
A.87 0 304873 0
A.87 0 320504 0
A.87 0 330315 0
A.87 0 331680 0
A.87 0 341543 0
A.87 0 350522 0
A.87 0 409919 0
A.87 0 363970 0
A.87 0 390581 0
A.87 0 395298 0
A.87 0 397918 0
A.87 0 397926 0
A.87 0 399895 0
A.87 0 412596 0
A.87 0 424410 0
A.87 0 435277 0
A.87 0 439375 0
A.87 0 591015 0
A.87 0 452249 0
A.87 0 460737 0
A.87 0 460804 0
A.87 0 460738 0
A.87 0 461378 0
A.87 0 463284 0
A.87 0 464468 0
A.87 0 467269 0
A.87 0 473533 0
A.87 0 476742 0
A.87 0 477145 0
A.87 0 484260 0
A.87 0 484275 0
A.87 0 490420 0
A.87 0 498117 0
A.87 0 504250 1
A.87 0 504249 3
A.87 0 1871874 0
A.87 0 504251 1
A.87 0 1871872 1
A.87 0 504267 3
A.87 0 1871894 1
A.87 0 520571 0
A.87 0 545486 0
A.87 0 553933 0
A.87 0 569585 0
A.87 0 557632 0
A.87 0 571924 0
A.87 0 572555 0
A.87 0 578601 0
A.87 0 589766 0
A.87 0 594040 0
A.87 0 607282 0
A.87 0 624855 0
A.87 0 638369 0
A.87 0 646231 0
A.87 0 658526 0
A.87 0 664255 0
A.87 0 669214 0
A.87 0 669839 1
A.87 0 697295 1
A.87 0 700855 0
A.87 0 863360 0
A.87 0 748420 0
A.87 0 750438 0
A.87 0 768251 0
A.87 0 770315 0
A.87 0 771561 0
A.87 0 776951 3
A.87 0 779173 0
A.87 0 782408 0
A.87 0 790541 0
A.87 0 800538 0
A.87 0 812245 0
A.87 0 808196 0
A.87 0 840063 0
A.87 0 822723 0
A.87 0 830151 0
A.87 0 1095998 0
A.87 0 838221 0
A.87 0 840605 0
A.87 0 846624 0
A.87 0 853714 0
A.87 0 854082 0
A.87 0 855696 0
A.87 0 876147 0
A.87 0 1073591 0
A.87 0 887275 0
A.87 0 887502 0
A.87 0 904176 0
A.87 0 911755 0
A.87 0 912344 3
A.87 0 920253 0
A.87 0 928713 0
A.87 0 2245350 0
A.87 0 935668 0
A.87 0 937046 0
A.87 0 937359 0
A.87 0 950131 0
A.87 0 981155 0
A.87 0 983570 0
A.87 0 985607 0
A.87 0 990052 0
A.87 0 1001568 0
A.87 0 1002609 0
A.87 0 1019028 0
A.87 0 1026675 0
A.87 0 1028171 0
A.87 0 1036904 0
A.87 0 1045754 0
A.87 0 1059841 0
A.87 0 1065250 0
A.87 0 1065064 0
A.87 0 1073608 0
A.87 0 1079406 0
A.87 0 1081016 0
A.87 0 1081888 0
A.87 0 1083140 0
A.87 0 1092376 0
A.87 0 1868829 0
A.87 0 1101541 0
A.87 0 1101584 0
A.87 0 1101894 0
A.87 0 1104845 0
A.87 0 1107599 0
A.87 0 1112377 0
A.87 0 1116574 0
A.87 0 1116487 0
A.87 0 1117286 0
A.87 0 1124960 0
A.87 0 1128381 0
A.87 0 1133210 0
A.87 0 1146351 0
A.87 0 1149119 0
A.87 0 1162438 2
A.87 0 1162488 3
A.87 0 1168819 0
A.87 0 1170040 0
A.87 0 1171579 0
A.87 0 2480664 0
A.87 0 1176599 0
A.87 0 1180622 0
A.87 0 1182882 0
A.87 0 1188700 0
A.87 0 1193513 0
A.87 0 1200564 0
A.87 0 1206305 0
A.87 0 1206263 0
A.87 0 1206271 1
A.87 0 1206985 0
A.87 0 1215294 0
A.87 0 1233925 0
A.87 0 1234667 0
A.87 0 1239508 0
A.87 0 1240979 0
A.87 0 1246657 0
A.87 0 1252676 0
A.87 0 1253993 0
A.87 0 1264526 0
A.87 0 1265311 0
A.87 0 1266710 0
A.87 0 1270170 0
A.87 0 1284622 0
A.87 0 1305795 0
A.87 0 1318812 0
A.87 0 1322181 0
A.87 0 1323341 0
A.87 0 1325350 0
A.87 0 1331379 0
A.87 0 2654832 0
A.87 0 1349839 0
A.87 0 1351448 0
A.87 0 1363545 0
A.87 0 1370740 0
A.87 0 1367947 0
A.87 0 1381710 0
A.87 0 1387348 0
A.87 0 1386110 0
A.87 0 1392276 0
A.87 0 1393320 0
A.87 0 1393861 0
A.87 0 1395469 0
A.87 0 1399027 0
A.87 0 1409019 0
A.87 0 1417547 0
A.87 0 1418751 0
A.87 0 1434761 0
A.87 0 1441167 0
A.87 0 1472237 0
A.87 0 1471310 0
A.87 0 1474919 0
A.87 0 1475007 0
A.87 0 1498331 0
A.87 0 1506474 3
A.87 0 1508954 0
A.87 0 1520157 0
A.87 0 1542491 0
A.87 0 1526115 0
A.87 0 1527390 0
A.87 0 1531393 1
A.87 0 1546512 0
A.87 0 1553709 0
A.87 0 1556911 0
A.87 0 1593488 0
A.87 0 1593330 0
A.87 0 1595970 0
A.87 0 1596083 0
A.87 0 1608508 0
A.87 0 1604776 0
A.87 0 1604998 0
A.87 0 1608698 0
A.87 0 1639099 1
A.87 0 1643744 0
A.87 0 1663951 0
A.87 0 1650728 0
A.87 0 1655890 0
A.87 0 1658332 0
A.87 0 1660785 0
A.87 0 1660859 0
A.87 0 1662506 0
A.87 0 1666869 0
A.87 0 1666219 0
A.87 0 1669596 0
A.87 0 1673468 3
A.87 0 1682285 0
A.87 0 1686480 1
A.87 0 1686463 2
A.87 0 1687271 0
A.87 0 1693591 0
A.87 0 1694893 0
A.87 0 1695365 0
A.87 0 1722238 0
A.87 0 1736330 0
A.87 0 1738308 0
A.87 0 1765265 0
A.87 0 2114206 0
A.87 0 1786157 0
A.87 0 1786883 0
A.87 0 1788054 0
A.87 0 1800476 0
A.87 0 2911238 0
A.87 0 1811701 0
A.87 0 1823034 1
A.87 0 1823731 0
A.87 0 1825553 0
A.87 0 1830187 0
A.87 0 1830246 0
A.87 0 1833559 0
A.87 0 2534929 2
A.87 0 1837914 0
A.87 0 1852461 0
A.87 0 1851997 0
A.87 0 1854292 0
A.87 0 1855546 0
A.87 0 1862633 0
A.87 0 1868724 0
A.87 0 1873056 0
A.87 0 1877715 0
A.87 0 1880489 0
A.87 0 1884701 0
A.87 0 1884329 1
A.87 0 1884364 0
A.87 0 1904388 0
A.87 0 1917533 0
A.87 0 1918865 3
A.87 0 1917652 3
A.87 0 1917653 3
A.87 0 1931672 0
A.87 0 1947023 0
A.87 0 1950615 0
A.87 0 2012863 0
A.87 0 1963250 0
A.87 0 1970229 0
A.87 0 1971488 0
A.87 0 1983192 0
A.87 0 1987104 0
A.87 0 1987667 0
A.87 0 1988546 0
A.87 0 2404187 0
A.87 0 1995856 0
A.87 0 1997569 0
A.87 0 1999170 0
A.87 0 2006145 0
A.87 0 2025743 0
A.87 0 2039776 0
A.87 0 2441745 0
A.87 0 2051619 0
A.87 0 2053280 0
A.87 0 2054263 0
A.87 0 2058463 0
A.87 0 2073746 0
A.87 0 2093777 0
A.87 0 2094419 0
A.87 0 2094261 0
A.87 0 2097776 1
A.87 0 2112947 0
A.87 0 2119168 1
A.87 0 2124305 0
A.87 0 2135909 0
A.87 0 2135653 0
A.87 0 2135842 0
A.87 0 2158416 1
A.87 0 2163810 0
A.87 0 2163691 0
A.87 0 2163934 0
A.87 0 2171787 0
A.87 0 2176883 0
A.87 0 2198703 0
A.87 0 2199309 0
A.87 0 2206497 0
A.87 0 2209892 0
A.87 0 2227142 0
A.87 0 2217146 0
A.87 0 2222461 0
A.87 0 2228216 0
A.87 0 2243671 0
A.87 0 2248418 0
A.87 0 2253056 0
A.87 0 2261824 0
A.87 0 2263625 0
A.87 0 2293988 0
A.87 0 2294734 0
A.87 0 2302593 2
A.87 0 2306884 0
A.87 0 2312871 0
A.87 0 2316143 1
A.87 0 2328130 0
A.87 0 2328192 0
A.87 0 2330675 1
A.87 0 2338289 0
A.87 0 2355523 0
A.87 0 2352937 0
A.87 0 2355651 0
A.87 0 2392008 0
A.87 0 2414038 1
A.87 0 2416398 0
A.87 0 2418245 0
A.87 0 2421590 0
A.87 0 2421976 0
A.87 0 2421975 0
A.87 0 2421978 0
A.87 0 2422195 0
A.87 0 2441913 0
A.87 0 2429321 0
A.87 0 2436936 0
A.87 0 2443204 1
A.87 0 2460699 1
A.87 0 2460557 2
A.87 0 2460540 1
A.87 0 2464307 0
A.87 0 2467707 2
A.87 0 2475742 0
A.87 0 2479344 0
A.87 0 2479316 0
A.87 0 2488264 0
A.87 0 2491391 0
A.87 0 2492640 1
A.87 0 2505348 0
A.87 0 2501390 0
A.87 0 2504777 0
A.87 0 2506674 0
A.87 0 2506688 0
A.87 0 2512615 0
A.87 0 2521350 0
A.87 0 2522210 2
A.87 0 2524774 0
A.87 0 2528674 0
A.87 0 2528703 0
A.87 0 2546760 0
A.87 0 2550692 0
A.87 0 2560512 0
A.87 0 2560513 0
A.87 0 2561208 0
A.87 0 2587597 0
A.87 0 2589430 0
A.87 0 2636694 0
A.87 0 2600722 0
A.87 0 2613629 0
A.87 0 2659303 0
A.87 0 2667955 0
A.87 0 2673173 0
A.87 0 2672001 0
A.87 0 2672993 0
A.87 0 2687492 0
A.87 0 2699546 0
A.87 0 2712721 0
A.87 0 2718123 0
A.87 0 2723805 0
A.87 0 2736412 0
A.87 0 2744238 0
A.87 0 2776538 0
A.87 0 2758947 0
A.87 0 2764852 0
A.87 0 2781753 0
A.87 0 2796229 0
A.87 0 2796935 0
A.87 0 2807129 0
A.87 0 2807944 0
A.87 0 2813161 0
A.87 0 2817587 0
A.87 0 2831102 0
A.87 0 2840777 0
A.87 0 2840693 0
A.87 0 2855732 0
A.87 0 2856351 0
A.87 0 2861247 0
A.87 0 2870556 0
A.87 0 2874290 0
A.87 0 2878216 0
A.87 0 2878948 0
A.87 0 2881757 0
A.87 0 2889944 0
A.87 0 2893152 0
A.87 0 2907369 0
A.87 0 2911515 0
A.87 0 2920599 0
A.87 0 2922807 0
A.87 0 2926708 0
A.87 0 2938828 0
A.87 0 2940817 0
A.87 0 2947965 1
A.87 0 2949373 0
A.87 0 2952565 0
A.87 0 2969734 0
A.87 0 2973628 0
A.87 0 2974477 0
A.87 0 2980474 0
A.87 0 2982775 0
A.87 0 2983655 0
A.87 0 3008818 0
A.87 0 3013953 1
A.87 0 3020490 0
A.87 0 3043128 0
A.87 0 3044712 0
A.88 0 2064916 0
A.88 0 2225733 0
A.88 0 2756521 0
A.88 0 2855584 0
A.88 0 1080884 0
A.88 0 1242276 1
A.88 0 956223 0
A.88 0 2578832 0
A.88 0 1276616 0
A.88 0 1568792 0
A.88 0 2878529 0
A.88 0 1875898 1
A.88 0 2017489 1
A.88 0 1141470 1
A.88 0 413818 0
A.88 0 448451 0
A.88 0 1607662 2
A.88 0 2446581 1
A.88 0 1506321 0
A.88 0 3042607 0
A.88 0 2093316 1
A.88 0 1954107 3
A.88 0 847141 0
A.88 0 1242277 0
A.88 0 1581837 1
A.88 0 222260 1
A.88 0 1946545 0
A.88 0 800157 0
A.88 0 2853105 0
A.88 0 1969977 0
A.88 0 1854429 0