Commit e7279bfa authored by Vít Novotný

Add Math StackExchange user votes

parent cc5aeb0e
Pipeline #58133 failed in 43 seconds
@@ -6,3 +6,7 @@ scripts/NTCIR12_MathWikiFrm-qrels_agg-train.dat
 scripts/NTCIR12_MathWikiFrm-qrels_agg-test.dat
 scripts/qrel.V1.0-train.tsv
 scripts/qrel.V1.0-test.tsv
+scripts/votes-qrels-train.V1.0.tsv
+scripts/votes-qrels-train-train.V1.0.tsv
+scripts/votes-qrels-train-validation.V1.0.tsv
+scripts/votes-qrels-test.V1.0.tsv
@@ -11,7 +11,8 @@ pip install git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@master
This repository evaluates the performance of your information retrieval system
on a number of *tasks*:
- `task1/`[ARQMath Task1][arqmath-task1] validation dataset,
- `task1-votes/`[ARQMath Task1][arqmath-task1] Math StackExchange [user votes][],
- `ntcir-11-math-2-main/`[NTCIR-11 Math-2 Task Main Subtask][ntcir-11-math-2],
- `ntcir-12-mathir-arxiv-main/`[NTCIR-12 MathIR Task ArXiv Main Subtask][ntcir-12-mathir], and
- `ntcir-12-mathir-math-wiki-formula/`[NTCIR-12 MathIR Task MathWikiFormula Subtask][ntcir-12-mathir].
@@ -34,3 +35,4 @@ $ git push # publish your new result and the updated lea
[treceval-format]: https://stackoverflow.com/a/8175382/657401 (How to evaluate a search/retrieval engine using trec_eval?)
[ntcir-11-math-2]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.686.444&rep=rep1&type=pdf (NTCIR-11 Math-2 Task Overview)
[ntcir-12-mathir]: https://www.cs.rit.edu/~rlaz/files/ntcir12-mathir.pdf (NTCIR-12 MathIR Task Overview)
[user votes]: https://gitlab.fi.muni.cz/xnovot32/arqmath-data-preprocessing/-/blob/master/scripts/xml_to_qrels_tsv.py
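A minimal evaluation sketch for the new `task1-votes` task, using the `get_topics`, `get_judged_documents`, and `ndcg` helpers introduced in the `common.py` hunk below. The run structure (`{topic: {document: score}}`) is suggested by `remove_nonjudged_topics_and_documents`, and the random scores are a stand-in for a real retrieval system:

```python
import random

from arqmath_eval import get_topics, get_judged_documents, ndcg

# Toy run: score every judged document of every topic at random.
# A real system would substitute its own retrieval scores here.
parsed_run = {
    topic: {
        document: random.random()
        for document in get_judged_documents(task='task1-votes',
                                             subset='train-validation',
                                             topic=topic)
    }
    for topic in get_topics(task='task1-votes', subset='train-validation')
}

# Evaluate the run against the train-validation user-vote judgements.
print(ndcg(parsed_run, task='task1-votes', subset='train-validation'))
```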
-from .common import get_judged_documents, ndcg
+from .common import get_topics, get_judged_documents, ndcg
@@ -21,15 +21,27 @@ def remove_nonjudged_topics_and_documents(parsed_run, task, subset):
     return only_judged_parsed_run


-def get_judged_documents(task='task1'):
+def get_topics(task='task1-votes', subset=None):
+    topics = set()
+    subsets = PARSED_RELEVANCE_JUDGEMENTS.values() if subset is None else [PARSED_RELEVANCE_JUDGEMENTS[subset]]
+    for subset in subsets:
+        for topic in subset[task].keys():
+            topics.add(topic)
+    return topics
+
+
+def get_judged_documents(task='task1-votes', subset=None, topic=None):
     judged_documents = set()
-    for subset in PARSED_RELEVANCE_JUDGEMENTS.values():
-        for documents in subset[task].values():
+    subsets = PARSED_RELEVANCE_JUDGEMENTS.values() if subset is None else [PARSED_RELEVANCE_JUDGEMENTS[subset]]
+    for subset in subsets:
+        topics = subset[task].values() if topic is None else [subset[task][topic]]
+        for topic in topics:
+            documents = topic.keys()
             judged_documents.update(documents)
     return judged_documents


-def ndcg(parsed_run, task='task1', subset='train'):
+def ndcg(parsed_run, task='task1-votes', subset='train-validation'):
     evaluator = EVALUATORS[subset][task]
     only_judged_parsed_run = remove_nonjudged_topics_and_documents(parsed_run, task, subset)
     evaluation = evaluator.evaluate(only_judged_parsed_run)
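The effect of the new `subset` and `topic` parameters can be illustrated with a throwaway snippet (assuming the package and its judgement files are installed):

```python
from arqmath_eval import get_topics, get_judged_documents

# Topics with user-vote judgements in any subset (train, train-train,
# train-validation, and test pooled together).
all_topics = get_topics(task='task1-votes')

# The same query restricted to the validation split, plus the documents
# judged for one of its topics.
validation_topics = get_topics(task='task1-votes', subset='train-validation')
some_topic = next(iter(validation_topics))
judged = get_judged_documents(task='task1-votes', subset='train-validation',
                              topic=some_topic)

print(len(all_topics), len(validation_topics), len(judged))
```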
@@ -20,12 +20,20 @@ underscores (`_`) replaced with a comma and a space for improved readability.
 RELEVANCE_JUDGEMENTS = {
     'train': {
         'task1': 'qrel.V1.0-train.tsv',
+        'task1-votes': 'votes-qrels-train.V1.0.tsv',
         'ntcir-11-math-2-main': 'NTCIR11_Math-qrels-train.dat',
         'ntcir-12-mathir-arxiv-main': 'NTCIR12_Math-qrels_agg-train.dat',
         'ntcir-12-mathir-math-wiki-formula': 'NTCIR12_MathWikiFrm-qrels_agg-train.dat',
     },
+    'train-train': {
+        'task1-votes': 'votes-qrels-train-train.V1.0.tsv',
+    },
+    'train-validation': {
+        'task1-votes': 'votes-qrels-train-validation.V1.0.tsv',
+    },
     'test': {
         'task1': 'qrel.V1.0-test.tsv',
+        'task1-votes': 'votes-qrels-test.V1.0.tsv',
         'ntcir-11-math-2-main': 'NTCIR11_Math-qrels-test.dat',
         'ntcir-12-mathir-arxiv-main': 'NTCIR12_Math-qrels_agg-test.dat',
         'ntcir-12-mathir-math-wiki-formula': 'NTCIR12_MathWikiFrm-qrels_agg-test.dat',
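The code that parses these files into `PARSED_RELEVANCE_JUDGEMENTS` is not part of this hunk. For orientation, a rough equivalent for one file might look as follows; the tab-separated column order (topic, iteration, document, relevance) and integer relevance values follow the usual trec_eval qrels convention and are assumptions here, not something shown in the diff:

```python
import csv

def parse_qrels_tsv(path):
    """Parse one qrels file into the {topic: {document: relevance}} shape
    that get_topics and get_judged_documents expect for a single task."""
    judgements = {}
    with open(path, 'rt', newline='') as f:
        # Assumed column order: topic, iteration, document, relevance.
        for topic, _iteration, document, relevance in csv.reader(f, delimiter='\t'):
            judgements.setdefault(topic, {})[document] = int(relevance)
    return judgements
```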
@@ -6,7 +6,7 @@ from setuptools import setup
 setup(
     name='arqmath_eval',
     version='0.0.1',
-    description='Evaluation of ARQMath systens',
+    description='Evaluation of ARQMath systems',
     packages=['arqmath_eval'],
     package_dir={'arqmath_eval': 'scripts'},
     install_requires=[
@@ -23,6 +23,10 @@ setup(
             'NTCIR12_MathWikiFrm-qrels_agg-test.dat',
             'qrel.V1.0-train.tsv',
             'qrel.V1.0-test.tsv',
+            'votes-qrels-train.V1.0.tsv',
+            'votes-qrels-train-train.V1.0.tsv',
+            'votes-qrels-train-validation.V1.0.tsv',
+            'votes-qrels-test.V1.0.tsv',
         ],
     },
     include_package_data=True,
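Because the qrels files are declared as `package_data`, they are installed alongside the `arqmath_eval` module and can be located relative to the package itself. A hedged sketch of that lookup; whether `common.py` resolves the paths exactly this way is not shown in this diff:

```python
import os.path

import arqmath_eval

def qrels_path(filename):
    # Resolve a packaged qrels file relative to the installed module.
    return os.path.join(os.path.dirname(arqmath_eval.__file__), filename)

print(qrels_path('votes-qrels-train-validation.V1.0.tsv'))
```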
This table contains the best result for every user.
| nDCG | User | Result name |
|:-----|------|:------------|
| 0.7519 | xstefan3 | example, key1=value1, key2=value2, etc |
| 0.7519 | xnovot32 | example, key1=value1, key2=value2, etc |
| 0.7519 | xluptak4 | example, key1=value1, key2=value2, etc |
| 0.7519 | ayetiran | example, key1=value1, key2=value2, etc |
This table contains all results for user *ayetiran* in descending order of task
performance. Result names are based on the filenames of the results with
underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| 0.7519 | example, key1=value1, key2=value2, etc |
This table contains all results for user *xluptak4* in descending order of task
performance. Result names are based on the filenames of the results with
underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| 0.7519 | example, key1=value1, key2=value2, etc |