Commit 873856e2 authored by Vít Novotný

Fix the scripts.evaluate CLI

parent 90521949
Pipeline #63013 failed
README.md
@@ -36,7 +36,7 @@ trained using subsets of the `task1` and `task2` tasks.
#### Using the `train` subset to train your supervised system
``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python
>>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
>>>
@@ -65,7 +65,7 @@ Here is the documentation of the available evaluation functions:
#### Using the `validation` subset to compare various parameters of your system
``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python
>>> from arqmath_eval import get_topics, get_judged_documents
>>>
@@ -96,7 +96,7 @@ $ git push # publish your new result and the upd
#### Using the `all` subset to compute the NDCG' score of an ARQMath submission
``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python -m arqmath_eval.evaluate MIRMU-task1-Ensemble-auto-both-A.tsv all
0.238, 95% CI: [0.198; 0.278]
```
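The second part of the CLI output above is a 95% confidence interval around the mean NDCG' score. As a rough, hypothetical illustration (a common normal-approximation recipe, not necessarily the exact procedure `arqmath_eval` uses), such an interval can be derived from per-topic scores as follows:
``` python
# Illustrative only: mean and normal-approximation 95% CI over per-topic scores.
# The per-topic values below are made up; arqmath_eval's exact procedure may differ.
from statistics import mean, stdev

per_topic_ndcg = [0.31, 0.18, 0.27, 0.22, 0.25]
m = mean(per_topic_ndcg)
# 1.96 is the two-sided 95% critical value of the standard normal distribution
half_width = 1.96 * stdev(per_topic_ndcg) / len(per_topic_ndcg) ** 0.5
print('{:.3f}, 95% CI: [{:.3f}; {:.3f}]'.format(m, m - half_width, m + half_width))
```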
scripts/evaluate.py
# -*- coding:utf-8 -*-
from glob import glob
+from itertools import repeat
from multiprocessing import Pool
import os.path
import re
@@ -13,7 +14,8 @@ from .common import get_ndcg, get_random_ndcg
from .configuration import TASKS, USER_README_HEAD, TASK_README_HEAD
-def evaluate_worker(result_filename):
+def evaluate_worker(args):
+    task, result_filename = args
    result_name = re.sub('_', ', ', os.path.basename(result_filename)[:-4])
    with open(result_filename, 'rt') as f:
        parsed_result = parse_run(f)
@@ -35,6 +37,7 @@ def produce_leaderboards():
        results = glob(os.path.join(user, '*.tsv'))
        if results:
            results = tqdm(results, desc='Evaluating {} results'.format(user))
+            results = zip(repeat(task), results)
            with Pool(None) as pool:
                for result_name, ndcg in pool.map(evaluate_worker, results):
                    user_results.append((ndcg, result_name))
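The core of the fix: `multiprocessing.Pool.map` passes exactly one argument to the worker per input item, so the task name is bundled with each result file via `zip(repeat(task), results)` and unpacked inside `evaluate_worker`. A minimal, self-contained sketch of this pattern, using hypothetical names rather than code from this repository:
``` python
# Sketch of the Pool.map pattern used above: each item handed to the worker is a
# single (constant, filename) tuple, built with zip() and itertools.repeat().
from itertools import repeat
from multiprocessing import Pool


def worker(args):
    task, filename = args  # unpack the one tuple argument Pool.map provides
    return '{}: {}'.format(task, filename)


if __name__ == '__main__':
    filenames = ['run1.tsv', 'run2.tsv', 'run3.tsv']
    with Pool(None) as pool:
        # repeat(task) is infinite, but zip() stops at the shortest iterable
        print(pool.map(worker, zip(repeat('task1'), filenames)))
        # ['task1: run1.tsv', 'task1: run2.tsv', 'task1: run3.tsv']
```
An alternative would be `Pool.starmap`, which unpacks argument tuples itself.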
setup.py
@@ -5,7 +5,7 @@ from setuptools import setup
setup(
    name='arqmath_eval',
-    version='0.0.18',
+    version='0.0.19',
    description='Evaluation of ARQMath systems',
    packages=['arqmath_eval'],
    package_dir={'arqmath_eval': 'scripts'},