Commit 873856e2 authored by Vít Novotný
Browse files

Fix the scripts.evaluate CLI

parent 90521949
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ trained using subsets of the `task1` and `task2` tasks.
#### Using the `train` subset to train your supervised system

``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python
>>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
>>>
@@ -65,7 +65,7 @@ Here is the documentation of the available evaluation functions:
#### Using the `validation` subset to compare various parameters of your system

``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python
>>> from arqmath_eval import get_topics, get_judged_documents
>>>
@@ -96,7 +96,7 @@ $ git push # publish your new result and the upd
#### Using the `all` subset to compute the NDCG' score of an ARQMath submission

``` sh
-$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.18
+$ pip install --force-reinstall git+https://github.com/MIR-MU/ARQMath-eval@0.0.19
$ python -m arqmath_eval.evaluate MIRMU-task1-Ensemble-auto-both-A.tsv all
0.238, 95% CI: [0.198; 0.278]
```
+4 −1
Original line number Diff line number Diff line
# -*- coding:utf-8 -*-

from glob import glob
+from itertools import repeat
from multiprocessing import Pool
import os.path
import re
@@ -13,7 +14,8 @@ from .common import get_ndcg, get_random_ndcg
from .configuration import TASKS, USER_README_HEAD, TASK_README_HEAD


-def evaluate_worker(result_filename):
+def evaluate_worker(args):
+    task, result_filename = args
    result_name = re.sub('_', ', ', os.path.basename(result_filename)[:-4])
    with open(result_filename, 'rt') as f:
        parsed_result = parse_run(f)
@@ -35,6 +37,7 @@ def produce_leaderboards():
            results = glob(os.path.join(user, '*.tsv'))
            if results:
                results = tqdm(results, desc='Evaluating {} results'.format(user))
+                results = zip(repeat(task), results)
                with Pool(None) as pool:
                    for result_name, ndcg in pool.map(evaluate_worker, results):
                        user_results.append((ndcg, result_name))
+1 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ from setuptools import setup

setup(
    name='arqmath_eval',
-    version='0.0.18',
+    version='0.0.19',
    description='Evaluation of ARQMath systems',
    packages=['arqmath_eval'],
    package_dir={'arqmath_eval': 'scripts'},