Commit 15faa1af authored by Vít Novotný
Browse files

Add random system performance to the leaderboards

parent 5d31c92a
Pipeline #60221 failed with stage
......@@ -30,7 +30,7 @@ Each task comes with three *subsets*:
#### Using the `train` set to train your supervised system
``` sh
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.7
$ python
>>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
>>>
......@@ -58,7 +58,7 @@ Here is the documentation of the available evaluation functions:
#### Using the `validation` set to compare various parameters of your system
``` sh
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.7
$ python
>>> from arqmath_eval import get_topics, get_judged_documents
>>>
......
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6894* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6894* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6894* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6894* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6471* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6471* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6471* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.6471* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7336* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7336* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7336* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7336* | *random* |
......@@ -13,7 +13,6 @@ USER_README_HEAD = r'''
This table contains all results for user *%s* in descending order of task
performance. Result names are based on the filenames of the results with
underscores (`_`) replaced with a comma and a space for improved readability.
The random nDCG for this task is %.4f.
| nDCG | Result name |
|------|:------------|
......
......@@ -5,6 +5,7 @@ import os.path
import re
from pytrec_eval import parse_run
from tqdm import tqdm
from .common import get_ndcg, get_random_ndcg
from .configuration import TASKS, USER_README_HEAD
......@@ -13,18 +14,24 @@ from .configuration import TASKS, USER_README_HEAD
if __name__ == '__main__':
for task in TASKS:
random_ndcg = get_random_ndcg(task, 'validation')
for user in glob(os.path.join(task, '*', '')):
users = glob(os.path.join(task, '*', ''))
for user in users:
user = os.path.normpath(user)
user_name = os.path.basename(user)
user_results = []
for result in glob(os.path.join(user, '*.tsv')):
result_name = re.sub('_', ', ', os.path.basename(result)[:-4])
with open(result, 'rt') as f:
parsed_result = parse_run(f)
ndcg = get_ndcg(parsed_result, task, 'validation')
user_results.append((ndcg, result_name))
with open(os.path.join(user, 'README.md'), 'wt') as f:
f.write(USER_README_HEAD % (user_name, random_ndcg))
f.write('\n')
for ndcg, result_name in sorted(user_results, reverse=True):
f.write('| %.4f | %s |\n' % (ndcg, result_name))
user_results = [(random_ndcg, 'random')]
results = glob(os.path.join(user, '*.tsv'))
if results:
for result in tqdm(results, desc='Evaluating {} systems'.format(user)):
result_name = re.sub('_', ', ', os.path.basename(result)[:-4])
with open(result, 'rt') as f:
parsed_result = parse_run(f)
ndcg = get_ndcg(parsed_result, task, 'validation')
user_results.append((ndcg, result_name))
with open(os.path.join(user, 'README.md'), 'wt') as f:
f.write(USER_README_HEAD % user_name)
f.write('\n')
for ndcg, result_name in sorted(user_results, reverse=True):
if result_name == 'random':
f.write('| *%.4f* | *%s* |\n' % (ndcg, result_name))
else:
f.write('| %.4f | %s |\n' % (ndcg, result_name))
......@@ -5,13 +5,14 @@ from setuptools import setup
setup(
name='arqmath_eval',
version='0.0.6',
version='0.0.7',
description='Evaluation of ARQMath systems',
packages=['arqmath_eval'],
package_dir={'arqmath_eval': 'scripts'},
install_requires=[
'numpy~=1.18.2',
'pytrec-eval~=0.4',
'tqdm~=4.46.0',
],
package_data={
'arqmath_eval': [
......
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7578* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7578* | *random* |
......@@ -4,3 +4,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| nDCG | Result name |
|------|:------------|
| *0.7578* | *random* |
......@@ -9,3 +9,4 @@ underscores (`_`) replaced with a comma and a space for improved readability.
| 0.7602 | sbert, validation, prefix, datav1.0, exid23 |
| 0.7598 | sbert, validation, exid25, no-token-type, datav1.0 |
| 0.7597 | sbert, validation, exid24, token-type, datav1.0 |
| *0.7578* | *random* |
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment