Commit 63708218 authored by Vít Novotný's avatar Vít Novotný

Create per-task leaderboards

parent 2496357e
Pipeline #61718 failed
This table contains the best result for every user on the *ntcir-11-math-2-main* task.

| nDCG | Result name | User |
|:-----|:------------|------|
| *0.6894* | *random* | *Mr. Random* |

This table contains the best result for every user on the *ntcir-12-mathir-arxiv-main* task.

| nDCG | Result name | User |
|:-----|:------------|------|
| *0.6471* | *random* | *Mr. Random* |

This table contains the best result for every user on the *ntcir-12-mathir-math-wiki-formula* task.

| nDCG | Result name | User |
|:-----|:------------|------|
| *0.7336* | *random* | *Mr. Random* |
@@ -4,10 +4,10 @@ from pytrec_eval import parse_qrel, RelevanceEvaluator
 
 TASK_README_HEAD = r'''
-This table contains the best result for every user.
+This table contains the best result for every user on the *%s* task.
 
-| nDCG | User | Result name |
-|:-----|------|:------------|
+| nDCG | Result name | User |
+|:-----|:------------|------|
 '''.strip()
 
 USER_README_HEAD = r'''
 This table contains all results for user *%s* in descending order of task
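For illustration, the following sketch (not part of the commit) shows how the updated `TASK_README_HEAD` template renders once a task name is substituted for the `%s` placeholder; the task name used here is one of the leaderboard directories above.

```python
# Minimal sketch: the template text is copied from the diff above,
# the task name is just an example from the leaderboards.
TASK_README_HEAD = r'''
This table contains the best result for every user on the *%s* task.

| nDCG | Result name | User |
|:-----|:------------|------|
'''.strip()

print(TASK_README_HEAD % 'ntcir-11-math-2-main')
# This table contains the best result for every user on the *ntcir-11-math-2-main* task.
#
# | nDCG | Result name | User |
# |:-----|:------------|------|
```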
@@ -9,7 +9,7 @@ from pytrec_eval import parse_run
 from tqdm import tqdm
 
 from .common import get_ndcg, get_random_ndcg
-from .configuration import TASKS, USER_README_HEAD
+from .configuration import TASKS, USER_README_HEAD, TASK_README_HEAD
 
 
 def evaluate_worker(result_filename):
@@ -24,6 +24,7 @@ if __name__ == '__main__':
     for task in TASKS:
         random_ndcg = get_random_ndcg(task, 'validation')
         users = glob(os.path.join(task, '*', ''))
+        task_results = [(random_ndcg, 'random', 'Mr. Random')]
         for user in users:
             user = os.path.normpath(user)
             user_name = os.path.basename(user)
@@ -37,6 +38,7 @@ if __name__ == '__main__':
             with open(os.path.join(user, 'README.md'), 'wt') as f_readme:
                 f_readme.write(USER_README_HEAD % user_name)
                 f_readme.write('\n')
+                task_results.append((*max(user_results), user_name))
                 for ndcg, result_name in sorted(user_results, reverse=True):
                     if result_name == 'random':
                         f_readme.write('| *%.4f* | *%s* |\n' % (ndcg, result_name))
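For illustration, a minimal sketch (with hypothetical scores and run names, not taken from the repository) of what the two added lines do: the per-task list is seeded with the Mr. Random baseline, and `max()` over `(ndcg, result_name)` tuples compares the nDCG first, so each user contributes only their single best run to the task leaderboard.

```python
# Seed the leaderboard with the random baseline, as in the diff above.
random_ndcg = 0.7578                                    # hypothetical baseline score
task_results = [(random_ndcg, 'random', 'Mr. Random')]

user_name = 'ayetiran'                                  # hypothetical user
user_results = [(0.7421, 'run-a'), (0.7604, 'run-b')]   # hypothetical (ndcg, result_name) runs

# max() picks the tuple with the highest nDCG; * unpacks it in front of the user name.
task_results.append((*max(user_results), user_name))

print(task_results)
# [(0.7578, 'random', 'Mr. Random'), (0.7604, 'run-b', 'ayetiran')]
```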
@@ -48,4 +50,11 @@ if __name__ == '__main__':
                         f_readme.write(f_legend.read())
                 except IOError:
                     pass
+        with open(os.path.join(task, 'README.md'), 'wt') as f_readme:
+            f_readme.write(TASK_README_HEAD % task)
+            f_readme.write('\n')
+            for ndcg, result_name, user_name in sorted(task_results, reverse=True):
+                if result_name == 'random':
+                    f_readme.write('| *%.4f* | *%s* | *%s* |\n' % (ndcg, result_name, user_name))
+                else:
+                    f_readme.write('| %.4f | %s | %s |\n' % (ndcg, result_name, user_name))
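For illustration, a minimal standalone sketch (hypothetical scores) of the new per-task README loop: sorting the `(ndcg, result_name, user_name)` tuples in reverse order lists the rows by descending nDCG, and the baseline row submitted under the result name `random` is set in italics, matching the tables shown above.

```python
# Hypothetical leaderboard entries for one task.
task_results = [
    (0.7578, 'random', 'Mr. Random'),
    (0.7796, 'sbert, validation', 'xstefan3'),
]

# Rows come out in descending nDCG order; the random baseline is italicized.
for ndcg, result_name, user_name in sorted(task_results, reverse=True):
    if result_name == 'random':
        print('| *%.4f* | *%s* | *%s* |' % (ndcg, result_name, user_name))
    else:
        print('| %.4f | %s | %s |' % (ndcg, result_name, user_name))
# | 0.7796 | sbert, validation | xstefan3 |
# | *0.7578* | *random* | *Mr. Random* |
```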
@@ -5,7 +5,7 @@ from setuptools import setup
 
 setup(
     name='arqmath_eval',
-    version='0.0.9',
+    version='0.0.10',
     description='Evaluation of ARQMath systems',
     packages=['arqmath_eval'],
     package_dir={'arqmath_eval': 'scripts'},
This table contains the best result for every user on the *task1-votes* task.

| nDCG | Result name | User |
|:-----|:------------|------|
| 0.7796 | sbert, validation, html-removal, exid9 | xstefan3 |
| 0.7614 | prefix, phrases=2, alpha=0.05, bucket=2000000, iter=5, max-n=6, min-alpha=0, min-count=5, min-n=3, negative=5, sample=0.0001, sg=1, size=300, window=5, workers=64, dominant=True, nonzero-limit=800, symmetric=True, exponent=4.0, threshold=-1.0 | xnovot32 |
| 0.7604 | prefix, phrases=2, alpha=0.1, dm=0, dm-concat=1, epochs=5, hs=0, min-alpha=0, min-count=5, negative=12, vector-size=300, window=8, workers=64 | ayetiran |
| *0.7578* | *random* | *Mr. Random* |

This table contains the best result for every user on the *task1* task.

| nDCG | Result name | User |
|:-----|:------------|------|
| *0.8477* | *random* | *xstefan3* |
| *0.8477* | *random* | *xnovot32* |
| *0.8477* | *random* | *xluptak4* |
| *0.8477* | *random* | *ayetiran* |
| *0.8477* | *random* | *Mr. Random* |