Commit 13217011 authored by Vít Novotný's avatar Vít Novotný

Fix scripts.common.get_random_ndcg

parent db16de5f
Pipeline #60195 failed
@@ -30,7 +30,7 @@ Each task comes with three *subsets*:
 #### Using the `train` set to train your supervised system
 ``` sh
-$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5
+$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
 $ python
 >>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
 >>>
@@ -58,7 +58,7 @@ Here is the documentation of the available evaluation functions:
 #### Using the `validation` set to compare various parameters of your system
 ``` sh
-$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5
+$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
 $ python
 >>> from arqmath_eval import get_topics, get_judged_documents
 >>>
@@ -165,7 +165,7 @@ def get_random_ndcg(task, subset, topn=1000):
         for i, judgement in enumerate(judgements):
             ideal_dcg += judgement / log2(i + 2)
-        random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 1.0
+        random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 0.0
         random_ndcgs.append(random_ndcg)
     return np.mean(random_ndcgs)
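This hunk is the actual fix: when a topic has no relevant judged documents, its ideal DCG is zero, and the old `else 1.0` branch counted a random ranking of that topic as perfect, inflating the averaged baseline. The new `else 0.0` branch lets such topics contribute nothing. A toy illustration of the degenerate case:

```python
from math import log2

judgements = [0, 0, 0]  # a topic with no relevant judged documents
ideal_dcg = sum(judgement / log2(i + 2) for i, judgement in enumerate(judgements))
random_dcg = 0.0  # a random ranking cannot score higher than the ideal one

# Old behaviour: `else 1.0` reported this topic as perfectly ranked.
# New behaviour: `else 0.0` reports it as contributing nothing.
random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 0.0
print(random_ndcg)  # 0.0
```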
@@ -5,7 +5,7 @@ from setuptools import setup
 setup(
     name='arqmath_eval',
-    version='0.0.5',
+    version='0.0.6',
     description='Evaluation of ARQMath systems',
     packages=['arqmath_eval'],
     package_dir={'arqmath_eval': 'scripts'},
 from math import log2
+from random import random
 import unittest
-from arqmath_eval import get_random_ndcg
+from arqmath_eval import get_random_ndcg, get_ndcg, get_topics, get_judged_documents
 class TestGetRandomNDCG(unittest.TestCase):
-    def test(self):
+    def test_using_equation(self):
         ndcg = get_random_ndcg('task1', 'test')
         expected_judgement = (
@@ -31,6 +32,21 @@ class TestGetRandomNDCG(unittest.TestCase):
         expected_ndcg = expected_dcg / expected_idcg
         self.assertEqual(expected_ndcg, ndcg)
+    def test_using_estimation(self):
+        task = 'task1-votes'
+        subset = 'small-validation'
+        results = {}
+        for topic in get_topics(task, subset):
+            results[topic] = {}
+            for document in get_judged_documents(task, subset, topic):
+                similarity = random()
+                results[topic][document] = similarity
+        ndcg = get_ndcg(results, task, subset)
+        expected_ndcg = get_random_ndcg(task, subset)
+        self.assertAlmostEqual(expected_ndcg, ndcg, places=3)
     def test_with_topn(self):
         ndcg = get_random_ndcg('task1', 'test', 4)
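The new `test_using_estimation` scores every judged document with a uniform random similarity and checks that the resulting NDCG agrees with the analytical value from `get_random_ndcg` to three decimal places. A sketch of running just this test, assuming the module above is importable as `test_common` (the module name is a guess; adjust it to the file's actual path):

```python
import unittest

# Load only the new estimation-based check from the assumed module name.
suite = unittest.defaultTestLoader.loadTestsFromName(
    'test_common.TestGetRandomNDCG.test_using_estimation',
)
unittest.TextTestRunner(verbosity=2).run(suite)
```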