Loading README.md +2 −2 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ Each task comes with three *subsets*: #### Using the `train` set to train your supervised system ``` sh $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5 $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6 $ python >>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg >>> Loading Loading @@ -58,7 +58,7 @@ Here is the documentation of the available evaluation functions: #### Using the `validation` set to compare various parameters of your system ``` sh $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5 $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6 $ python >>> from arqmath_eval import get_topics, get_judged_documents >>> Loading scripts/common.py +1 −1 Original line number Diff line number Diff line Loading @@ -165,7 +165,7 @@ def get_random_ndcg(task, subset, topn=1000): for i, judgement in enumerate(judgements): ideal_dcg += judgement / log2(i + 2) random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 1.0 random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 0.0 random_ndcgs.append(random_ndcg) return np.mean(random_ndcgs) Loading setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ from setuptools import setup setup( name='arqmath_eval', version='0.0.5', version='0.0.6', description='Evaluation of ARQMath systems', packages=['arqmath_eval'], package_dir={'arqmath_eval': 'scripts'}, Loading test/test_get_random_ndcg.py +18 −2 Original line number Diff line number Diff line from math import log2 from random import random import unittest from arqmath_eval import get_random_ndcg from arqmath_eval import get_random_ndcg, get_ndcg, get_topics, get_judged_documents class TestGetRandomNDCG(unittest.TestCase): def test(self): def test_using_equation(self): ndcg = get_random_ndcg('task1', 'test') expected_judgement = ( Loading @@ -31,6 +32,21 @@ class TestGetRandomNDCG(unittest.TestCase): expected_ndcg = expected_dcg / expected_idcg self.assertEqual(expected_ndcg, ndcg) def test_using_estimation(self): task = 'task1-votes' subset = 'small-validation' results = {} for topic in get_topics(task, subset): results[topic] = {} for document in get_judged_documents(task, subset, topic): similarity = random() results[topic][document] = similarity ndcg = get_ndcg(results, task, subset) expected_ndcg = get_random_ndcg(task, subset) self.assertAlmostEqual(expected_ndcg, ndcg, places=3) def test_with_topn(self): ndcg = get_random_ndcg('task1', 'test', 4) Loading Loading
README.md +2 −2 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ Each task comes with three *subsets*: #### Using the `train` set to train your supervised system ``` sh $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5 $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6 $ python >>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg >>> Loading Loading @@ -58,7 +58,7 @@ Here is the documentation of the available evaluation functions: #### Using the `validation` set to compare various parameters of your system ``` sh $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5 $ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6 $ python >>> from arqmath_eval import get_topics, get_judged_documents >>> Loading
scripts/common.py +1 −1 Original line number Diff line number Diff line Loading @@ -165,7 +165,7 @@ def get_random_ndcg(task, subset, topn=1000): for i, judgement in enumerate(judgements): ideal_dcg += judgement / log2(i + 2) random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 1.0 random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 0.0 random_ndcgs.append(random_ndcg) return np.mean(random_ndcgs) Loading
setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ from setuptools import setup setup( name='arqmath_eval', version='0.0.5', version='0.0.6', description='Evaluation of ARQMath systems', packages=['arqmath_eval'], package_dir={'arqmath_eval': 'scripts'}, Loading
test/test_get_random_ndcg.py +18 −2 Original line number Diff line number Diff line from math import log2 from random import random import unittest from arqmath_eval import get_random_ndcg from arqmath_eval import get_random_ndcg, get_ndcg, get_topics, get_judged_documents class TestGetRandomNDCG(unittest.TestCase): def test(self): def test_using_equation(self): ndcg = get_random_ndcg('task1', 'test') expected_judgement = ( Loading @@ -31,6 +32,21 @@ class TestGetRandomNDCG(unittest.TestCase): expected_ndcg = expected_dcg / expected_idcg self.assertEqual(expected_ndcg, ndcg) def test_using_estimation(self): task = 'task1-votes' subset = 'small-validation' results = {} for topic in get_topics(task, subset): results[topic] = {} for document in get_judged_documents(task, subset, topic): similarity = random() results[topic][document] = similarity ndcg = get_ndcg(results, task, subset) expected_ndcg = get_random_ndcg(task, subset) self.assertAlmostEqual(expected_ndcg, ndcg, places=3) def test_with_topn(self): ndcg = get_random_ndcg('task1', 'test', 4) Loading