Commit 13217011 authored by Vít Novotný's avatar Vít Novotný
Browse files

Fix scripts.common.get_random_ndcg

parent db16de5f
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ Each task comes with three *subsets*:
#### Using the `train` set to train your supervised system

``` sh
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
$ python
>>> from arqmath_eval import get_topics, get_judged_documents, get_ndcg
>>>
@@ -58,7 +58,7 @@ Here is the documentation of the available evaluation functions:
#### Using the `validation` set to compare various parameters of your system

``` sh
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.5
$ pip install --force-reinstall git+https://gitlab.fi.muni.cz/xstefan3/arqmath-eval@0.0.6
$ python
>>> from arqmath_eval import get_topics, get_judged_documents
>>>
+1 −1
Original line number Diff line number Diff line
@@ -165,7 +165,7 @@ def get_random_ndcg(task, subset, topn=1000):
        for i, judgement in enumerate(judgements):
            ideal_dcg += judgement / log2(i + 2)

        random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 1.0
        random_ndcg = random_dcg / ideal_dcg if ideal_dcg > 0 else 0.0
        random_ndcgs.append(random_ndcg)

    return np.mean(random_ndcgs)
+1 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ from setuptools import setup

setup(
    name='arqmath_eval',
    version='0.0.5',
    version='0.0.6',
    description='Evaluation of ARQMath systems',
    packages=['arqmath_eval'],
    package_dir={'arqmath_eval': 'scripts'},
+18 −2
Original line number Diff line number Diff line
from math import log2
from random import random
import unittest

from arqmath_eval import get_random_ndcg
from arqmath_eval import get_random_ndcg, get_ndcg, get_topics, get_judged_documents


class TestGetRandomNDCG(unittest.TestCase):
    def test(self):
    def test_using_equation(self):
        ndcg = get_random_ndcg('task1', 'test')

        expected_judgement = (
@@ -31,6 +32,21 @@ class TestGetRandomNDCG(unittest.TestCase):
        expected_ndcg = expected_dcg / expected_idcg
        self.assertEqual(expected_ndcg, ndcg)

    def test_using_estimation(self):
        """Check that get_random_ndcg matches the NDCG actually measured for
        uniformly random similarity scores on the same task and subset."""
        task = 'task1-votes'
        subset = 'small-validation'

        # Score every judged document of every topic with a uniformly random
        # similarity, producing the results structure expected by get_ndcg.
        random_results = {
            topic: {
                document: random()
                for document in get_judged_documents(task, subset, topic)
            }
            for topic in get_topics(task, subset)
        }

        measured_ndcg = get_ndcg(random_results, task, subset)
        estimated_ndcg = get_random_ndcg(task, subset)
        # The analytic estimate should agree with the empirical value closely.
        self.assertAlmostEqual(estimated_ndcg, measured_ndcg, places=3)

    def test_with_topn(self):
        ndcg = get_random_ndcg('task1', 'test', 4)