Unverified commit 8a145e95, authored by Vít Novotný
Browse files

Use only top 1000 results in the runs (cont.)

parent 53d55b45
Pipeline #59150 failed with stage
......@@ -93,7 +93,7 @@ $ git push # publish your new result and the upd
[get_judged_documents]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L59
[get_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L90
[get_random_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L124
[get_random_normalized_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L169
[get_random_normalized_ndcg]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L167
[get_topics]: https://gitlab.fi.muni.cz/xstefan3/arqmath-eval/-/blob/master/scripts/common.py#L34
[ntcir-11-math-2]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.686.444&rep=rep1&type=pdf (NTCIR-11 Math-2 Task Overview)
[ntcir-12-mathir]: https://www.cs.rit.edu/~rlaz/files/ntcir12-mathir.pdf (NTCIR-12 MathIR Task Overview)
......
......@@ -153,15 +153,11 @@ def get_random_ndcg(task, subset, topn=1000):
expected_judgement = np.mean(judgements)
random_dcg = 0.0
for i in range(len(judgements)):
if i >= topn:
break
for i in range(min(len(judgements), topn)):
random_dcg += expected_judgement / log2(i + 2)
ideal_dcg = 0.0
for i, judgement in enumerate(judgements):
if i >= topn:
break
ideal_dcg += judgement / log2(i + 2)
random_ndcg = random_dcg / ideal_dcg
......
......@@ -47,6 +47,12 @@ class TestGetRandomNDCG(unittest.TestCase):
expected_idcg = 0.0
for i in range(1, 5):
expected_idcg += 3.0 / log2(i + 1)
for i in range(5, 9):
expected_idcg += 2.0 / log2(i + 1)
for i in range(9, 17):
expected_idcg += 1.0 / log2(i + 1)
for i in range(17, 19):
expected_idcg += 0.0 / log2(i + 1)
expected_ndcg = expected_dcg / expected_idcg
self.assertEqual(expected_ndcg, ndcg)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment