Commit b5fae0ab authored by Vít Novotný

Rename common.ndcg to common.get_ndcg

parent dd334afc
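Note: this commit renames the public evaluation function from ndcg to get_ndcg; the package import below keeps `get_ndcg as ndcg` as a backward-compatible alias, so existing callers continue to work. A minimal usage sketch under the new name (the single-topic run is a toy example borrowed from the tests; real runs come from pytrec_eval.parse_run):

from arqmath_eval import get_ndcg

# a parsed run maps topic ids to {document id: retrieval score} dicts
parsed_run = {
    'A.78': {
        '493782': 1.00,
    },
}
score = get_ndcg(parsed_run, task='task1', subset='test')
print('nDCG: %.4f' % score)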
-from .common import get_topics, get_judged_documents, ndcg
+from .common import get_topics, get_judged_documents, get_ndcg as ndcg, get_ndcg
@@ -46,7 +46,7 @@ def get_judged_documents(task='task1-votes', subset=None, topic=None):
     return judged_documents
 
 
-def ndcg(parsed_run, task='task1-votes', subset='train-validation'):
+def get_ndcg(parsed_run, task='task1-votes', subset='train-validation'):
     evaluator = EVALUATORS[subset][task]
     only_judged_parsed_run = remove_nonjudged_topics_and_documents(parsed_run, task, subset)
     evaluation = evaluator.evaluate(only_judged_parsed_run)
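The hunk above is a pure rename: the body still looks up an evaluator in EVALUATORS and feeds it the judged-only run. For context, a hedged sketch of how such an evaluator is typically built and consumed with pytrec_eval; the qrels file name and the averaging at the end are illustrative assumptions, not part of this commit:

import pytrec_eval

# hypothetical construction of one EVALUATORS entry; the real mapping and
# qrels files are defined elsewhere in the package
with open('qrels.task1.test.tsv', 'rt') as f:  # assumed file name
    qrel = pytrec_eval.parse_qrel(f)
evaluator = pytrec_eval.RelevanceEvaluator(qrel, {'ndcg'})

# evaluate() returns {topic_id: {'ndcg': value}} for the topics in the run;
# averaging over topics (an assumption here) yields one score per run
parsed_run = {'A.78': {'493782': 1.00}}  # toy single-topic run
evaluation = evaluator.evaluate(parsed_run)
mean_ndcg = sum(scores['ndcg'] for scores in evaluation.values()) / len(evaluation)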
@@ -6,7 +6,7 @@ import re
 from pytrec_eval import parse_run
 
-from .common import ndcg
+from .common import get_ndcg
 from .configuration import TASKS, TASK_README_HEAD, USER_README_HEAD
@@ -21,16 +21,16 @@ if __name__ == '__main__':
             result_name = re.sub('_', ', ', os.path.basename(result)[:-4])
             with open(result, 'rt') as f:
                 parsed_result = parse_run(f)
-            user_results.append((ndcg(parsed_result, task, 'test'), result_name))
+            user_results.append((get_ndcg(parsed_result, task, 'test'), result_name))
         best_ndcg, best_result_name = max(user_results)
         task_results.append((best_ndcg, user_name, best_result_name))
         with open(os.path.join(user, 'README.md'), 'wt') as f:
             f.write(USER_README_HEAD % user_name)
             f.write('\n')
-            for ndcg_score, result_name in sorted(user_results, reverse=True):
-                f.write('| %.4f | %s |\n' % (ndcg_score, result_name))
+            for ndcg, result_name in sorted(user_results, reverse=True):
+                f.write('| %.4f | %s |\n' % (ndcg, result_name))
     with open(os.path.join(task, 'README.md'), 'wt') as f:
         f.write(TASK_README_HEAD)
         f.write('\n')
-        for ndcg_score, user_name, result_name in sorted(task_results, reverse=True):
-            f.write('| %.4f | %s | %s |\n' % (ndcg_score, user_name, result_name))
+        for ndcg, user_name, result_name in sorted(task_results, reverse=True):
+            f.write('| %.4f | %s | %s |\n' % (ndcg, user_name, result_name))
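The loops above write one Markdown table row per run (and per user), sorted by nDCG in descending order, so each generated README is a small leaderboard. A tiny sketch of the row format, with a made-up score and run name:

row = '| %.4f | %s |\n' % (0.5324, 'my, best, run')
# -> '| 0.5324 | my, best, run |\n'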
 from math import log2
 import unittest
 
-from arqmath_eval import ndcg
+from arqmath_eval import get_ndcg
 
 
 class TestNDCG(unittest.TestCase):
-    def test_best_score(self):
+    def test_best(self):
         parsed_run = {
             'A.78': {
                 '493782': 1.00,
@@ -28,11 +28,11 @@ class TestNDCG(unittest.TestCase):
                 '1282114': 0.15,
             }
         }
-        ndcg_score = ndcg(parsed_run, 'task1', 'test')
-        expected_ndcg_score = 1.0
-        self.assertEqual(expected_ndcg_score, ndcg_score)
+        ndcg = get_ndcg(parsed_run, 'task1', 'test')
+        expected_ndcg = 1.0
+        self.assertEqual(expected_ndcg, ndcg)
 
-    def test_best_score_with_unjudged_topics(self):
+    def test_best_with_unjudged_topics(self):
         parsed_run = {
             'A.78': {
                 '493782': 1.00,
@@ -59,11 +59,11 @@ class TestNDCG(unittest.TestCase):
                 '692232': 0.50,
             },
         }
-        ndcg_score = ndcg(parsed_run, 'task1', 'test')
-        expected_ndcg_score = 1.0
-        self.assertEqual(expected_ndcg_score, ndcg_score)
+        ndcg = get_ndcg(parsed_run, 'task1', 'test')
+        expected_ndcg = 1.0
+        self.assertEqual(expected_ndcg, ndcg)
 
-    def test_best_score_with_unjudged_documents(self):
+    def test_best_with_unjudged_documents(self):
         parsed_run = {
             'A.78': {
                 '493782': 1.00,
@@ -88,11 +88,11 @@ class TestNDCG(unittest.TestCase):
                 'unjudged_2': 0.05,
             },
         }
-        ndcg_score = ndcg(parsed_run, 'task1', 'test')
-        expected_ndcg_score = 1.0
-        self.assertEqual(expected_ndcg_score, ndcg_score)
+        ndcg = get_ndcg(parsed_run, 'task1', 'test')
+        expected_ndcg = 1.0
+        self.assertEqual(expected_ndcg, ndcg)
 
-    def test_best_score_with_unjudged_topics_and_documents(self):
+    def test_best_with_unjudged_topics_and_documents(self):
         parsed_run = {
             'A.78': {
                 '493782': 1.00,
@@ -121,11 +121,11 @@ class TestNDCG(unittest.TestCase):
                 '692232': 0.50,
             },
         }
-        ndcg_score = ndcg(parsed_run, 'task1', 'test')
-        expected_ndcg_score = 1.0
-        self.assertEqual(expected_ndcg_score, ndcg_score)
+        ndcg = get_ndcg(parsed_run, 'task1', 'test')
+        expected_ndcg = 1.0
+        self.assertEqual(expected_ndcg, ndcg)
 
-    def test_worst_score(self):
+    def test_worst(self):
         parsed_run = {
             'A.78': {
                 '493782': 0.15,
@@ -148,26 +148,27 @@ class TestNDCG(unittest.TestCase):
                 '1282114': 1.00,
             }
         }
-        ndcg_score = ndcg(parsed_run, 'task1', 'test')
-        dcg_score = 0.0
+        ndcg = get_ndcg(parsed_run, 'task1', 'test')
+        expected_dcg = 0.0
         for i in range(1, 3):
-            dcg_score += 0.0 / log2(i + 1)
+            expected_dcg += 0.0 / log2(i + 1)
         for i in range(3, 11):
-            dcg_score += 1.0 / log2(i + 1)
+            expected_dcg += 1.0 / log2(i + 1)
         for i in range(11, 15):
-            dcg_score += 2.0 / log2(i + 1)
+            expected_dcg += 2.0 / log2(i + 1)
         for i in range(15, 19):
-            dcg_score += 3.0 / log2(i + 1)
+            expected_dcg += 3.0 / log2(i + 1)
-        idcg_score = 0.0
+        expected_idcg = 0.0
         for i in range(1, 5):
-            idcg_score += 3.0 / log2(i + 1)
+            expected_idcg += 3.0 / log2(i + 1)
         for i in range(5, 9):
-            idcg_score += 2.0 / log2(i + 1)
+            expected_idcg += 2.0 / log2(i + 1)
         for i in range(9, 17):
-            idcg_score += 1.0 / log2(i + 1)
+            expected_idcg += 1.0 / log2(i + 1)
         for i in range(17, 19):
-            idcg_score += 0.0 / log2(i + 1)
+            expected_idcg += 0.0 / log2(i + 1)
-        expected_ndcg_score = dcg_score / idcg_score
-        self.assertEqual(expected_ndcg_score, ndcg_score)
+        expected_ndcg = expected_dcg / expected_idcg
+        self.assertEqual(expected_ndcg, ndcg)
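The expected value in test_worst is the textbook nDCG: the discounted cumulative gain of the submitted ranking divided by that of the ideal ranking, with a 1/log2(rank + 1) discount. A standalone sketch of the same arithmetic, kept outside the diff; the relevance grades mirror the ones the test assumes for topic A.78:

from math import log2

def dcg(relevances):
    # relevances are listed in rank order; rank 1 gets discount 1 / log2(2)
    return sum(rel / log2(rank + 1) for rank, rel in enumerate(relevances, start=1))

# worst possible ordering used by the test: 2 non-relevant documents first,
# then 8 with grade 1, 4 with grade 2, and 4 with grade 3 at the bottom
worst_order = [0.0] * 2 + [1.0] * 8 + [2.0] * 4 + [3.0] * 4
ideal_order = sorted(worst_order, reverse=True)
expected_ndcg = dcg(worst_order) / dcg(ideal_order)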