Loading README.md +3 −3 Original line number Diff line number Diff line Loading @@ -51,9 +51,9 @@ Here is the documentation of the available evaluation functions: - [`get_topics(task, subset=None)`][get_topics], - [`get_judged_documents(task, subset=None, topic=None)`][get_judged_documents], - [`get_random_ndcg(task, subset, topn=1000)`][get_random_ndcg], - [`get_ndcg(parsed_run, task, subset, topn=1000)`][get_ndcg], and - [`get_random_normalized_ndcg(parsed_run, task, subset, topn=1000)`][get_random_normalized_ndcg]. - [`get_random_ndcg(task, subset, topn)`][get_random_ndcg], - [`get_ndcg(parsed_run, task, subset, topn)`][get_ndcg], and - [`get_random_normalized_ndcg(parsed_run, task, subset, topn, ndcg)`][get_random_normalized_ndcg]. #### Using the `validation` set to compare various parameters of your system Loading scripts/common.py +6 −2 Original line number Diff line number Diff line Loading @@ -171,7 +171,7 @@ def get_random_ndcg(task, subset, topn=1000): return np.mean(random_ndcgs) def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000, ndcg=None): """Returns the random-normalized NDCG' of a system's run on a subset of a task. NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all Loading @@ -193,6 +193,9 @@ def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): topn : int, optional The top N results, which will be considered in computing the NDCG. Default is 1000. ndcg : float or None, optional The NDCG' to random-normalize. If not None, the parsed_run parameter will be ignored. Default is None. Returns ------- Loading @@ -200,6 +203,7 @@ def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): The random-normalized NDCG' of the system's run on the subset of the task. """ if ndcg is None: ndcg = get_ndcg(parsed_run, task, subset, topn) random_ndcg = get_random_ndcg(task, subset, topn) random_normalized_ndcg = (ndcg - random_ndcg) / (1.0 - random_ndcg) Loading test/test_get_random_normalized_ndcg.py +12 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,18 @@ from arqmath_eval import get_random_ndcg, get_random_normalized_ndcg class TestGetRandomNormalizedNDCG(unittest.TestCase): def test_hand_picked(self): parsed_run = {} random_ndcg = get_random_ndcg('task1', 'test') random_normalized_ndcg = get_random_normalized_ndcg(parsed_run, 'task1', 'test', ndcg=1.0) expected_random_normalized_ndcg = 1.0 self.assertEqual(expected_random_normalized_ndcg, random_normalized_ndcg) random_normalized_ndcg = get_random_normalized_ndcg(parsed_run, 'task1', 'test', ndcg=random_ndcg) expected_random_normalized_ndcg = 0.0 self.assertEqual(expected_random_normalized_ndcg, random_normalized_ndcg) def test_best(self): parsed_run = { 'A.78': { Loading Loading
README.md +3 −3 Original line number Diff line number Diff line Loading @@ -51,9 +51,9 @@ Here is the documentation of the available evaluation functions: - [`get_topics(task, subset=None)`][get_topics], - [`get_judged_documents(task, subset=None, topic=None)`][get_judged_documents], - [`get_random_ndcg(task, subset, topn=1000)`][get_random_ndcg], - [`get_ndcg(parsed_run, task, subset, topn=1000)`][get_ndcg], and - [`get_random_normalized_ndcg(parsed_run, task, subset, topn=1000)`][get_random_normalized_ndcg]. - [`get_random_ndcg(task, subset, topn)`][get_random_ndcg], - [`get_ndcg(parsed_run, task, subset, topn)`][get_ndcg], and - [`get_random_normalized_ndcg(parsed_run, task, subset, topn, ndcg)`][get_random_normalized_ndcg]. #### Using the `validation` set to compare various parameters of your system Loading
scripts/common.py +6 −2 Original line number Diff line number Diff line Loading @@ -171,7 +171,7 @@ def get_random_ndcg(task, subset, topn=1000): return np.mean(random_ndcgs) def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000, ndcg=None): """Returns the random-normalized NDCG' of a system's run on a subset of a task. NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all Loading @@ -193,6 +193,9 @@ def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): topn : int, optional The top N results, which will be considered in computing the NDCG. Default is 1000. ndcg : float or None, optional The NDCG' to random-normalize. If not None, the parsed_run parameter will be ignored. Default is None. Returns ------- Loading @@ -200,6 +203,7 @@ def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000): The random-normalized NDCG' of the system's run on the subset of the task. """ if ndcg is None: ndcg = get_ndcg(parsed_run, task, subset, topn) random_ndcg = get_random_ndcg(task, subset, topn) random_normalized_ndcg = (ndcg - random_ndcg) / (1.0 - random_ndcg) Loading
test/test_get_random_normalized_ndcg.py +12 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,18 @@ from arqmath_eval import get_random_ndcg, get_random_normalized_ndcg class TestGetRandomNormalizedNDCG(unittest.TestCase): def test_hand_picked(self): parsed_run = {} random_ndcg = get_random_ndcg('task1', 'test') random_normalized_ndcg = get_random_normalized_ndcg(parsed_run, 'task1', 'test', ndcg=1.0) expected_random_normalized_ndcg = 1.0 self.assertEqual(expected_random_normalized_ndcg, random_normalized_ndcg) random_normalized_ndcg = get_random_normalized_ndcg(parsed_run, 'task1', 'test', ndcg=random_ndcg) expected_random_normalized_ndcg = 0.0 self.assertEqual(expected_random_normalized_ndcg, random_normalized_ndcg) def test_best(self): parsed_run = { 'A.78': { Loading