Commit dd334afc authored by Vít Novotný's avatar Vít Novotný
Browse files

Add unit tests

parent 7239538f
Pipeline #58284 canceled with stage
scripts/NTCIR11_Math-qrels-train.dat
scripts/NTCIR11_Math-qrels-test.dat
scripts/NTCIR12_Math-qrels_agg-train.dat
scripts/NTCIR12_Math-qrels_agg-test.dat
scripts/NTCIR12_MathWikiFrm-qrels_agg-train.dat
scripts/NTCIR12_MathWikiFrm-qrels_agg-test.dat
scripts/qrel.V1.0-train.tsv
scripts/qrel.V1.0-test.tsv
scripts/votes-qrels-train.V1.0.tsv
scripts/votes-qrels-train-train.V1.0.tsv
scripts/votes-qrels-train-validation.V1.0.tsv
scripts/votes-qrels-test.V1.0.tsv
include scripts/NTCIR11_Math-qrels-train.dat
include scripts/NTCIR11_Math-qrels-test.dat
include scripts/NTCIR12_Math-qrels_agg-train.dat
include scripts/NTCIR12_Math-qrels_agg-test.dat
include scripts/NTCIR12_MathWikiFrm-qrels_agg-train.dat
include scripts/NTCIR12_MathWikiFrm-qrels_agg-test.dat
include scripts/qrel.V1.0-train.tsv
include scripts/qrel.V1.0-test.tsv
include scripts/votes-qrels-train.V1.0.tsv
include scripts/votes-qrels-train-train.V1.0.tsv
include scripts/votes-qrels-train-validation.V1.0.tsv
include scripts/votes-qrels-test.V1.0.tsv
import unittest
from arqmath_eval import get_judged_documents
class TestGetJudgedDocuments(unittest.TestCase):
    """Check ``get_judged_documents`` against the ``task1`` judgements.

    The expected document identifiers are written down once per topic and
    combined with set unions, so the fixtures of the individual tests cannot
    drift apart.  Every call is checked inside its own sub-test, so a failure
    for one subset/topic combination does not hide the remaining ones.
    """

    # Documents expected for topic A.31.
    TOPIC_A31 = frozenset({
        '48162', '48164', '48165', '48167', '48172', '48181',
        '48184', '48202', '48219', '48235', '48241', '48260',
        '53779', '53781', '53784', '53790', '69435', '70739',
        '70741', '98328', '168286', '168290', '168305', '168323',
        '264329', '439044', '472635', '574514', '616315', '616321',
        '616373', '616514', '672516', '692232', '743738', '860842',
        '897705', '982759', '1018719', '1116368', '1282180', '1596444',
        '1609339', '1623400', '1639289', '2170920', '2227543', '2362771',
        '2602592', '2968174',
    })

    # Documents expected for topic A.101.
    TOPIC_A101 = frozenset({
        '263828', '264299', '264315', '264329', '319916', '319917',
        '319919', '319938', '319993', '439027', '439044', '439055',
        '439132', '496898', '496909', '876137', '876221', '1489896',
        '2780928',
    })

    # Documents expected for topic A.78.
    TOPIC_A78 = frozenset({
        '493764', '493782', '1116368', '1116370', '1116378', '1282112',
        '1282114', '1282116', '1282155', '1282166', '1282180', '2008449',
        '2008609', '2008616', '2008628', '2008631', '2008650', '2008712',
    })

    # The 'train' subset is expected to hold exactly the documents of topics
    # A.31 and A.101, the 'test' subset exactly those of topic A.78.
    TRAIN_SUBSET = TOPIC_A31 | TOPIC_A101
    TEST_SUBSET = TOPIC_A78
    ALL_SUBSETS = TRAIN_SUBSET | TEST_SUBSET

    # Expected when a topic is requested from a subset it is not part of.
    NO_DOCUMENTS = frozenset()

    def _assert_judged(self, expected, *args, **kwargs):
        """Assert that ``get_judged_documents(*args, **kwargs)`` gives *expected*.

        The comparison runs inside ``subTest`` so that each argument
        combination is reported separately.  *expected* is converted to a
        plain ``set`` before the comparison, matching the plain-set fixtures
        this helper replaces.
        """
        with self.subTest(arguments=args, keywords=kwargs):
            documents = get_judged_documents(*args, **kwargs)
            self.assertEqual(set(expected), documents)

    def test_all_subsets_and_all_topics(self):
        """No subset and no topic: every judged document is expected."""
        self._assert_judged(self.ALL_SUBSETS, 'task1')

    def test_selected_subsets_all_topics(self):
        """A subset but no topic: all judged documents of that subset."""
        self._assert_judged(self.TRAIN_SUBSET, 'task1', 'train')
        self._assert_judged(self.TEST_SUBSET, 'task1', 'test')

    def test_all_subsets_selected_topics(self):
        """A topic but no subset: the judged documents of that topic."""
        self._assert_judged(self.TOPIC_A31, 'task1', topic='A.31')
        self._assert_judged(self.TOPIC_A101, 'task1', topic='A.101')
        self._assert_judged(self.TOPIC_A78, 'task1', topic='A.78')

    def test_selected_subsets_selected_topics(self):
        """A subset and a topic: empty unless the topic is in the subset."""
        self._assert_judged(self.TOPIC_A31, 'task1', 'train', 'A.31')
        self._assert_judged(self.NO_DOCUMENTS, 'task1', 'test', 'A.31')
        self._assert_judged(self.TOPIC_A101, 'task1', 'train', 'A.101')
        self._assert_judged(self.NO_DOCUMENTS, 'task1', 'test', 'A.101')
        self._assert_judged(self.NO_DOCUMENTS, 'task1', 'train', 'A.78')
        self._assert_judged(self.TOPIC_A78, 'task1', 'test', 'A.78')
import unittest
from arqmath_eval import get_topics
class TestGetTopics(unittest.TestCase):
    """Verify the topic identifiers that ``get_topics`` reports for ``task1``."""

    def test_all_subsets(self):
        """Without a subset argument, three topics are expected."""
        self.assertEqual({'A.31', 'A.101', 'A.78'}, get_topics('task1'))

    def test_selected_subsets(self):
        """Each subset is expected to report only its own topics."""
        train_topics = get_topics('task1', 'train')
        self.assertEqual({'A.31', 'A.101'}, train_topics)

        test_topics = get_topics('task1', 'test')
        self.assertEqual({'A.78'}, test_topics)
from math import log2
import unittest
from arqmath_eval import ndcg
class TestNDCG(unittest.TestCase):
def test_best_score(self):
    """A run over topic A.78 that is expected to reach an nDCG of 1.0.

    The run scores 18 documents of topic A.78 with strictly decreasing
    scores and is evaluated against the 'test' subset of 'task1'.
    """
    parsed_run = {
        'A.78': {
            '493782': 1.00,
            '493764': 0.95,
            '2008712': 0.90,
            '1282166': 0.85,
            '2008631': 0.80,
            '2008628': 0.75,
            '2008609': 0.70,
            '1116378': 0.65,
            '2008650': 0.60,
            '2008616': 0.55,
            '2008449': 0.50,
            '1282180': 0.45,
            '1282116': 0.40,
            '1282112': 0.35,
            '1116370': 0.30,
            '1116368': 0.25,
            '1282155': 0.20,
            '1282114': 0.15,
        }
    }
    ndcg_score = ndcg(parsed_run, 'task1', 'test')
    expected_ndcg_score = 1.0
    # nDCG is a floating-point result, so compare with a tolerance instead
    # of exact equality to stay independent of rounding and summation order.
    self.assertAlmostEqual(expected_ndcg_score, ndcg_score)
def test_best_score_with_unjudged_topics(self):
    """An extra topic in the run is expected to leave the nDCG at 1.0.

    Besides the ranking for topic A.78, the run contains results for topic
    A.31 while being evaluated against the 'test' subset of 'task1'; the
    expected score is still 1.0.
    """
    parsed_run = {
        'A.78': {
            '493782': 1.00,
            '493764': 0.95,
            '2008712': 0.90,
            '1282166': 0.85,
            '2008631': 0.80,
            '2008628': 0.75,
            '2008609': 0.70,
            '1116378': 0.65,
            '2008650': 0.60,
            '2008616': 0.55,
            '2008449': 0.50,
            '1282180': 0.45,
            '1282116': 0.40,
            '1282112': 0.35,
            '1116370': 0.30,
            '1116368': 0.25,
            '1282155': 0.20,
            '1282114': 0.15,
        },
        # Additional topic that must not influence the expected score.
        'A.31': {
            '439044': 1.00,
            '692232': 0.50,
        },
    }
    ndcg_score = ndcg(parsed_run, 'task1', 'test')
    expected_ndcg_score = 1.0
    # nDCG is a floating-point result, so compare with a tolerance instead
    # of exact equality to stay independent of rounding and summation order.
    self.assertAlmostEqual(expected_ndcg_score, ndcg_score)
def test_best_score_with_unjudged_documents(self):
    """Extra documents in the run are expected to leave the nDCG at 1.0.

    The ranking for topic A.78 is interleaved with the placeholder
    identifiers 'unjudged_1' and 'unjudged_2'; evaluated against the 'test'
    subset of 'task1', the expected score is still 1.0.
    """
    parsed_run = {
        'A.78': {
            '493782': 1.00,
            '493764': 0.95,
            '2008712': 0.90,
            '1282166': 0.85,
            '2008631': 0.80,
            '2008628': 0.75,
            '2008609': 0.70,
            '1116378': 0.65,
            '2008650': 0.60,
            '2008616': 0.55,
            '2008449': 0.50,
            '1282180': 0.45,
            '1282116': 0.40,
            '1282112': 0.35,
            '1116370': 0.30,
            '1116368': 0.25,
            '1282155': 0.20,
            # Placeholder documents placed around the last A.78 document.
            'unjudged_1': 0.15,
            '1282114': 0.10,
            'unjudged_2': 0.05,
        },
    }
    ndcg_score = ndcg(parsed_run, 'task1', 'test')
    expected_ndcg_score = 1.0
    # nDCG is a floating-point result, so compare with a tolerance instead
    # of exact equality to stay independent of rounding and summation order.
    self.assertAlmostEqual(expected_ndcg_score, ndcg_score)
def test_best_score_with_unjudged_topics_and_documents(self):
    """Extra topics and extra documents together still expect nDCG 1.0.

    The run combines the ranking for topic A.78 (interleaved with the
    placeholder identifiers 'unjudged_1' and 'unjudged_2') with results for
    topic A.31, and is evaluated against the 'test' subset of 'task1'.
    """
    parsed_run = {
        'A.78': {
            '493782': 1.00,
            '493764': 0.95,
            '2008712': 0.90,
            '1282166': 0.85,
            '2008631': 0.80,
            '2008628': 0.75,
            '2008609': 0.70,
            '1116378': 0.65,
            '2008650': 0.60,
            '2008616': 0.55,
            '2008449': 0.50,
            '1282180': 0.45,
            '1282116': 0.40,
            '1282112': 0.35,
            '1116370': 0.30,
            '1116368': 0.25,
            '1282155': 0.20,
            # Placeholder documents placed around the last A.78 document.
            'unjudged_1': 0.15,
            '1282114': 0.10,
            'unjudged_2': 0.05,
        },
        # Additional topic that must not influence the expected score.
        'A.31': {
            '439044': 1.00,
            '692232': 0.50,
        },
    }
    ndcg_score = ndcg(parsed_run, 'task1', 'test')
    expected_ndcg_score = 1.0
    # nDCG is a floating-point result, so compare with a tolerance instead
    # of exact equality to stay independent of rounding and summation order.
    self.assertAlmostEqual(expected_ndcg_score, ndcg_score)
def test_worst_score(self):
parsed_run = {
'A.78': {
'493782': 0.15,
'493764': 0.20,
'2008712': 0.25,
'1282166': 0.30,
'2008631': 0.35,
'2008628': 0.40,
'2008609': 0.45,
'1116378': 0.50,
'2008650': 0.55,
'2008616': 0.60,
'2008449': 0.65,
'1282180': 0.70,
'1282116': 0.75,
'1282112': 0.80,
'1116370': 0.85,
'1116368': 0.90,
'1282155': 0.95,
'1282114': 1.00,
}
}
ndcg_score = ndcg(parsed_run, 'task1', 'test')
dcg_score = 0.0
for i in range(1, 3):
dcg_score += 0.0 / log2(i + 1)
for i in range(3, 11):
dcg_score += 1.0 / log2(i + 1)
for i in range(11, 15):
dcg_score += 2.0 / log2(i + 1)