Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Michal Štefánik
ARQMath-eval
Commits
dd334afc
Commit
dd334afc
authored
Apr 15, 2020
by
Vít Novotný
Browse files
Add unit tests
parent
7239538f
Pipeline
#58284
canceled with stage
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
MANIFEST.in
View file @
dd334afc
scripts/NTCIR11_Math-qrels-train.dat
scripts/NTCIR11_Math-qrels-test.dat
scripts/NTCIR12_Math-qrels_agg-train.dat
scripts/NTCIR12_Math-qrels_agg-test.dat
scripts/NTCIR12_MathWikiFrm-qrels_agg-train.dat
scripts/NTCIR12_MathWikiFrm-qrels_agg-test.dat
scripts/qrel.V1.0-train.tsv
scripts/qrel.V1.0-test.tsv
scripts/votes-qrels-train.V1.0.tsv
scripts/votes-qrels-train-train.V1.0.tsv
scripts/votes-qrels-train-validation.V1.0.tsv
scripts/votes-qrels-test.V1.0.tsv
# Data files under scripts/ that are added to the source distribution.
include scripts/NTCIR11_Math-qrels-train.dat
include scripts/NTCIR11_Math-qrels-test.dat
include scripts/NTCIR12_Math-qrels_agg-train.dat
include scripts/NTCIR12_Math-qrels_agg-test.dat
include scripts/NTCIR12_MathWikiFrm-qrels_agg-train.dat
include scripts/NTCIR12_MathWikiFrm-qrels_agg-test.dat
include scripts/qrel.V1.0-train.tsv
include scripts/qrel.V1.0-test.tsv
include scripts/votes-qrels-train.V1.0.tsv
include scripts/votes-qrels-train-train.V1.0.tsv
include scripts/votes-qrels-train-validation.V1.0.tsv
include scripts/votes-qrels-test.V1.0.tsv
scripts/.common.py.swp
deleted
100644 → 0
View file @
7239538f
File deleted
scripts/__pycache__/__init__.cpython-35.pyc
deleted
100644 → 0
View file @
7239538f
File deleted
scripts/__pycache__/common.cpython-35.pyc
deleted
100644 → 0
View file @
7239538f
File deleted
scripts/__pycache__/configuration.cpython-35.pyc
deleted
100644 → 0
View file @
7239538f
File deleted
scripts/__pycache__/evaluate.cpython-35.pyc
deleted
100644 → 0
View file @
7239538f
File deleted
test/__init__.py
0 → 100644
View file @
dd334afc
test/test_get_judged_documents.py
0 → 100644
View file @
dd334afc
import
unittest
from
arqmath_eval
import
get_judged_documents
class TestGetJudgedDocuments(unittest.TestCase):
    """Checks ``get_judged_documents`` for ``'task1'`` against fixed id sets.

    The expected document ids are written out once as class-level fixtures
    and shared by the test methods, which vary only the subset and topic
    arguments passed to ``get_judged_documents``.
    """

    # Expected ids when neither a subset nor a topic is given.
    _ALL_DOCUMENTS = {
        '48162', '48164', '48165', '48167', '48172', '48181',
        '48184', '48202', '48219', '48235', '48241', '48260',
        '53779', '53781', '53784', '53790', '69435', '70739',
        '70741', '98328', '168286', '168290', '168305', '168323',
        '263828', '264299', '264315', '264329', '319916', '319917',
        '319919', '319938', '319993', '439027', '439044', '439055',
        '439132', '472635', '493764', '493782', '496898', '496909',
        '574514', '616315', '616321', '616373', '616514', '672516',
        '692232', '743738', '860842', '876137', '876221', '897705',
        '982759', '1018719', '1116368', '1116370', '1116378', '1282112',
        '1282114', '1282116', '1282155', '1282166', '1282180', '1489896',
        '1596444', '1609339', '1623400', '1639289', '2008449', '2008609',
        '2008616', '2008628', '2008631', '2008650', '2008712', '2170920',
        '2227543', '2362771', '2602592', '2780928', '2968174',
    }

    # Expected ids for the 'train' subset.
    _TRAIN_DOCUMENTS = {
        '48162', '48164', '48165', '48167', '48172', '48181',
        '48184', '48202', '48219', '48235', '48241', '48260',
        '53779', '53781', '53784', '53790', '69435', '70739',
        '70741', '98328', '168286', '168290', '168305', '168323',
        '263828', '264299', '264315', '264329', '319916', '319917',
        '319919', '319938', '319993', '439027', '439044', '439055',
        '439132', '472635', '496898', '496909', '574514', '616315',
        '616321', '616373', '616514', '672516', '692232', '743738',
        '860842', '876137', '876221', '897705', '982759', '1018719',
        '1116368', '1282180', '1489896', '1596444', '1609339', '1623400',
        '1639289', '2170920', '2227543', '2362771', '2602592', '2780928',
        '2968174',
    }

    # Expected ids for the 'test' subset.
    _TEST_DOCUMENTS = {
        '493764', '493782', '1116368', '1116370', '1116378', '1282112',
        '1282114', '1282116', '1282155', '1282166', '1282180', '2008449',
        '2008609', '2008616', '2008628', '2008631', '2008650', '2008712',
    }

    # Expected ids for topic 'A.31'.
    _TOPIC_A31_DOCUMENTS = {
        '48162', '48164', '48165', '48167', '48172', '48181',
        '48184', '48202', '48219', '48235', '48241', '48260',
        '53779', '53781', '53784', '53790', '69435', '70739',
        '70741', '98328', '168286', '168290', '168305', '168323',
        '264329', '439044', '472635', '574514', '616315', '616321',
        '616373', '616514', '672516', '692232', '743738', '860842',
        '897705', '982759', '1018719', '1116368', '1282180', '1596444',
        '1609339', '1623400', '1639289', '2170920', '2227543', '2362771',
        '2602592', '2968174',
    }

    # Expected ids for topic 'A.101'.
    _TOPIC_A101_DOCUMENTS = {
        '263828', '264299', '264315', '264329', '319916', '319917',
        '319919', '319938', '319993', '439027', '439044', '439055',
        '439132', '496898', '496909', '876137', '876221', '1489896',
        '2780928',
    }

    # Topic 'A.78' is expected to yield exactly the ids of the 'test' subset.
    _TOPIC_A78_DOCUMENTS = _TEST_DOCUMENTS

    def _assert_judged(self, expected, *args, **kwargs):
        """Call ``get_judged_documents(*args, **kwargs)`` and compare to *expected*."""
        judged = get_judged_documents(*args, **kwargs)
        self.assertEqual(expected, judged)

    def test_all_subsets_and_all_topics(self):
        """Only the task is given: the complete id set is expected."""
        self._assert_judged(self._ALL_DOCUMENTS, 'task1')

    def test_selected_subsets_all_topics(self):
        """A subset is given positionally, no topic."""
        self._assert_judged(self._TRAIN_DOCUMENTS, 'task1', 'train')
        self._assert_judged(self._TEST_DOCUMENTS, 'task1', 'test')

    def test_all_subsets_selected_topics(self):
        """A topic is given by keyword, no subset."""
        self._assert_judged(self._TOPIC_A31_DOCUMENTS, 'task1', topic='A.31')
        self._assert_judged(self._TOPIC_A101_DOCUMENTS, 'task1', topic='A.101')
        self._assert_judged(self._TOPIC_A78_DOCUMENTS, 'task1', topic='A.78')

    def test_selected_subsets_selected_topics(self):
        """Both subset and topic are given; mismatched pairs expect an empty set."""
        self._assert_judged(self._TOPIC_A31_DOCUMENTS, 'task1', 'train', 'A.31')
        self._assert_judged(set(), 'task1', 'test', 'A.31')
        self._assert_judged(self._TOPIC_A101_DOCUMENTS, 'task1', 'train', 'A.101')
        self._assert_judged(set(), 'task1', 'test', 'A.101')
        self._assert_judged(set(), 'task1', 'train', 'A.78')
        self._assert_judged(self._TOPIC_A78_DOCUMENTS, 'task1', 'test', 'A.78')
test/test_get_topics.py
0 → 100644
View file @
dd334afc
import
unittest
from
arqmath_eval
import
get_topics
class TestGetTopics(unittest.TestCase):
    """Checks the topic ids that ``get_topics`` returns for ``'task1'``."""

    def test_all_subsets(self):
        """With only the task given, all three topic ids are expected."""
        found = get_topics('task1')
        self.assertEqual({'A.31', 'A.101', 'A.78'}, found)

    def test_selected_subsets(self):
        """The 'train' and 'test' subsets are each expected to have their own topics."""
        train_topics = get_topics('task1', 'train')
        self.assertEqual({'A.31', 'A.101'}, train_topics)

        test_topics = get_topics('task1', 'test')
        self.assertEqual({'A.78'}, test_topics)
test/test_ndcg.py
0 → 100644
View file @
dd334afc
from
math
import
log2
import
unittest
from
arqmath_eval
import
ndcg
class
TestNDCG
(
unittest
.
TestCase
):
def test_best_score(self):
    """A run over topic 'A.78' with scores from 1.00 down to 0.15 must yield NDCG 1.0."""
    # (document id, score) pairs, listed in descending score order.
    ranking = [
        ('493782', 1.00), ('493764', 0.95), ('2008712', 0.90),
        ('1282166', 0.85), ('2008631', 0.80), ('2008628', 0.75),
        ('2008609', 0.70), ('1116378', 0.65), ('2008650', 0.60),
        ('2008616', 0.55), ('2008449', 0.50), ('1282180', 0.45),
        ('1282116', 0.40), ('1282112', 0.35), ('1116370', 0.30),
        ('1116368', 0.25), ('1282155', 0.20), ('1282114', 0.15),
    ]
    run = {'A.78': dict(ranking)}
    score = ndcg(run, 'task1', 'test')
    self.assertEqual(1.0, score)
def test_best_score_with_unjudged_topics(self):
    """Adding an extra 'A.31' topic to the run must still yield NDCG 1.0 on 'test'."""
    # (document id, score) pairs for 'A.78', listed in descending score order.
    a78_ranking = [
        ('493782', 1.00), ('493764', 0.95), ('2008712', 0.90),
        ('1282166', 0.85), ('2008631', 0.80), ('2008628', 0.75),
        ('2008609', 0.70), ('1116378', 0.65), ('2008650', 0.60),
        ('2008616', 0.55), ('2008449', 0.50), ('1282180', 0.45),
        ('1282116', 0.40), ('1282112', 0.35), ('1116370', 0.30),
        ('1116368', 0.25), ('1282155', 0.20), ('1282114', 0.15),
    ]
    # Results for a second topic that accompanies 'A.78' in the run.
    a31_ranking = [('439044', 1.00), ('692232', 0.50)]
    run = {
        'A.78': dict(a78_ranking),
        'A.31': dict(a31_ranking),
    }
    score = ndcg(run, 'task1', 'test')
    self.assertEqual(1.0, score)
def test_best_score_with_unjudged_documents(self):
    """Two 'unjudged_*' entries mixed into the 'A.78' run must still yield NDCG 1.0."""
    # (document id, score) pairs in descending score order; the two
    # 'unjudged_*' ids sit around the last numbered document.
    ranking = [
        ('493782', 1.00), ('493764', 0.95), ('2008712', 0.90),
        ('1282166', 0.85), ('2008631', 0.80), ('2008628', 0.75),
        ('2008609', 0.70), ('1116378', 0.65), ('2008650', 0.60),
        ('2008616', 0.55), ('2008449', 0.50), ('1282180', 0.45),
        ('1282116', 0.40), ('1282112', 0.35), ('1116370', 0.30),
        ('1116368', 0.25), ('1282155', 0.20), ('unjudged_1', 0.15),
        ('1282114', 0.10), ('unjudged_2', 0.05),
    ]
    run = {'A.78': dict(ranking)}
    score = ndcg(run, 'task1', 'test')
    self.assertEqual(1.0, score)
def test_best_score_with_unjudged_topics_and_documents(self):
    """An extra 'A.31' topic plus 'unjudged_*' entries must still yield NDCG 1.0."""
    # (document id, score) pairs for 'A.78' in descending score order; the
    # two 'unjudged_*' ids sit around the last numbered document.
    a78_ranking = [
        ('493782', 1.00), ('493764', 0.95), ('2008712', 0.90),
        ('1282166', 0.85), ('2008631', 0.80), ('2008628', 0.75),
        ('2008609', 0.70), ('1116378', 0.65), ('2008650', 0.60),
        ('2008616', 0.55), ('2008449', 0.50), ('1282180', 0.45),
        ('1282116', 0.40), ('1282112', 0.35), ('1116370', 0.30),
        ('1116368', 0.25), ('1282155', 0.20), ('unjudged_1', 0.15),
        ('1282114', 0.10), ('unjudged_2', 0.05),
    ]
    # Results for a second topic that accompanies 'A.78' in the run.
    a31_ranking = [('439044', 1.00), ('692232', 0.50)]
    run = {
        'A.78': dict(a78_ranking),
        'A.31': dict(a31_ranking),
    }
    score = ndcg(run, 'task1', 'test')
    self.assertEqual(1.0, score)
def
test_worst_score
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
0.15
,
'493764'
:
0.20
,
'2008712'
:
0.25
,
'1282166'
:
0.30
,
'2008631'
:
0.35
,
'2008628'
:
0.40
,
'2008609'
:
0.45
,
'1116378'
:
0.50
,
'2008650'
:
0.55
,
'2008616'
:
0.60
,
'2008449'
:
0.65
,
'1282180'
:
0.70
,
'1282116'
:
0.75
,
'1282112'
:
0.80
,
'1116370'
:
0.85
,
'1116368'
:
0.90
,
'1282155'
:
0.95
,
'1282114'
:
1.00
,
}
}
ndcg_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
dcg_score
=
0.0
for
i
in
range
(
1
,
3
):
dcg_score
+=
0.0
/
log2
(
i
+
1
)
for
i
in
range
(
3
,
11
):
dcg_score
+=
1.0
/
log2
(
i
+
1
)
for
i
in
range
(
11
,
15
):
dcg_score
+=
2.0
/
log2
(
i
+
1
)