Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Michal Štefánik
ARQMath-eval
Commits
9273b3ea
Commit
9273b3ea
authored
Jul 11, 2020
by
Vít Novotný
Browse files
Add ARQMath task 1 and 2 relevance judgements
parent
dcc5ddb0
Changes
27
Show whitespace changes
Inline
Side-by-side
task1/xstefan3/README.md
→
task1
-example
/xstefan3/README.md
View file @
9273b3ea
File moved
task1/xstefan3/example_key1=value1_key2=value2_etc.tsv
→
task1
-example
/xstefan3/example_key1=value1_key2=value2_etc.tsv
View file @
9273b3ea
File moved
test/test_get_judged_documents.py
View file @
9273b3ea
...
...
@@ -5,7 +5,7 @@ from arqmath_eval import get_judged_documents
class
TestGetJudgedDocuments
(
unittest
.
TestCase
):
def
test_all_subsets_all_topics
(
self
):
documents
=
get_judged_documents
(
'task1'
)
documents
=
get_judged_documents
(
'task1
-example
'
)
expected_documents
=
{
'48162'
,
'48164'
,
...
...
@@ -94,7 +94,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
self
.
assertEqual
(
expected_documents
,
documents
)
def
test_selected_subsets_all_topics
(
self
):
documents
=
get_judged_documents
(
'task1'
,
'train'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'train'
)
expected_documents
=
{
'70741'
,
'70739'
,
...
...
@@ -149,7 +149,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'validation'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'validation'
)
expected_documents
=
{
'263828'
,
'264299'
,
...
...
@@ -173,7 +173,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'test'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'test'
)
expected_documents
=
{
'493764'
,
'493782'
,
...
...
@@ -197,7 +197,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
self
.
assertEqual
(
expected_documents
,
documents
)
def
test_all_subsets_selected_topics
(
self
):
documents
=
get_judged_documents
(
'task1'
,
topic
=
'A.31'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
topic
=
'A.31'
)
expected_documents
=
{
'48162'
,
'48164'
,
...
...
@@ -252,7 +252,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
topic
=
'A.101'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
topic
=
'A.101'
)
expected_documents
=
{
'263828'
,
'264299'
,
...
...
@@ -276,7 +276,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
topic
=
'A.78'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
topic
=
'A.78'
)
expected_documents
=
{
'493764'
,
'493782'
,
...
...
@@ -300,7 +300,7 @@ class TestGetJudgedDocuments(unittest.TestCase):
self
.
assertEqual
(
expected_documents
,
documents
)
def
test_selected_subsets_selected_topics
(
self
):
documents
=
get_judged_documents
(
'task1'
,
'train'
,
'A.31'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'train'
,
'A.31'
)
expected_documents
=
{
'48162'
,
'48164'
,
...
...
@@ -355,19 +355,19 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'validation'
,
'A.31'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'validation'
,
'A.31'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'test'
,
'A.31'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'test'
,
'A.31'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'train'
,
'A.101'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'train'
,
'A.101'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'validation'
,
'A.101'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'validation'
,
'A.101'
)
expected_documents
=
{
'263828'
,
'264299'
,
...
...
@@ -391,19 +391,19 @@ class TestGetJudgedDocuments(unittest.TestCase):
}
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'test'
,
'A.101'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'test'
,
'A.101'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'train'
,
'A.78'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'train'
,
'A.78'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'validation'
,
'A.78'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'validation'
,
'A.78'
)
expected_documents
=
set
()
self
.
assertEqual
(
expected_documents
,
documents
)
documents
=
get_judged_documents
(
'task1'
,
'test'
,
'A.78'
)
documents
=
get_judged_documents
(
'task1
-example
'
,
'test'
,
'A.78'
)
expected_documents
=
{
'493764'
,
'493782'
,
...
...
test/test_get_ndcg.py
View file @
9273b3ea
...
...
@@ -28,7 +28,7 @@ class TestGetNDCG(unittest.TestCase):
'1282114'
:
0.15
,
}
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
...
...
@@ -59,7 +59,7 @@ class TestGetNDCG(unittest.TestCase):
'692232'
:
0.50
,
},
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
...
...
@@ -88,7 +88,7 @@ class TestGetNDCG(unittest.TestCase):
'unjudged_2'
:
0.05
,
},
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
...
...
@@ -121,7 +121,7 @@ class TestGetNDCG(unittest.TestCase):
'692232'
:
0.50
,
},
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
...
...
@@ -148,7 +148,7 @@ class TestGetNDCG(unittest.TestCase):
'1282114'
:
1.00
,
}
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
3
):
...
...
@@ -196,7 +196,7 @@ class TestGetNDCG(unittest.TestCase):
'1282114'
:
0.15
,
}
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
,
4
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
4
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
5
):
...
...
@@ -238,7 +238,7 @@ class TestGetNDCG(unittest.TestCase):
'1282114'
:
1.00
,
}
}
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
,
4
)
ndcg
=
get_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
4
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
3
):
...
...
test/test_get_random_ndcg.py
View file @
9273b3ea
...
...
@@ -7,7 +7,7 @@ from arqmath_eval import get_random_ndcg, get_ndcg, get_topics, get_judged_docum
class
TestGetRandomNDCG
(
unittest
.
TestCase
):
def
test_using_equation
(
self
):
ndcg
=
get_random_ndcg
(
'task1'
,
'test'
)
ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
)
expected_judgement
=
(
4
*
3.0
+
...
...
@@ -48,7 +48,7 @@ class TestGetRandomNDCG(unittest.TestCase):
self
.
assertAlmostEqual
(
expected_ndcg
,
ndcg
,
places
=
2
)
def
test_with_topn
(
self
):
ndcg
=
get_random_ndcg
(
'task1'
,
'test'
,
4
)
ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
,
4
)
expected_judgement
=
(
4
*
3.0
+
...
...
test/test_get_random_normalized_ndcg.py
View file @
9273b3ea
...
...
@@ -7,13 +7,13 @@ from arqmath_eval import get_random_ndcg, get_random_normalized_ndcg
class
TestGetRandomNormalizedNDCG
(
unittest
.
TestCase
):
def
test_hand_picked
(
self
):
parsed_run
=
{}
random_ndcg
=
get_random_ndcg
(
'task1'
,
'test'
)
random_ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
,
ndcg
=
1.0
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
ndcg
=
1.0
)
expected_random_normalized_ndcg
=
1.0
self
.
assertEqual
(
expected_random_normalized_ndcg
,
random_normalized_ndcg
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
,
ndcg
=
random_ndcg
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
ndcg
=
random_ndcg
)
expected_random_normalized_ndcg
=
0.0
self
.
assertEqual
(
expected_random_normalized_ndcg
,
random_normalized_ndcg
)
...
...
@@ -40,7 +40,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
'1282114'
:
0.15
,
}
}
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_random_normalized_ndcg
=
1.0
...
...
@@ -69,7 +69,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
'1282114'
:
1.00
,
}
}
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
3
):
...
...
@@ -92,7 +92,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
expected_idcg
+=
0.0
/
log2
(
i
+
1
)
expected_ndcg
=
expected_dcg
/
expected_idcg
random_ndcg
=
get_random_ndcg
(
'task1'
,
'test'
)
random_ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
)
expected_random_normalized_ndcg
=
(
expected_ndcg
-
random_ndcg
)
/
(
1.0
-
random_ndcg
)
self
.
assertEqual
(
expected_random_normalized_ndcg
,
random_normalized_ndcg
)
...
...
@@ -119,7 +119,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
'1282114'
:
0.15
,
}
}
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
,
4
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
4
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
5
):
...
...
@@ -136,7 +136,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
expected_idcg
+=
0.0
/
log2
(
i
+
1
)
expected_ndcg
=
expected_dcg
/
expected_idcg
random_ndcg
=
get_random_ndcg
(
'task1'
,
'test'
,
4
)
random_ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
,
4
)
expected_random_normalized_ndcg
=
(
expected_ndcg
-
random_ndcg
)
/
(
1.0
-
random_ndcg
)
self
.
assertEqual
(
expected_random_normalized_ndcg
,
random_normalized_ndcg
)
...
...
@@ -163,7 +163,7 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
'1282114'
:
1.00
,
}
}
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1'
,
'test'
,
4
)
random_normalized_ndcg
=
get_random_normalized_ndcg
(
parsed_run
,
'task1
-example
'
,
'test'
,
4
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
3
):
...
...
@@ -182,6 +182,6 @@ class TestGetRandomNormalizedNDCG(unittest.TestCase):
expected_idcg
+=
0.0
/
log2
(
i
+
1
)
expected_ndcg
=
expected_dcg
/
expected_idcg
random_ndcg
=
get_random_ndcg
(
'task1'
,
'test'
,
4
)
random_ndcg
=
get_random_ndcg
(
'task1
-example
'
,
'test'
,
4
)
expected_random_normalized_ndcg
=
(
expected_ndcg
-
random_ndcg
)
/
(
1.0
-
random_ndcg
)
self
.
assertEqual
(
expected_random_normalized_ndcg
,
random_normalized_ndcg
)
test/test_get_topics.py
View file @
9273b3ea
...
...
@@ -6,29 +6,29 @@ from arqmath_eval.configuration import TASKS
class
TestGetTopics
(
unittest
.
TestCase
):
def
test_all_subsets
(
self
):
topics
=
get_topics
(
'task1'
)
topics
=
get_topics
(
'task1
-example
'
)
expected_topics
=
{
'A.31'
,
'A.101'
,
'A.78'
}
self
.
assertEqual
(
expected_topics
,
topics
)
def
test_selected_subsets
(
self
):
topics
=
get_topics
(
'task1'
,
'train'
)
topics
=
get_topics
(
'task1
-example
'
,
'train'
)
expected_topics
=
{
'A.31'
}
self
.
assertEqual
(
expected_topics
,
topics
)
topics
=
get_topics
(
'task1'
,
'validation'
)
topics
=
get_topics
(
'task1
-example
'
,
'validation'
)
expected_topics
=
{
'A.101'
}
self
.
assertEqual
(
expected_topics
,
topics
)
topics
=
get_topics
(
'task1'
,
'test'
)
topics
=
get_topics
(
'task1
-example
'
,
'test'
)
expected_topics
=
{
'A.78'
}
self
.
assertEqual
(
expected_topics
,
topics
)
def
test_train_validation_test_split
(
self
):
for
task
in
TASKS
:
train_topics
=
get_topics
(
'task1'
,
'train'
)
validation_topics
=
get_topics
(
'task1'
,
'validation'
)
test_topics
=
get_topics
(
'task1'
,
'test'
)
all_topics
=
get_topics
(
'task1'
)
train_topics
=
get_topics
(
'task1
-example
'
,
'train'
)
validation_topics
=
get_topics
(
'task1
-example
'
,
'validation'
)
test_topics
=
get_topics
(
'task1
-example
'
,
'test'
)
all_topics
=
get_topics
(
'task1
-example
'
)
self
.
assertEqual
(
len
(
validation_topics
),
len
(
test_topics
))
train_ratio
=
len
(
train_topics
)
/
len
(
all_topics
)
...
...
Prev
1
2
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment