Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Michal Štefánik
ARQMath-eval
Commits
b5fae0ab
Commit
b5fae0ab
authored
Apr 15, 2020
by
Vít Novotný
Browse files
Rename common.ndcg to common.get_ndcg
parent
dd334afc
Changes
4
Hide whitespace changes
Inline
Side-by-side
scripts/__init__.py
View file @
b5fae0ab
from
.common
import
get_topics
,
get_judged_documents
,
ndcg
from
.common
import
get_topics
,
get_judged_documents
,
get_ndcg
as
ndcg
,
get_
ndcg
scripts/common.py
View file @
b5fae0ab
...
...
@@ -46,7 +46,7 @@ def get_judged_documents(task='task1-votes', subset=None, topic=None):
return
judged_documents
def
ndcg
(
parsed_run
,
task
=
'task1-votes'
,
subset
=
'train-validation'
):
def
get_
ndcg
(
parsed_run
,
task
=
'task1-votes'
,
subset
=
'train-validation'
):
evaluator
=
EVALUATORS
[
subset
][
task
]
only_judged_parsed_run
=
remove_nonjudged_topics_and_documents
(
parsed_run
,
task
,
subset
)
evaluation
=
evaluator
.
evaluate
(
only_judged_parsed_run
)
...
...
scripts/evaluate.py
View file @
b5fae0ab
...
...
@@ -6,7 +6,7 @@ import re
from
pytrec_eval
import
parse_run
from
.common
import
ndcg
from
.common
import
get_
ndcg
from
.configuration
import
TASKS
,
TASK_README_HEAD
,
USER_README_HEAD
...
...
@@ -21,16 +21,16 @@ if __name__ == '__main__':
result_name
=
re
.
sub
(
'_'
,
', '
,
os
.
path
.
basename
(
result
)[:
-
4
])
with
open
(
result
,
'rt'
)
as
f
:
parsed_result
=
parse_run
(
f
)
user_results
.
append
((
ndcg
(
parsed_result
,
task
,
'test'
),
result_name
))
user_results
.
append
((
get_
ndcg
(
parsed_result
,
task
,
'test'
),
result_name
))
best_ndcg
,
best_result_name
=
max
(
user_results
)
task_results
.
append
((
best_ndcg
,
user_name
,
best_result_name
))
with
open
(
os
.
path
.
join
(
user
,
'README.md'
),
'wt'
)
as
f
:
f
.
write
(
USER_README_HEAD
%
user_name
)
f
.
write
(
'
\n
'
)
for
ndcg
_score
,
result_name
in
sorted
(
user_results
,
reverse
=
True
):
f
.
write
(
'| %.4f | %s |
\n
'
%
(
ndcg
_score
,
result_name
))
for
ndcg
,
result_name
in
sorted
(
user_results
,
reverse
=
True
):
f
.
write
(
'| %.4f | %s |
\n
'
%
(
ndcg
,
result_name
))
with
open
(
os
.
path
.
join
(
task
,
'README.md'
),
'wt'
)
as
f
:
f
.
write
(
TASK_README_HEAD
)
f
.
write
(
'
\n
'
)
for
ndcg
_score
,
user_name
,
result_name
in
sorted
(
task_results
,
reverse
=
True
):
f
.
write
(
'| %.4f | %s | %s |
\n
'
%
(
ndcg
_score
,
user_name
,
result_name
))
for
ndcg
,
user_name
,
result_name
in
sorted
(
task_results
,
reverse
=
True
):
f
.
write
(
'| %.4f | %s | %s |
\n
'
%
(
ndcg
,
user_name
,
result_name
))
test/test_ndcg.py
View file @
b5fae0ab
from
math
import
log2
import
unittest
from
arqmath_eval
import
ndcg
from
arqmath_eval
import
get_
ndcg
class
TestNDCG
(
unittest
.
TestCase
):
def
test_best
_score
(
self
):
def
test_best
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
1.00
,
...
...
@@ -28,11 +28,11 @@ class TestNDCG(unittest.TestCase):
'1282114'
:
0.15
,
}
}
ndcg
_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
_score
=
1.0
self
.
assertEqual
(
expected_ndcg
_score
,
ndcg_score
)
ndcg
=
get_
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
def
test_best_
score_
with_unjudged_topics
(
self
):
def
test_best_with_unjudged_topics
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
1.00
,
...
...
@@ -59,11 +59,11 @@ class TestNDCG(unittest.TestCase):
'692232'
:
0.50
,
},
}
ndcg
_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
_score
=
1.0
self
.
assertEqual
(
expected_ndcg
_score
,
ndcg_score
)
ndcg
=
get_
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
def
test_best_
score_
with_unjudged_documents
(
self
):
def
test_best_with_unjudged_documents
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
1.00
,
...
...
@@ -88,11 +88,11 @@ class TestNDCG(unittest.TestCase):
'unjudged_2'
:
0.05
,
},
}
ndcg
_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
_score
=
1.0
self
.
assertEqual
(
expected_ndcg
_score
,
ndcg_score
)
ndcg
=
get_
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
def
test_best_
score_
with_unjudged_topics_and_documents
(
self
):
def
test_best_with_unjudged_topics_and_documents
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
1.00
,
...
...
@@ -121,11 +121,11 @@ class TestNDCG(unittest.TestCase):
'692232'
:
0.50
,
},
}
ndcg
_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
_score
=
1.0
self
.
assertEqual
(
expected_ndcg
_score
,
ndcg_score
)
ndcg
=
get_
ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_ndcg
=
1.0
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
def
test_worst
_score
(
self
):
def
test_worst
(
self
):
parsed_run
=
{
'A.78'
:
{
'493782'
:
0.15
,
...
...
@@ -148,26 +148,27 @@ class TestNDCG(unittest.TestCase):
'1282114'
:
1.00
,
}
}
ndcg_score
=
ndcg
(
parsed_run
,
'task1'
,
'test'
)
dcg_score
=
0.0
ndcg
=
get_ndcg
(
parsed_run
,
'task1'
,
'test'
)
expected_dcg
=
0.0
for
i
in
range
(
1
,
3
):
dcg_score
+=
0.0
/
log2
(
i
+
1
)
expected_dcg
+=
0.0
/
log2
(
i
+
1
)
for
i
in
range
(
3
,
11
):
dcg_score
+=
1.0
/
log2
(
i
+
1
)
expected_dcg
+=
1.0
/
log2
(
i
+
1
)
for
i
in
range
(
11
,
15
):
dcg_score
+=
2.0
/
log2
(
i
+
1
)
expected_dcg
+=
2.0
/
log2
(
i
+
1
)
for
i
in
range
(
15
,
19
):
dcg_score
+=
3.0
/
log2
(
i
+
1
)
expected_dcg
+=
3.0
/
log2
(
i
+
1
)
idcg_score
=
0.0
expected_idcg
=
0.0
for
i
in
range
(
1
,
5
):
idcg_score
+=
3.0
/
log2
(
i
+
1
)
expected_idcg
+=
3.0
/
log2
(
i
+
1
)
for
i
in
range
(
5
,
9
):
idcg_score
+=
2.0
/
log2
(
i
+
1
)
expected_idcg
+=
2.0
/
log2
(
i
+
1
)
for
i
in
range
(
9
,
17
):
idcg_score
+=
1.0
/
log2
(
i
+
1
)
expected_idcg
+=
1.0
/
log2
(
i
+
1
)
for
i
in
range
(
17
,
19
):
idcg_score
+=
0.0
/
log2
(
i
+
1
)
expected_idcg
+=
0.0
/
log2
(
i
+
1
)
expected_ndcg
_score
=
dcg_score
/
idcg_score
self
.
assertEqual
(
expected_ndcg
_score
,
ndcg_score
)
expected_ndcg
=
expected_dcg
/
expected_idcg
self
.
assertEqual
(
expected_ndcg
,
ndcg
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment