Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Michal Štefánik
ARQMath-eval
Commits
d9b73cb8
Commit
d9b73cb8
authored
Apr 15, 2020
by
Vít Novotný
Browse files
Add common.get_random_ndcg
parent
4157e76c
Pipeline
#58287
canceled with stage
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
scripts/__init__.py
View file @
d9b73cb8
from
.common
import
get_topics
,
get_judged_documents
,
get_ndcg
as
ndcg
,
get_ndcg
from
.common
import
get_topics
,
get_judged_documents
,
get_ndcg
as
ndcg
,
get_ndcg
,
get_random_ndcg
scripts/common.py
View file @
d9b73cb8
# -*- coding:utf-8 -*-
from
copy
import
deepcopy
from
itertools
import
chain
from
math
import
log2
import
numpy
as
np
...
...
@@ -46,9 +48,29 @@ def get_judged_documents(task, subset=None, topic=None):
return
judged_documents
def
get_ndcg
(
parsed_run
,
task
=
'task1-votes'
,
subset
=
'train-validation'
):
def get_ndcg(parsed_run, task, subset):
    """Return the mean ``'ndcg'`` measure of *parsed_run* for *task* on *subset*.

    The run is first passed through ``remove_nonjudged_topics_and_documents``
    and then scored by the evaluator stored at ``EVALUATORS[subset][task]``.
    The evaluator's result is read as a mapping whose values each carry an
    ``'ndcg'`` entry; those entries are averaged with ``np.mean``.

    Parameters
    ----------
    parsed_run
        The run to evaluate; forwarded unchanged to
        ``remove_nonjudged_topics_and_documents``.
    task
        Key selecting the task inside ``EVALUATORS[subset]``.
    subset
        Key selecting the subset inside ``EVALUATORS``.

    Returns
    -------
    The arithmetic mean of the per-entry ``'ndcg'`` values.
    """
    # Look the evaluator up first so an unknown subset/task fails before any
    # filtering work is done.
    scorer = EVALUATORS[subset][task]
    judged_only_run = remove_nonjudged_topics_and_documents(parsed_run, task, subset)
    results = scorer.evaluate(judged_only_run)
    ndcg_values = [scores['ndcg'] for _, scores in results.items()]
    return np.mean(ndcg_values)
def get_random_ndcg(task, subset):
    """Return the expected NDCG of a randomly ordered ranking.

    Every judgement found in the nested mapping
    ``PARSED_RELEVANCE_JUDGEMENTS[subset][task]`` is pooled into a single
    list (presumably one inner mapping per topic -- verify against the code
    that builds ``PARSED_RELEVANCE_JUDGEMENTS``).  The pooled list, sorted in
    descending order, is the ideal ranking.  A random ranking is modelled by
    placing the mean judgement at every rank.  Both rankings are discounted
    by ``log2(rank + 2)`` for zero-based ranks, and the ratio of the two
    discounted sums is returned.

    Parameters
    ----------
    task
        Key selecting the task inside ``PARSED_RELEVANCE_JUDGEMENTS[subset]``.
    subset
        Key selecting the subset inside ``PARSED_RELEVANCE_JUDGEMENTS``.

    Returns
    -------
    The ratio ``random_dcg / ideal_dcg``, or ``0.0`` when there are no
    judgements or the ideal DCG is zero (the ratio is undefined there).

    Raises
    ------
    KeyError
        If *subset* or *task* is not present in
        ``PARSED_RELEVANCE_JUDGEMENTS`` (assuming it is a plain ``dict``).
    """
    # The loop variable is deliberately NOT called ``subset``: reusing the
    # parameter name only worked because the outermost iterable of a
    # comprehension is evaluated in the enclosing scope.
    judgements = sorted(
        (
            judgement
            for inner_judgements in PARSED_RELEVANCE_JUDGEMENTS[subset][task].values()
            for judgement in inner_judgements.values()
        ),
        reverse=True,
    )
    if not judgements:
        # Nothing was judged: avoid np.mean([]) -> nan and a 0.0 / 0.0 crash.
        return 0.0

    expected_judgement = np.mean(judgements)
    # The discounts are shared by both sums, so compute them once.
    discounts = [log2(rank + 2) for rank in range(len(judgements))]
    random_dcg = sum(expected_judgement / discount for discount in discounts)
    ideal_dcg = sum(
        judgement / discount
        for judgement, discount in zip(judgements, discounts)
    )
    if ideal_dcg == 0:
        # All judgements are zero: define the NDCG as 0 instead of returning nan.
        return 0.0
    return random_dcg / ideal_dcg
test/test_get_judged_documents.py
View file @
d9b73cb8
...
...
@@ -4,7 +4,7 @@ from arqmath_eval import get_judged_documents
class
TestGetJudgedDocuments
(
unittest
.
TestCase
):
def
test_all_subsets_
and_
all_topics
(
self
):
def
test_all_subsets_all_topics
(
self
):
documents
=
get_judged_documents
(
'task1'
)
expected_documents
=
{
'48162'
,
...
...
test/test_get_ndcg.py
View file @
d9b73cb8
...
...
@@ -4,7 +4,7 @@ import unittest
from
arqmath_eval
import
get_ndcg
class
TestNDCG
(
unittest
.
TestCase
):
class
Test
Get
NDCG
(
unittest
.
TestCase
):
def
test_best
(
self
):
parsed_run
=
{
'A.78'
:
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment