Unverified commit 606c2347 authored by Vít Novotný

Add type hints

parent 5c52474f
@@ -17,6 +17,20 @@ jobs:
          pip install flake8
      - run: |
          flake8
  pytype:
    name: Check Typing
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v1
        with:
          python-version: 3.7
      - run: |
          python -m pip install --upgrade pip
          pip install pytype
          pip install .
      - run: |
          pytype .
  unit_tests:
    name: Run Unit Tests
    runs-on: ubuntu-latest
@@ -74,15 +74,6 @@ $ python
0.5876
```
Here is the documentation of the available evaluation functions:
- [`get_topics(task, subset=None)`][get_topics],
- [`get_judged_documents(task, subset=None, topic=None)`][get_judged_documents],
- [`get_random_ndcg(task, subset, topn)`][get_random_ndcg],
- [`get_ndcg(parsed_run, task, subset, topn, confidence)`][get_ndcg],
- [`get_random_normalized_ndcg(parsed_run, task, subset, topn, ndcg)`][get_random_normalized_ndcg], and
- [`get_judgement(task, subset, topic, judged_document)`][get_judgement].
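As a quick orientation, here is a minimal usage sketch of these functions. It assumes they can be imported from the `arqmath_eval` package; the task and subset names are placeholders rather than values documented here:

``` python
from arqmath_eval import get_topics, get_judged_documents, get_ndcg

task, subset = 'task1', 'validation'  # placeholder identifiers
# A dummy run that retrieves every judged document with a constant score.
parsed_run = {
    topic: {document: 1.0 for document in get_judged_documents(task, subset, topic)}
    for topic in get_topics(task, subset)
}
print(get_ndcg(parsed_run, task, subset, topn=1000))
```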
#### Using the `validation` subset to compare various parameters of your system
``` sh
@@ -125,12 +116,6 @@ $ python -m arqmath_eval.evaluate MIRMU-task1-Ensemble-auto-both-A.tsv all 2020
[arqmath-task1]: https://www.cs.rit.edu/~dprl/ARQMath/Task1-answers.html (Task 1: Find Answers)
[arqmath-task2]: https://www.cs.rit.edu/~dprl/ARQMath/task2-formulas.html (Task 2: Formula Search)
[get_topics]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L35
[get_judged_documents]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L62
[get_ndcg]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L95
[get_random_ndcg]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L140
[get_random_normalized_ndcg]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L185
[get_judgement]: https://github.com/MIR-MU/ARQMath-eval/blob/master/scripts/common.py#L224
[ntcir-11-math-2]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.686.444&rep=rep1&type=pdf (NTCIR-11 Math-2 Task Overview)
[ntcir-12-mathir]: https://www.cs.rit.edu/~rlaz/files/ntcir12-mathir.pdf (NTCIR-12 MathIR Task Overview)
[treceval-format]: https://stackoverflow.com/a/8175382/657401 (How to evaluate a search/retrieval engine using trec_eval?)
@@ -5,11 +5,20 @@ from math import log2
import numpy as np
import scipy.stats as st
from typing import Dict, Optional, Set
from .configuration import EVALUATORS, PARSED_RELEVANCE_JUDGEMENTS
def _remove_nonjudged_topics_and_documents(parsed_run, task, subset):
Scores = Dict[str, float]
ParsedRun = Dict[str, Scores]
Task = str
Subset = str
Topic = str
Document = str
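# Illustrative sketch (editor's addition, not part of this commit): under the
# aliases above, a parsed run maps topic identifiers to the retrieved documents
# and their retrieval scores. The identifiers below are hypothetical.
_example_parsed_run: ParsedRun = {
    'topic-1': {'document-1': 12.3, 'document-2': 9.8},
    'topic-2': {'document-3': 4.2},
}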
def _remove_nonjudged_topics_and_documents(parsed_run: ParsedRun, task: Task, subset: Subset) -> ParsedRun:
parsed_relevance_judgements = PARSED_RELEVANCE_JUDGEMENTS[subset][task]
only_judged_parsed_run = deepcopy(parsed_run)
for topic_name, results in parsed_run.items():
@@ -23,7 +32,7 @@ def _remove_nonjudged_topics_and_documents(parsed_run, task, subset):
return only_judged_parsed_run
def _clip_topn(parsed_run, topn):
def _clip_topn(parsed_run: ParsedRun, topn: int) -> ParsedRun:
clipped_parsed_run = {}
for topic, documents in parsed_run.items():
clipped_documents = sorted(documents.items(), key=lambda x: x[1], reverse=True)[:topn]
@@ -31,7 +40,7 @@ def _clip_topn(parsed_run, topn):
return clipped_parsed_run
def get_topics(task, subset=None):
def get_topics(task: Task, subset: Optional[Subset] = None) -> Set[Topic]:
"""Returns the identifiers of topics for a subset of a task.
Parameters
@@ -58,7 +67,7 @@ def get_topics(task, subset=None):
return topics
def get_judged_documents(task, subset=None, topic=None):
def get_judged_documents(task: Task, subset: Optional[Subset] = None, topic: Optional[Topic] = None) -> Set[Document]:
"""Returns the judged documents of a topic in a subset of a task.
Parameters
@@ -91,7 +100,8 @@ def get_judged_documents(task, subset=None, topic=None):
return judged_documents
def get_ndcg(parsed_run, task, subset, topn=1000, confidence=None):
def get_ndcg(parsed_run: ParsedRun, task: Task, subset: Subset, topn: int = 1000,
confidence: Optional[float] = None):
"""Returns the NDCG' of a system's run on a subset of a task.
NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all
@@ -136,7 +146,7 @@ def get_ndcg(parsed_run, task, subset, topn=1000, confidence=None):
return ndcg
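# Illustrative sketch (editor's addition, not part of this commit): once the
# non-judged results have been removed, NDCG' reduces to plain NDCG, i.e. the
# discounted gains of the ranked judgements normalized by the ideal ordering.
# The module's actual computation may differ in detail; `log2` is already
# imported from the standard library above.
def _example_ndcg_prime(ranked_judgements):
    """Computes NDCG over relevance judgements ordered by system score."""
    dcg = sum(judgement / log2(rank + 2)
              for rank, judgement in enumerate(ranked_judgements))
    ideal_dcg = sum(judgement / log2(rank + 2)
                    for rank, judgement in enumerate(sorted(ranked_judgements, reverse=True)))
    return dcg / ideal_dcg if ideal_dcg > 0 else 0.0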
def get_random_ndcg(task, subset, topn=1000):
def get_random_ndcg(task: Task, subset: Subset, topn: int = 1000) -> float:
"""Returns the expected NDCG' of a random system on a subset of a task.
NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all
@@ -181,7 +191,8 @@ def get_random_ndcg(task, subset, topn=1000):
return np.mean(random_ndcgs)
def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000, ndcg=None):
def get_random_normalized_ndcg(parsed_run: ParsedRun, task: Task, subset: Subset, topn: int = 1000,
ndcg: Optional[float] = None) -> float:
"""Returns the random-normalized NDCG' of a system's run on a subset of a task.
NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all
@@ -220,7 +231,7 @@ def get_random_normalized_ndcg(parsed_run, task, subset, topn=1000, ndcg=None):
return random_normalized_ndcg
def get_judgement(task, subset, topic, judged_document):
def get_judgement(task: Task, subset: Subset, topic: Topic, judged_document: Document) -> Optional[int]:
"""Returns judgement of a document in a topic from a subset of a task, or None if none exists.
NDCG' is the same as NDCG (Normalized Discounted Cumulative Gain), but all