Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
nlp
ahisto-modules
Named Entity Recognition Experiments
Commits
51b285b6
Commit
51b285b6
authored
Sep 19, 2022
by
Vít Novotný
Browse files
Use micro-averaging in `AggregateMeanFScoreEvaluator.__call__()`
parent
152f26f4
Pipeline
#147314
failed with stage
in 7 minutes and 31 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
03_train_ner_models.ipynb
View file @
51b285b6
...
...
@@ -237,7 +237,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Loading documents: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 268669/268669 [00:06<00:00, 44
001.38
it/s]\n"
"Loading documents: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 268669/268669 [00:06<00:00, 44
308.95
it/s]\n"
]
}
],
...
...
@@ -462,7 +462,7 @@
},
{
"cell_type": "code",
"execution_count": 2
4
,
"execution_count": 2
1
,
"id": "694daad3-2b04-4e3f-8bfb-bb3fe0c87dd3",
"metadata": {},
"outputs": [],
...
...
@@ -473,7 +473,7 @@
},
{
"cell_type": "code",
"execution_count": 2
5
,
"execution_count": 2
2
,
"id": "fed4d0a4-5bc4-4af2-8e1b-c5a8a6b61c52",
"metadata": {},
"outputs": [],
...
...
@@ -491,7 +491,7 @@
},
{
"cell_type": "code",
"execution_count": 3
4
,
"execution_count":
2
3,
"id": "38efa732-8afd-4798-809a-ca828a8b960c",
"metadata": {},
"outputs": [],
...
...
@@ -501,7 +501,7 @@
},
{
"cell_type": "code",
"execution_count":
35
,
"execution_count":
24
,
"id": "1c9d3bab-53de-4c39-8e4d-cd2978f00925",
"metadata": {},
"outputs": [],
...
...
@@ -513,7 +513,7 @@
},
{
"cell_type": "code",
"execution_count":
36
,
"execution_count":
25
,
"id": "f47f62ca-1164-45b5-94b9-ff082787e8a9",
"metadata": {},
"outputs": [],
...
...
@@ -526,7 +526,7 @@
},
{
"cell_type": "code",
"execution_count":
37
,
"execution_count":
26
,
"id": "053ff5dd-775c-431d-a988-18bf3c4f4f6d",
"metadata": {},
"outputs": [
...
...
@@ -559,116 +559,116 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_only-relevant_fine-tuning</th>\n",
" <td>49.97944%</td>\n",
" <td>96.66777%</td>\n",
" <td>41.04577%</td>\n",
" <td>62.56433%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_all_only-relevant_fine-tuning</th>\n",
" <td>49.63405%</td>\n",
" <td>96.74517%</td>\n",
" <td>39.86766%</td>\n",
" <td>62.08229%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_all_only-relevant_parallel</th>\n",
" <td>51.62152%</td>\n",
" <td>96.17568%</td>\n",
" <td>38.21471%</td>\n",
" <td>62.00397%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_only-relevant_parallel</th>\n",
" <td>49.71262%</td>\n",
" <td>96.10254%</td>\n",
" <td>38.21755%</td>\n",
" <td>61.34424%</td>\n",
" <td>94.77053%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_all_all_parallel</th>\n",
" <td>43.69512%</td>\n",
" <td>96.65764%</td>\n",
" <td>39.60441%</td>\n",
" <td>
59.98572
%</td>\n",
" <td>
94.52101
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_all_parallel</th>\n",
" <td>44.26929%</td>\n",
" <td>96.20140%</td>\n",
" <td>34.92015%</td>\n",
" <td>58.46362%</td>\n",
" <th>model_ner_manatee_non-crossing_only-relevant_fine-tuning</th>\n",
" <td>49.97944%</td>\n",
" <td>96.66777%</td>\n",
" <td>41.04577%</td>\n",
" <td>94.46477%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_all_only-relevant_fine-tuning</th>\n",
" <td>34.07244%</td>\n",
" <td>96.38580%</td>\n",
" <td>36.68019%</td>\n",
" <td>93.57176%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_non-crossing_only-relevant_fine-tuning</th>\n",
" <td>34.74969%</td>\n",
" <td>96.34174%</td>\n",
" <td>36.64832%</td>\n",
" <td>
55.91325
%</td>\n",
" <td>
93.41004
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_all_only-relevant_fine-tuning</th>\n",
" <td>34.07244%</td>\n",
" <td>96.38580%</td>\n",
" <td>36.68019%</td>\n",
" <td>55.71281%</td>\n",
" <th>model_ner_manatee_all_only-relevant_parallel</th>\n",
" <td>51.62152%</td>\n",
" <td>96.17568%</td>\n",
" <td>38.21471%</td>\n",
" <td>93.32879%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_all_parallel</th>\n",
" <td>44.26929%</td>\n",
" <td>96.20140%</td>\n",
" <td>34.92015%</td>\n",
" <td>93.31223%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_only-relevant_parallel</th>\n",
" <td>49.71262%</td>\n",
" <td>96.10254%</td>\n",
" <td>38.21755%</td>\n",
" <td>93.23893%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_all_all_parallel</th>\n",
" <td>33.04513%</td>\n",
" <td>95.95721%</td>\n",
" <td>37.66807%</td>\n",
" <td>
55.55681
%</td>\n",
" <td>
92.63503
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_non-crossing_all_parallel</th>\n",
" <td>33.17825%</td>\n",
" <td>95.71407%</td>\n",
" <td>32.79457%</td>\n",
" <td>
53.8956
3%</td>\n",
" <td>
92.0309
3%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_all_only-relevant_parallel</th>\n",
" <td>31.30961%</td>\n",
" <td>95.59693%</td>\n",
" <td>32.23955%</td>\n",
" <td>
53.04870
%</td>\n",
" <td>
91.78359
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_non-crossing_only-relevant_parallel</th>\n",
" <td>32.51893%</td>\n",
" <td>95.45893%</td>\n",
" <td>30.89947%</td>\n",
" <td>
52.95911
%</td>\n",
" <td>
91.47703
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_all_all_fine-tuning</th>\n",
" <td>2.17360%</td>\n",
" <td>42.86388%</td>\n",
" <td>3.80143%</td>\n",
" <td>
16.27964
%</td>\n",
" <td>
25.11350
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_manatee_non-crossing_all_fine-tuning</th>\n",
" <td>2.34774%</td>\n",
" <td>23.59883%</td>\n",
" <td>2.75918%</td>\n",
" <td>
9.56858
%</td>\n",
" <td>
13.38802
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Babelscape/wikineural-multilingual-ner baseline</th>\n",
" <td>7.35338%</td>\n",
" <td>13.35824%</td>\n",
" <td>2.84895%</td>\n",
" <td>
7.85352
%</td>\n",
" <td>
8.07667
%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>model_ner_fuzzy-regex_all_all_fine-tuning</th>\n",
" <td>2.38798%</td>\n",
" <td>7.33972%</td>\n",
" <td>3.32850%</td>\n",
" <td>4.
35207
%</td>\n",
" <td>4.
96872
%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
...
...
@@ -676,14 +676,14 @@
],
"text/plain": [
" PER O \\\n",
"model_ner_manatee_non-crossing_only-relevant_fi... 49.97944% 96.66777% \n",
"model_ner_manatee_all_only-relevant_fine-tuning 49.63405% 96.74517% \n",
"model_ner_manatee_all_only-relevant_parallel 51.62152% 96.17568% \n",
"model_ner_manatee_non-crossing_only-relevant_pa... 49.71262% 96.10254% \n",
"model_ner_manatee_all_all_parallel 43.69512% 96.65764% \n",
"model_ner_manatee_non-crossing_all_parallel 44.26929% 96.20140% \n",
"model_ner_fuzzy-regex_non-crossing_only-relevan... 34.74969% 96.34174% \n",
"model_ner_manatee_non-crossing_only-relevant_fi... 49.97944% 96.66777% \n",
"model_ner_fuzzy-regex_all_only-relevant_fine-tu... 34.07244% 96.38580% \n",
"model_ner_fuzzy-regex_non-crossing_only-relevan... 34.74969% 96.34174% \n",
"model_ner_manatee_all_only-relevant_parallel 51.62152% 96.17568% \n",
"model_ner_manatee_non-crossing_all_parallel 44.26929% 96.20140% \n",
"model_ner_manatee_non-crossing_only-relevant_pa... 49.71262% 96.10254% \n",
"model_ner_fuzzy-regex_all_all_parallel 33.04513% 95.95721% \n",
"model_ner_fuzzy-regex_non-crossing_all_parallel 33.17825% 95.71407% \n",
"model_ner_fuzzy-regex_all_only-relevant_parallel 31.30961% 95.59693% \n",
...
...
@@ -694,22 +694,22 @@
"model_ner_fuzzy-regex_all_all_fine-tuning 2.38798% 7.33972% \n",
"\n",
" LOC all \n",
"model_ner_manatee_
non-crossing
_only-relevant_fi
... 41.04577% 62.5643
3% \n",
"model_ner_manatee_all_
only-relevant_fine-tuning 39.86766% 62.08229
% \n",
"model_ner_manatee_
all
_only-relevant_
parallel 38.21471% 62.0039
7% \n",
"model_ner_
manatee_non-crossing_only-relevant_pa... 38.21755% 61.34424
% \n",
"model_ner_
manatee_all_all_parallel
3
9
.6
0441% 59.98572
% \n",
"model_ner_manatee_
non-crossing_all_parallel 34.92015% 58.46362
% \n",
"model_ner_
fuzzy-regex
_non-crossing_
only-relevan... 36.64832% 55.91325
% \n",
"model_ner_
fuzzy-regex_all_only-relevant_fine-tu... 36.68019% 55.71281
% \n",
"model_ner_fuzzy-regex_all_all_parallel 37.66807%
55.55681
% \n",
"model_ner_fuzzy-regex_non-crossing_all_parallel 32.79457%
53.8956
3% \n",
"model_ner_fuzzy-regex_all_only-relevant_parallel 32.23955%
53.04870
% \n",
"model_ner_fuzzy-regex_non-crossing_only-relevan... 30.89947%
52.95911
% \n",
"model_ner_manatee_all_all_fine-tuning 3.80143%
16.27964
% \n",
"model_ner_manatee_non-crossing_all_fine-tuning 2.75918%
9.56858
% \n",
"Babelscape/wikineural-multilingual-ner baseline 2.84895%
7.85352
% \n",
"model_ner_fuzzy-regex_all_all_fine-tuning 3.32850% 4.
35207
% "
"model_ner_manatee_
all
_only-relevant_fi
ne-tuning 39.86766% 94.7705
3% \n",
"model_ner_manatee_all_
all_parallel 39.60441% 94.52101
% \n",
"model_ner_manatee_
non-crossing
_only-relevant_
fi... 41.04577% 94.4647
7% \n",
"model_ner_
fuzzy-regex_all_only-relevant_fine-tu... 36.68019% 93.57176
% \n",
"model_ner_
fuzzy-regex_non-crossing_only-relevan...
3
6
.6
4832% 93.41004
% \n",
"model_ner_manatee_
all_only-relevant_parallel 38.21471% 93.32879
% \n",
"model_ner_
manatee
_non-crossing_
all_parallel 34.92015% 93.31223
% \n",
"model_ner_
manatee_non-crossing_only-relevant_pa... 38.21755% 93.23893
% \n",
"model_ner_fuzzy-regex_all_all_parallel 37.66807%
92.63503
% \n",
"model_ner_fuzzy-regex_non-crossing_all_parallel 32.79457%
92.0309
3% \n",
"model_ner_fuzzy-regex_all_only-relevant_parallel 32.23955%
91.78359
% \n",
"model_ner_fuzzy-regex_non-crossing_only-relevan... 30.89947%
91.47703
% \n",
"model_ner_manatee_all_all_fine-tuning 3.80143%
25.11350
% \n",
"model_ner_manatee_non-crossing_all_fine-tuning 2.75918%
13.38802
% \n",
"Babelscape/wikineural-multilingual-ner baseline 2.84895%
8.07667
% \n",
"model_ner_fuzzy-regex_all_all_fine-tuning 3.32850% 4.
96872
% "
]
},
"metadata": {},
...
...
@@ -731,7 +731,7 @@
},
{
"cell_type": "code",
"execution_count":
43
,
"execution_count":
27
,
"id": "f7975e45-ba27-45b4-9b61-1a9c119d434d",
"metadata": {},
"outputs": [
...
...
@@ -739,7 +739,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"/nlp/projekty/ahisto/public_html/named-entity-search/results/model_ner_manatee_
non-crossing
_only-relevant_fine-tuning/TokenClassification\n"
"/nlp/projekty/ahisto/public_html/named-entity-search/results/model_ner_manatee_
all
_only-relevant_fine-tuning/TokenClassification\n"
]
}
],
...
...
@@ -761,7 +761,7 @@
},
{
"cell_type": "code",
"execution_count":
1
8,
"execution_count":
2
8,
"id": "fffd0beb-ac50-4c81-9eb3-cc225214ff63",
"metadata": {},
"outputs": [],
...
...
@@ -777,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count":
1
9,
"execution_count":
2
9,
"id": "b83d2a1a-4d8f-400a-a8e8-cba43fe41a83",
"metadata": {},
"outputs": [
...
...
@@ -805,7 +805,7 @@
},
{
"cell_type": "code",
"execution_count":
2
0,
"execution_count":
3
0,
"id": "de04e6a9-33e5-4e85-9cfc-f0f5bc344677",
"metadata": {},
"outputs": [],
...
...
@@ -815,7 +815,7 @@
},
{
"cell_type": "code",
"execution_count":
2
1,
"execution_count":
3
1,
"id": "d11a74e1-f3c8-416c-897e-e921a69dc661",
"metadata": {},
"outputs": [],
...
...
@@ -829,7 +829,7 @@
},
{
"cell_type": "code",
"execution_count": 2
5
,
"execution_count":
3
2,
"id": "675dd306-50b0-4245-9904-effff2432921",
"metadata": {},
"outputs": [
...
...
@@ -862,7 +862,7 @@
},
{
"cell_type": "code",
"execution_count":
26
,
"execution_count":
33
,
"id": "2b1c528d-b4ec-4e96-8ca7-ff05fd244d80",
"metadata": {},
"outputs": [
...
...
@@ -890,7 +890,7 @@
"- I-LOC: ##de\n",
"- I-LOC: ##ch\n"
]
}
}
,
],
"source": [
"tag_sentence(baseline_model, example_sentence)"
...
...
ahisto_named_entity_search/recognition/evaluator.py
View file @
51b285b6
from
typing
import
Dict
,
Optional
,
Set
,
List
from
typing
import
Dict
,
Optional
,
Set
,
List
,
Tuple
from
functools
import
total_ordering
from
more_itertools
import
zip_equal
...
...
@@ -38,13 +38,14 @@ class AggregateMeanFScoreEvaluator(TokenClassificationEvaluator):
expected_labels
,
actual_labels
=
self
.
_collect_token_predictions
(
model
,
dataset
)
if
self
.
group_name
is
None
:
f_scores
=
[
self
.
get_f_score
(
self
.
GROUPS
[
group_name
],
expected_labels
,
actual_labels
)
for
group_name
in
self
.
__class__
.
get_all_group_names
()
]
assert
len
(
f_scores
)
>
0
mean_f_score
=
sum
(
f_scores
)
/
len
(
f_scores
)
mean_f_score
,
total_number_of_samples
=
0
,
0
for
group_name
in
self
.
__class__
.
get_all_group_names
():
number_of_samples
,
f_score
=
self
.
get_f_score
(
self
.
GROUPS
[
group_name
],
expected_labels
,
actual_labels
)
mean_f_score
+=
number_of_samples
*
f_score
total_number_of_samples
+=
number_of_samples
if
total_number_of_samples
>
0
:
_
,
mean_f_score
/=
total_number_of_samples
else
:
group
=
self
.
GROUPS
[
self
.
group_name
]
mean_f_score
=
self
.
get_f_score
(
group
,
expected_labels
,
actual_labels
)
...
...
@@ -52,7 +53,7 @@ class AggregateMeanFScoreEvaluator(TokenClassificationEvaluator):
return
mean_f_score
def
get_f_score
(
self
,
group
:
Group
,
expected_labels
:
List
[
Category
],
actual_labels
:
List
[
Category
])
->
FScore
:
actual_labels
:
List
[
Category
])
->
Tuple
[
int
,
FScore
]
:
expected_categories
:
Set
[
Category
]
=
{
self
.
category_map
[
category
]
for
category
...
...
@@ -69,8 +70,9 @@ class AggregateMeanFScoreEvaluator(TokenClassificationEvaluator):
elif
expected_label
in
expected_categories
and
actual_label
not
in
expected_categories
:
false_negatives
+=
1
number_of_samples
=
true_positives
+
false_positives
+
false_negatives
f_score
=
true_positives
/
(
true_positives
+
(
0.5
*
(
false_positives
+
false_negatives
)))
return
f_score
return
number_of_samples
,
f_score
@
classmethod
def
get_all_group_names
(
cls
)
->
Set
[
GroupName
]:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment