Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
nlp
ahisto-modules
Named Entity Recognition Experiments
Commits
04b847a2
Commit
04b847a2
authored
Aug 15, 2022
by
Vít Novotný
Browse files
Evaluate more often with fewer samples
parent
b3810c32
Changes
2
Hide whitespace changes
Inline
Side-by-side
ahisto_named_entity_search/default.ini
View file @
04b847a2
...
...
@@ -94,8 +94,9 @@ base_model = xlm-roberta-base
batch_size
=
4
gradient_accumulation_steps
=
4
log_every_n_steps
=
100
evaluate_every_n_steps
=
10000
save_every_n_steps
=
10000
evaluate_every_n_steps
=
1000
save_every_n_steps
=
1000
number_of_validation_samples
=
1000
number_of_training_epochs
=
10
schedule
=
fair-sequential-schedule
...
...
ahisto_named_entity_search/recognition/model.py
View file @
04b847a2
...
...
@@ -33,6 +33,7 @@ class NerModel:
LOGGING_STEPS
=
CONFIG
.
getint
(
'log_every_n_steps'
)
NUM_TRAIN_EPOCHS
=
CONFIG
.
getint
(
'number_of_training_epochs'
)
SCHEDULE_NAME
=
CONFIG
[
'schedule'
]
NUM_VALIDATION_SAMPLES
=
CONFIG
.
getint
(
'number_of_validation_samples'
)
def
__init__
(
self
,
model
:
AutoModelForTokenClassification
):
self
.
model
=
model
...
...
@@ -52,6 +53,7 @@ class NerModel:
# Set up masked language modeling (MLM) training
mlm_training_texts
=
list
(
Document
.
load_sentences
(
training_sentence_basename
))
mlm_validation_texts
=
list
(
Document
.
load_sentences
(
validation_sentence_basename
))
mlm_validation_texts
=
mlm_validation_texts
[:
cls
.
NUM_VALIDATION_SAMPLES
]
mlm_objective
=
MaskedLanguageModeling
(
lang_module
,
batch_size
=
cls
.
BATCH_SIZE
,
...
...
@@ -68,6 +70,8 @@ class NerModel:
ner_training_texts
,
ner_training_labels
=
load_ner_dataset
(
training_tagged_sentence_basename
)
ner_validation_texts
,
ner_validation_labels
=
load_ner_dataset
(
validation_tagged_sentence_basename
)
ner_validation_texts
=
ner_validation_texts
[:
cls
.
NUM_VALIDATION_SAMPLES
]
ner_validation_labels
=
ner_validation_labels
[:
cls
.
NUM_VALIDATION_SAMPLES
]
ner_evaluators
=
[
MeanFScore
(
decides_convergence
=
True
)]
ner_objective
=
TokenClassification
(
lang_module
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment