Commit 04b847a2 authored by Vít Novotný

Evaluate more often with fewer samples

parent b3810c32
@@ -94,8 +94,9 @@ base_model = xlm-roberta-base
 batch_size = 4
 gradient_accumulation_steps = 4
 log_every_n_steps = 100
-evaluate_every_n_steps = 10000
-save_every_n_steps = 10000
+evaluate_every_n_steps = 1000
+save_every_n_steps = 1000
+number_of_validation_samples = 1000
 number_of_training_epochs = 10
 schedule = fair-sequential-schedule
@@ -33,6 +33,7 @@ class NerModel:
     LOGGING_STEPS = CONFIG.getint('log_every_n_steps')
     NUM_TRAIN_EPOCHS = CONFIG.getint('number_of_training_epochs')
     SCHEDULE_NAME = CONFIG['schedule']
+    NUM_VALIDATION_SAMPLES = CONFIG.getint('number_of_validation_samples')

     def __init__(self, model: AutoModelForTokenClassification):
         self.model = model
@@ -52,6 +53,7 @@ class NerModel:
         # Set up masked language modeling (MLM) training
         mlm_training_texts = list(Document.load_sentences(training_sentence_basename))
         mlm_validation_texts = list(Document.load_sentences(validation_sentence_basename))
+        mlm_validation_texts = mlm_validation_texts[:cls.NUM_VALIDATION_SAMPLES]
         mlm_objective = MaskedLanguageModeling(lang_module,
                                                batch_size=cls.BATCH_SIZE,
@@ -68,6 +70,8 @@ class NerModel:
         ner_training_texts, ner_training_labels = load_ner_dataset(training_tagged_sentence_basename)
         ner_validation_texts, ner_validation_labels = load_ner_dataset(validation_tagged_sentence_basename)
+        ner_validation_texts = ner_validation_texts[:cls.NUM_VALIDATION_SAMPLES]
+        ner_validation_labels = ner_validation_labels[:cls.NUM_VALIDATION_SAMPLES]
         ner_evaluators = [MeanFScore(decides_convergence=True)]
         ner_objective = TokenClassification(lang_module,
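Taken together, the commit makes evaluation and checkpointing run every 1,000 steps instead of every 10,000, while capping both the MLM and the NER validation splits at number_of_validation_samples = 1000 examples, so each of the now more frequent evaluation passes stays cheap. A minimal standalone sketch of the same capping pattern follows; the split size used here is hypothetical and not taken from the repository:

    NUM_VALIDATION_SAMPLES = 1000  # number_of_validation_samples from the config

    # Hypothetical validation split; the real splits are loaded with
    # Document.load_sentences() and load_ner_dataset().
    validation_texts = [f'sentence {i}' for i in range(25_000)]

    # Same slicing as in NerModel: keep at most NUM_VALIDATION_SAMPLES examples.
    # Slicing never raises, so splits smaller than the cap are kept unchanged.
    validation_texts = validation_texts[:NUM_VALIDATION_SAMPLES]
    assert len(validation_texts) == NUM_VALIDATION_SAMPLES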