Loading ahisto_named_entity_search/default.ini +3 −2 Original line number Diff line number Diff line Loading @@ -94,8 +94,9 @@ base_model = xlm-roberta-base batch_size = 4 gradient_accumulation_steps = 4 log_every_n_steps = 100 evaluate_every_n_steps = 10000 save_every_n_steps = 10000 evaluate_every_n_steps = 1000 save_every_n_steps = 1000 number_of_validation_samples = 1000 number_of_training_epochs = 10 schedule = fair-sequential-schedule Loading ahisto_named_entity_search/recognition/model.py +4 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ class NerModel: LOGGING_STEPS = CONFIG.getint('log_every_n_steps') NUM_TRAIN_EPOCHS = CONFIG.getint('number_of_training_epochs') SCHEDULE_NAME = CONFIG['schedule'] NUM_VALIDATION_SAMPLES = CONFIG.getint('number_of_validation_samples') def __init__(self, model: AutoModelForTokenClassification): self.model = model Loading @@ -52,6 +53,7 @@ class NerModel: # Set up masked language modeling (MLM) training mlm_training_texts = list(Document.load_sentences(training_sentence_basename)) mlm_validation_texts = list(Document.load_sentences(validation_sentence_basename)) mlm_validation_texts = mlm_validation_texts[:cls.NUM_VALIDATION_SAMPLES] mlm_objective = MaskedLanguageModeling(lang_module, batch_size=cls.BATCH_SIZE, Loading @@ -68,6 +70,8 @@ class NerModel: ner_training_texts, ner_training_labels = load_ner_dataset(training_tagged_sentence_basename) ner_validation_texts, ner_validation_labels = load_ner_dataset(validation_tagged_sentence_basename) ner_validation_texts = ner_validation_texts[:cls.NUM_VALIDATION_SAMPLES] ner_validation_labels = ner_validation_labels[:cls.NUM_VALIDATION_SAMPLES] ner_evaluators = [MeanFScore(decides_convergence=True)] ner_objective = TokenClassification(lang_module, Loading Loading
ahisto_named_entity_search/default.ini +3 −2 Original line number Diff line number Diff line Loading @@ -94,8 +94,9 @@ base_model = xlm-roberta-base batch_size = 4 gradient_accumulation_steps = 4 log_every_n_steps = 100 evaluate_every_n_steps = 10000 save_every_n_steps = 10000 evaluate_every_n_steps = 1000 save_every_n_steps = 1000 number_of_validation_samples = 1000 number_of_training_epochs = 10 schedule = fair-sequential-schedule Loading
ahisto_named_entity_search/recognition/model.py +4 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ class NerModel: LOGGING_STEPS = CONFIG.getint('log_every_n_steps') NUM_TRAIN_EPOCHS = CONFIG.getint('number_of_training_epochs') SCHEDULE_NAME = CONFIG['schedule'] NUM_VALIDATION_SAMPLES = CONFIG.getint('number_of_validation_samples') def __init__(self, model: AutoModelForTokenClassification): self.model = model Loading @@ -52,6 +53,7 @@ class NerModel: # Set up masked language modeling (MLM) training mlm_training_texts = list(Document.load_sentences(training_sentence_basename)) mlm_validation_texts = list(Document.load_sentences(validation_sentence_basename)) mlm_validation_texts = mlm_validation_texts[:cls.NUM_VALIDATION_SAMPLES] mlm_objective = MaskedLanguageModeling(lang_module, batch_size=cls.BATCH_SIZE, Loading @@ -68,6 +70,8 @@ class NerModel: ner_training_texts, ner_training_labels = load_ner_dataset(training_tagged_sentence_basename) ner_validation_texts, ner_validation_labels = load_ner_dataset(validation_tagged_sentence_basename) ner_validation_texts = ner_validation_texts[:cls.NUM_VALIDATION_SAMPLES] ner_validation_labels = ner_validation_labels[:cls.NUM_VALIDATION_SAMPLES] ner_evaluators = [MeanFScore(decides_convergence=True)] ner_objective = TokenClassification(lang_module, Loading