Commit 1046301d authored by Vít Novotný
Browse files

Train using NER tags in BIO format

parent 16f1e88d
Pipeline #146493 passed with stage
in 8 minutes and 41 seconds
......@@ -65,7 +65,7 @@ class NerModel:
ner_texts, all_ner_tags = [], []
for tagged_sentence in TaggedSentence.load(tagged_sentence_basename):
ner_texts.append(tagged_sentence.sentence)
all_ner_tags.append(tagged_sentence.ner_tags)
all_ner_tags.append(tagged_sentence.bio_ner_tags)
return ner_texts, all_ner_tags
ner_training_texts, ner_training_labels = load_ner_dataset(training_tagged_sentence_basename)
......
......@@ -64,6 +64,23 @@ class TaggedSentence:
ner_tags = ' '.join(self.ner_tags_tuple)
return ner_tags
@property
def bio_ner_tags(self) -> NerTags:
    """Return the NER tags of this sentence as a BIO-encoded string.

    Every tag in ``self.ner_tags_tuple`` equal to ``'O'`` is kept as is.
    Any other tag is prefixed with ``B-`` when it differs from the tag
    directly before it (or is the first tag), and with ``I-`` when it
    repeats the tag directly before it. The encoded tags are joined by
    single spaces.
    """
    encoded_tags = []
    last_tag = None
    for tag in self.ner_tags_tuple:
        if tag == 'O':
            encoded_tags.append(tag)
        else:
            # A tag continues an entity only if the directly preceding
            # tag exists and is identical; otherwise it begins a new one.
            continues_entity = last_tag is not None and tag == last_tag
            prefix = 'I' if continues_entity else 'B'
            encoded_tags.append(f'{prefix}-{tag}')
        # Track every tag, including 'O', so an entity after 'O' restarts.
        last_tag = tag
    return ' '.join(encoded_tags)
@classmethod
def save(cls, basename: str, tagged_sentences: Iterable['TaggedSentence']) -> None:
sentences_filename = cls._get_sentences_filename(basename)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment