Skip to content
Snippets Groups Projects
Commit d721be26 authored by Vít Starý Novotný
Browse files

Guard against zero probabilities of languages

parent c9796cbb
No related branches found
No related tags found
No related merge requests found
...@@ -645,6 +645,7 @@ def l1_normalize(dictionary): ...@@ -645,6 +645,7 @@ def l1_normalize(dictionary):
key: float(value) / value_sum key: float(value) / value_sum
for key, value for key, value
in dictionary.items() in dictionary.items()
if value > 0.0
} }
...@@ -701,9 +702,13 @@ def _read_page_languages_hocr(f): ...@@ -701,9 +702,13 @@ def _read_page_languages_hocr(f):
for paragraph in xml_document.xpath('//p[@lang]'): for paragraph in xml_document.xpath('//p[@lang]'):
paragraph_language_code = paragraph.attrib['lang'] paragraph_language_code = paragraph.attrib['lang']
paragraph_confidence = get_confidence(paragraph) paragraph_confidence = get_confidence(paragraph)
if not paragraph_confidence:
continue
for word in paragraph.xpath('.//span[@class="ocrx_word" and @lang]'): for word in paragraph.xpath('.//span[@class="ocrx_word" and @lang]'):
word_language_code = word.attrib['lang'] word_language_code = word.attrib['lang']
word_confidence = get_confidence(word) word_confidence = get_confidence(word)
if not word_confidence:
continue
languages[word_language_code] += word_confidence languages[word_language_code] += word_confidence
paragraph_confidence -= word_confidence paragraph_confidence -= word_confidence
assert paragraph_confidence >= 0.0 assert paragraph_confidence >= 0.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment