diff --git a/scripts/common.py b/scripts/common.py
index 9d4472d701592d6b699af49cd57dff60d428d7e9..e9e4be00b1da9c25b85ea26e9b402e4fbeebda1b 100644
--- a/scripts/common.py
+++ b/scripts/common.py
@@ -645,6 +645,7 @@ def l1_normalize(dictionary):
         key: float(value) / value_sum
         for key, value
         in dictionary.items()
+        if value > 0.0
     }
 
 
@@ -701,9 +702,13 @@ def _read_page_languages_hocr(f):
     for paragraph in xml_document.xpath('//p[@lang]'):
         paragraph_language_code = paragraph.attrib['lang']
         paragraph_confidence = get_confidence(paragraph)
+        if not paragraph_confidence:
+            continue
         for word in paragraph.xpath('.//span[@class="ocrx_word" and @lang]'):
             word_language_code = word.attrib['lang']
             word_confidence = get_confidence(word)
+            if not word_confidence:
+                continue
             languages[word_language_code] += word_confidence
             paragraph_confidence -= word_confidence
         assert paragraph_confidence >= 0.0