From 2305a53f79d06722754c0802b4560020611d7654 Mon Sep 17 00:00:00 2001
From: Vit Novotny <witiko@mail.muni.cz>
Date: Sat, 26 Feb 2022 13:08:11 +0000
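Subject: [PATCH] Harden scripts.extract_detected_languages against flat
 directory structures

get_languages_worker now first looks for the page languages at
INPUT_OCR_ROOT / filename.stem (flat layout) and only falls back to the
previous INPUT_OCR_ROOT / filename.parent / filename.stem location when
that read raises IOError. 'not-exists' is returned only if both lookups
fail.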
---
 scripts/extract_detected_languages.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/scripts/extract_detected_languages.py b/scripts/extract_detected_languages.py
index 8becd4b5..46b6e7af 100644
--- a/scripts/extract_detected_languages.py
+++ b/scripts/extract_detected_languages.py
@@ -27,11 +27,15 @@ THRESHOLD = float(sys.argv[5]) / 100.0
 
 
 def get_languages_worker(filename):
-    basename = str(INPUT_OCR_ROOT / filename.parent / filename.stem)
+    basename = INPUT_OCR_ROOT / filename.stem
     try:
-        languages = read_page_languages(basename, DETECTED_LANGUAGES, algorithm='OLDA')
+        languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
     except IOError:
-        return 'not-exists'
+        basename = INPUT_OCR_ROOT / filename.parent / filename.stem
+        try:
+            languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
+        except IOError:
+            return 'not-exists'
     return (filename, languages)
 
 
-- 
GitLab