From 2305a53f79d06722754c0802b4560020611d7654 Mon Sep 17 00:00:00 2001
From: Vit Novotny <witiko@mail.muni.cz>
Date: Sat, 26 Feb 2022 13:08:11 +0000
Subject: [PATCH] Harden scripts.extract_detected_languages against flat directory structures

---
 scripts/extract_detected_languages.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/scripts/extract_detected_languages.py b/scripts/extract_detected_languages.py
index 8becd4b5..46b6e7af 100644
--- a/scripts/extract_detected_languages.py
+++ b/scripts/extract_detected_languages.py
@@ -27,11 +27,15 @@ THRESHOLD = float(sys.argv[5]) / 100.0
 
 
 def get_languages_worker(filename):
-    basename = str(INPUT_OCR_ROOT / filename.parent / filename.stem)
+    basename = INPUT_OCR_ROOT / filename.stem
     try:
-        languages = read_page_languages(basename, DETECTED_LANGUAGES, algorithm='OLDA')
+        languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
     except IOError:
-        return 'not-exists'
+        basename = INPUT_OCR_ROOT / filename.parent / filename.stem
+        try:
+            languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
+        except IOError:
+            return 'not-exists'
     return (filename, languages)
 
 
-- 
GitLab