diff --git a/scripts/extract_detected_languages.py b/scripts/extract_detected_languages.py
index 8becd4b587585892068714aa6bf7cfdd02317481..46b6e7af058e6324e213df5318951adfc8bbbb23 100644
--- a/scripts/extract_detected_languages.py
+++ b/scripts/extract_detected_languages.py
@@ -27,11 +27,15 @@ THRESHOLD = float(sys.argv[5]) / 100.0
 
 
 def get_languages_worker(filename):
-    basename = str(INPUT_OCR_ROOT / filename.parent / filename.stem)
+    basename = INPUT_OCR_ROOT / filename.stem
     try:
-        languages = read_page_languages(basename, DETECTED_LANGUAGES, algorithm='OLDA')
+        languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
     except IOError:
-        return 'not-exists'
+        basename = INPUT_OCR_ROOT / filename.parent / filename.stem
+        try:
+            languages = read_page_languages(str(basename), DETECTED_LANGUAGES, algorithm='OLDA')
+        except IOError:
+            return 'not-exists'
 
     return (filename, languages)
 