From c6b6c748aeada942c10df6c224a3271605f4a9e1 Mon Sep 17 00:00:00 2001 From: KevinHuSh <kevinhu.sh@gmail.com> Date: Tue, 7 May 2024 10:01:24 +0800 Subject: [PATCH] fix file encoding detection bug (#653) ### What problem does this PR solve? #651 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/file_app.py | 2 +- rag/nlp/__init__.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/api/apps/file_app.py b/api/apps/file_app.py index 93fd3fd..b94c155 100644 --- a/api/apps/file_app.py +++ b/api/apps/file_app.py @@ -335,7 +335,7 @@ def get(file_id): response = flask.make_response(MINIO.get(file.parent_id, file.location)) ext = re.search(r"\.([^.]+)$", file.name) if ext: - if doc.type == FileType.VISUAL.value: + if file.type == FileType.VISUAL.value: response.headers.set('Content-Type', 'image/%s' % ext.group(1)) else: response.headers.set( diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 61ba840..3a921c2 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -28,11 +28,17 @@ all_codecs = [ def find_codec(blob): global all_codecs for c in all_codecs: + try: + blob[:1024].decode(c) + return c + except Exception as e: + pass try: blob.decode(c) return c except Exception as e: pass + return "utf-8" -- GitLab