From c6b6c748aeada942c10df6c224a3271605f4a9e1 Mon Sep 17 00:00:00 2001
From: KevinHuSh <kevinhu.sh@gmail.com>
Date: Tue, 7 May 2024 10:01:24 +0800
Subject: [PATCH] fix file encoding detection bug (#653)

### What problem does this PR solve?

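Addresses #651.

- `api/apps/file_app.py`: in `get`, check `file.type` instead of `doc.type`
  when deciding whether to send an `image/*` Content-Type.
- `rag/nlp/__init__.py`: in `find_codec`, try decoding the first 1024 bytes
  of the blob with each candidate codec before trying the whole blob.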

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/apps/file_app.py | 2 +-
 rag/nlp/__init__.py  | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/api/apps/file_app.py b/api/apps/file_app.py
index 93fd3fd..b94c155 100644
--- a/api/apps/file_app.py
+++ b/api/apps/file_app.py
@@ -335,7 +335,7 @@ def get(file_id):
         response = flask.make_response(MINIO.get(file.parent_id, file.location))
         ext = re.search(r"\.([^.]+)$", file.name)
         if ext:
-            if doc.type == FileType.VISUAL.value:
+            if file.type == FileType.VISUAL.value:
                 response.headers.set('Content-Type', 'image/%s' % ext.group(1))
             else:
                 response.headers.set(
diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py
index 61ba840..3a921c2 100644
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -28,11 +28,17 @@ all_codecs = [
 def find_codec(blob):
     global all_codecs
     for c in all_codecs:
+        try:
+            blob[:1024].decode(c)
+            return c
+        except Exception as e:
+            pass
         try:
             blob.decode(c)
             return c
         except Exception as e:
             pass
+
     return "utf-8"
 
 
-- 
GitLab