diff --git a/api/apps/api_app.py b/api/apps/api_app.py index f294272232bf7c1c462aea9432801268ee81860b..cc6f646d7bf4f2224a11f95bed86335e3b996c40 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -105,8 +105,8 @@ def stats(): res = { "pv": [(o["dt"], o["pv"]) for o in objs], "uv": [(o["dt"], o["uv"]) for o in objs], - "speed": [(o["dt"], o["tokens"]/o["duration"]) for o in objs], - "tokens": [(o["dt"], o["tokens"]/1000.) for o in objs], + "speed": [(o["dt"], float(o["tokens"])/float(o["duration"])) for o in objs], + "tokens": [(o["dt"], float(o["tokens"])/1000.) for o in objs], "round": [(o["dt"], o["round"]) for o in objs], "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs] } @@ -115,8 +115,7 @@ def stats(): return server_error_response(e) -@manager.route('/new_conversation', methods=['POST']) -@validate_request("user_id") +@manager.route('/new_conversation', methods=['GET']) def set_conversation(): token = request.headers.get('Authorization').split()[1] objs = APIToken.query(token=token) @@ -131,7 +130,7 @@ def set_conversation(): conv = { "id": get_uuid(), "dialog_id": dia.id, - "user_id": req["user_id"], + "user_id": request.args.get("user_id", ""), "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}] } API4ConversationService.save(**conv) diff --git a/api/db/db_models.py b/api/db/db_models.py index 27ad80f99d6fe3aaa19881dcf1d68de871026f61..e6f2d287ef079ae5ccbdd1e216fc174a21b3a7db 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -629,7 +629,7 @@ class Document(DataBaseModel): max_length=128, null=False, default="local", - help_text="where dose this document from") + help_text="where dose this document come from") type = CharField(max_length=32, null=False, help_text="file extension") created_by = CharField( max_length=32, diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index dfd3756d26e2926a796f88431ea1e494b7507095..6c3324544dd1f2e552cbbfb1a7c43b402f427adc 100644 --- 
a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -43,7 +43,9 @@ class HuParser: model_dir, "updown_concat_xgb.model")) except Exception as e: model_dir = snapshot_download( - repo_id="InfiniFlow/text_concat_xgb_v1.0") + repo_id="InfiniFlow/text_concat_xgb_v1.0", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) self.updown_cnt_mdl.load_model(os.path.join( model_dir, "updown_concat_xgb.model")) diff --git a/deepdoc/vision/layout_recognizer.py b/deepdoc/vision/layout_recognizer.py index 917ee6ed83a48a2cbf61a0f33ea3608a3c314718..58ddcdb622d6ce12b7e37c09f946252b7900d935 100644 --- a/deepdoc/vision/layout_recognizer.py +++ b/deepdoc/vision/layout_recognizer.py @@ -43,7 +43,9 @@ class LayoutRecognizer(Recognizer): "rag/res/deepdoc") super().__init__(self.labels, domain, model_dir) except Exception as e: - model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc") + model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) super().__init__(self.labels, domain, model_dir) self.garbage_layouts = ["footer", "header", "reference"] diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py index b55024ed456bb367047c50c0035a7af8710aeca3..d602da06fefada82796b4a5f1a0b6ca9720b96e5 100644 --- a/deepdoc/vision/ocr.py +++ b/deepdoc/vision/ocr.py @@ -486,7 +486,9 @@ class OCR(object): self.text_detector = TextDetector(model_dir) self.text_recognizer = TextRecognizer(model_dir) except Exception as e: - model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc") + model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) self.text_detector = TextDetector(model_dir) self.text_recognizer = TextRecognizer(model_dir) diff --git a/deepdoc/vision/recognizer.py b/deepdoc/vision/recognizer.py index 
67e096ef2def315e1b51a1891e741491cbee89d2..1ca7c4478ffd5129851a424724d1981cf128f13e 100644 --- a/deepdoc/vision/recognizer.py +++ b/deepdoc/vision/recognizer.py @@ -41,7 +41,9 @@ class Recognizer(object): "rag/res/deepdoc") model_file_path = os.path.join(model_dir, task_name + ".onnx") if not os.path.exists(model_file_path): - model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc") + model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) model_file_path = os.path.join(model_dir, task_name + ".onnx") else: model_file_path = os.path.join(model_dir, task_name + ".onnx") diff --git a/deepdoc/vision/table_structure_recognizer.py b/deepdoc/vision/table_structure_recognizer.py index 6779137d87b39383835c1b5b55682b9f41f6e222..548eb62046481d4cf708c532f49b45b74fa7940b 100644 --- a/deepdoc/vision/table_structure_recognizer.py +++ b/deepdoc/vision/table_structure_recognizer.py @@ -39,7 +39,9 @@ class TableStructureRecognizer(Recognizer): get_project_base_directory(), "rag/res/deepdoc")) except Exception as e: - super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc")) + super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False)) def __call__(self, images, thr=0.2): tbls = super().__call__(images, thr) diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index e6ff0b117835819d34106d9f3dcedfc55560df04..e6e18fbed8fcea2fece6cb21b197fb619844b4ac 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -14,6 +14,8 @@ # limitations under the License. 
# from typing import Optional + +from huggingface_hub import snapshot_download from zhipuai import ZhipuAI import os from abc import ABC @@ -35,7 +37,10 @@ try: query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章：", use_fp16=torch.cuda.is_available()) except Exception as e: - flag_model = FlagModel("BAAI/bge-large-zh-v1.5", + model_dir = snapshot_download(repo_id="BAAI/bge-large-zh-v1.5", + local_dir=os.path.join(get_project_base_directory(), "rag/res/bge-large-zh-v1.5"), + local_dir_use_symlinks=False) + flag_model = FlagModel(model_dir, query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章：", use_fp16=torch.cuda.is_available())