From 121c7a5681eec487d146f49adb2006765a61163b Mon Sep 17 00:00:00 2001 From: KevinHuSh <kevinhu.sh@gmail.com> Date: Sun, 31 Mar 2024 19:09:42 +0800 Subject: [PATCH] refine error response, add set api-key MD (#178) --- api/apps/conversation_app.py | 2 + api/apps/kb_app.py | 19 +++- api/db/services/knowledgebase_service.py | 2 +- api/utils/api_utils.py | 3 + docker/docker-compose-CN.yml | 133 +++++++++++++++++++++++ docs/llm_api_key_setup.md | 19 ++++ rag/llm/chat_model.py | 2 +- rag/llm/rpc_server.py | 18 +++ 8 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 docker/docker-compose-CN.yml create mode 100644 docs/llm_api_key_setup.md diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 5521cca..40996aa 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -253,6 +253,8 @@ def chat(dialog, messages, **kwargs): for c in kbinfos["chunks"]: if c.get("vector"): del c["vector"] + if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api")>=0: + answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'" return {"answer": answer, "reference": kbinfos} diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index bcffbc8..3eae9bd 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -13,10 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from elasticsearch_dsl import Q from flask import request from flask_login import login_required, current_user from api.db.services import duplicate_name +from api.db.services.document_service import DocumentService from api.db.services.user_service import TenantService, UserTenantService from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils import get_uuid, get_format_time @@ -25,6 +27,8 @@ from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.db_models import Knowledgebase from api.settings import stat_logger, RetCode from api.utils.api_utils import get_json_result +from rag.nlp import search +from rag.utils import ELASTICSEARCH @manager.route('/create', methods=['post']) @@ -125,11 +129,22 @@ def list(): def rm(): req = request.json try: - if not KnowledgebaseService.query( - created_by=current_user.id, id=req["kb_id"]): + kbs = KnowledgebaseService.query( + created_by=current_user.id, id=req["kb_id"]) + if not kbs: return get_json_result( data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) + for doc in DocumentService.query(kb_id=req["kb_id"]): + ELASTICSEARCH.deleteByQuery( + Q("match", doc_id=doc.id), idxnm=search.index_name(kbs[0].tenant_id)) + + DocumentService.increment_chunk_num( + doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0) + if not DocumentService.delete(doc): + return get_data_error_result( + retmsg="Database error (Document removal)!") + if not KnowledgebaseService.update_by_id( req["kb_id"], {"status": StatusEnum.INVALID.value}): return get_data_error_result( diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 365f8ed..ce34b72 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -62,7 +62,7 @@ class KnowledgebaseService(CommonService): if not kbs: return d = kbs[0].to_dict() - 
d["embd_id"] = kbs[0].tenant.embd_id + #d["embd_id"] = kbs[0].tenant.embd_id return d @classmethod diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index bf6ddb3..1ce3664 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -149,6 +149,9 @@ def server_error_response(e): if len(e.args) > 1: return get_json_result( retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e.args[0]), data=e.args[1]) + if repr(e).find("index_not_found_exception") >=0: + return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg="No chunk found, please upload file and parse it.") + return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e)) diff --git a/docker/docker-compose-CN.yml b/docker/docker-compose-CN.yml new file mode 100644 index 0000000..67d4448 --- /dev/null +++ b/docker/docker-compose-CN.yml @@ -0,0 +1,133 @@ +version: '2.2' +services: + es01: + container_name: ragflow-es-01 + image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} + volumes: + - esdata01:/usr/share/elasticsearch/data + ports: + - ${ES_PORT}:9200 + environment: + - node.name=es01 + - cluster.name=${CLUSTER_NAME} + - cluster.initial_master_nodes=es01 + - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + - bootstrap.memory_lock=false + - xpack.security.enabled=false + - TZ=${TIMEZONE} + mem_limit: ${MEM_LIMIT} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: ["CMD-SHELL", "curl http://localhost:9200"] + interval: 10s + timeout: 10s + retries: 120 + networks: + - ragflow + restart: always + + kibana: + depends_on: + es01: + condition: service_healthy + image: docker.elastic.co/kibana/kibana:${STACK_VERSION} + container_name: ragflow-kibana + volumes: + - kibanadata:/usr/share/kibana/data + ports: + - ${KIBANA_PORT}:5601 + environment: + - SERVERNAME=kibana + - ELASTICSEARCH_HOSTS=http://es01:9200 + - TZ=${TIMEZONE} + mem_limit: ${MEM_LIMIT} + networks: + - ragflow + + mysql: + image: mysql:5.7.18 + container_name: ragflow-mysql + environment: + - 
MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD} + - TZ=${TIMEZONE} + command: + --max_connections=1000 + --character-set-server=utf8mb4 + --collation-server=utf8mb4_general_ci + --default-authentication-plugin=mysql_native_password + --tls_version="TLSv1.2,TLSv1.3" + --init-file /data/application/init.sql + ports: + - ${MYSQL_PORT}:3306 + volumes: + - mysql_data:/var/lib/mysql + - ./init.sql:/data/application/init.sql + networks: + - ragflow + healthcheck: + test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"] + interval: 10s + timeout: 10s + retries: 3 + restart: always + + + minio: + image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z + container_name: ragflow-minio + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + - MINIO_ROOT_USER=${MINIO_USER} + - MINIO_ROOT_PASSWORD=${MINIO_PASSWORD} + - TZ=${TIMEZONE} + volumes: + - minio_data:/data + networks: + - ragflow + restart: always + + + ragflow: + depends_on: + mysql: + condition: service_healthy + es01: + condition: service_healthy + image: swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v1.0 + container_name: ragflow-server + ports: + - ${SVR_HTTP_PORT}:9380 + - 80:80 + - 443:443 + volumes: + - ./service_conf.yaml:/ragflow/conf/service_conf.yaml + - ./ragflow-logs:/ragflow/logs + - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf + - ./nginx/proxy.conf:/etc/nginx/proxy.conf + - ./nginx/nginx.conf:/etc/nginx/nginx.conf + environment: + - TZ=${TIMEZONE} + networks: + - ragflow + restart: always + + +volumes: + esdata01: + driver: local + kibanadata: + driver: local + mysql_data: + driver: local + minio_data: + driver: local + +networks: + ragflow: + driver: bridge diff --git a/docs/llm_api_key_setup.md b/docs/llm_api_key_setup.md new file mode 100644 index 0000000..cf8d9b2 --- /dev/null +++ b/docs/llm_api_key_setup.md @@ -0,0 +1,19 @@ + +## Set Before Starting The System + +In **user_default_llm** of 
[service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_. +RagFlow supports the following LLM factories, with more coming in the pipeline: + +> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model), +> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs) + +After signing in to these LLM suppliers, create your own API-Key; they all have a certain amount of free quota. + +## After Starting The System + +You can also set API-Key in **User Setting** as follows: + +<div align="center" style="margin-top:20px;margin-bottom:20px;"> +<img src="https://github.com/infiniflow/ragflow/assets/12318111/e4e4066c-e964-45ff-bd56-c3fc7fb18bd3" width="1000"/> +</div> + diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index af3bbac..623246f 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -156,7 +156,7 @@ class LocalLLM(Base): return do_rpc - def __init__(self, **kwargs): + def __init__(self, *args, **kwargs): self.client = LocalLLM.RPCProxy("127.0.0.1", 7860) def chat(self, system, history, gen_conf): diff --git a/rag/llm/rpc_server.py b/rag/llm/rpc_server.py index ce15d74..ec5d6b9 100644 --- a/rag/llm/rpc_server.py +++ b/rag/llm/rpc_server.py @@ -7,6 +7,23 @@ from threading import Thread from transformers import AutoModelForCausalLM, AutoTokenizer +def torch_gc(): + try: + import torch + if torch.cuda.is_available(): + # with torch.cuda.device(DEVICE): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + elif torch.backends.mps.is_available(): + try: + from torch.mps import empty_cache + empty_cache() + except Exception as e: + pass + except Exception: + pass + + class RPCHandler: + def __init__(self): + self._functions = {} @@ -49,6 +66,7 @@ def chat(messages, gen_conf): global tokenizer model = Model() try: + torch_gc() conf = { "max_new_tokens": int( gen_conf.get( -- GitLab