diff --git a/README.md b/README.md
index 9ef33c06a1ec1cf7a825cea26683f59aba08334e..7b3e6bb13afdd1692086fd2e6c28d97d1d06d237 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,7 @@
 ```bash
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose up -d
 ```
@@ -165,12 +166,13 @@
 $ git clone https://github.com/infiniflow/ragflow.git
 $ cd ragflow/
 $ docker build -t infiniflow/ragflow:v1.0 .
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose up -d
 ```
 
 ## 🆕 Latest Features
 
-- Support [Ollam](./docs/ollama.md) for local LLM deployment.
+- Support [Ollama](./docs/ollama.md) for local LLM deployment.
 - Support Chinese UI.
 
 ## 📜 Roadmap
diff --git a/README_ja.md b/README_ja.md
index 8437bebfb7719985b5d430f501b290e525a481f1..2c5f50f6018ed9dc3379c320eab802298b2a49a6 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -101,6 +101,7 @@
 ```bash
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose up -d
 ```
@@ -165,12 +166,13 @@
 $ git clone https://github.com/infiniflow/ragflow.git
 $ cd ragflow/
 $ docker build -t infiniflow/ragflow:v1.0 .
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose up -d
 ```
 
 ## 🆕 最新の新機能
 
-- [Ollam](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。
+- [Ollama](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。
 - 中国語インターフェースをサポートします。
 
 ## 📜 ロードマップ
diff --git a/README_zh.md b/README_zh.md
index eec642e8adeced8c5de6814eff192e3a65e981cd..21c93cdda10bd47c453973e41869a8f1ad7e61d7 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -101,6 +101,7 @@
 ```bash
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose -f docker-compose-CN.yml up -d
 ```
@@ -165,12 +166,13 @@
 $ git clone https://github.com/infiniflow/ragflow.git
 $ cd ragflow/
 $ docker build -t infiniflow/ragflow:v1.0 .
 $ cd ragflow/docker
+$ chmod +x ./entrypoint.sh
 $ docker compose up -d
 ```
 
 ## 🆕 最近新特性
 
-- 支持用 [Ollam](./docs/ollama.md) 对大模型进行本地化部署。
+- 支持用 [Ollama](./docs/ollama.md) 对大模型进行本地化部署。
 - 支持中文界面。
 
 ## 📜 路线图
diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index 42339e1f00dae05b0202b15b976b17e8ef42885e..6ece253cafa969aedba934da74692b2148837cfe 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -20,7 +20,7 @@ from flask_login import login_required
 from api.db.services.dialog_service import DialogService, ConversationService
 from api.db import LLMType
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.llm_service import LLMService, LLMBundle
+from api.db.services.llm_service import LLMService, LLMBundle, TenantLLMService
 from api.settings import access_logger, stat_logger, retrievaler, chat_logger
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.utils import get_uuid
@@ -184,8 +184,11 @@ def chat(dialog, messages, **kwargs):
     assert messages[-1]["role"] == "user", "The last content of this conversation is not from user."
     llm = LLMService.query(llm_name=dialog.llm_id)
     if not llm:
-        raise LookupError("LLM(%s) not found" % dialog.llm_id)
-    llm = llm[0]
+        llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=dialog.llm_id)
+        if not llm:
+            raise LookupError("LLM(%s) not found" % dialog.llm_id)
+        max_tokens = 1024
+    else: max_tokens = llm[0].max_tokens
     questions = [m["content"] for m in messages if m["role"] == "user"]
     embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING)
     chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
@@ -227,11 +230,11 @@ def chat(dialog, messages, **kwargs):
     gen_conf = dialog.llm_setting
     msg = [{"role": m["role"], "content": m["content"]}
            for m in messages if m["role"] != "system"]
-    used_token_count, msg = message_fit_in(msg, int(llm.max_tokens * 0.97))
+    used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
     if "max_tokens" in gen_conf:
         gen_conf["max_tokens"] = min(
             gen_conf["max_tokens"],
-            llm.max_tokens - used_token_count)
+            max_tokens - used_token_count)
     answer = chat_mdl.chat(
         prompt_config["system"].format(
             **kwargs), msg, gen_conf)
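
Below is a minimal, self-contained sketch of the behavior the `api/apps/conversation_app.py` hunks introduce: resolve `max_tokens` from the global LLM registry, fall back to the tenant's own registrations with a default window of 1024 tokens, then fit the chat history into 97% of that window and cap any requested completion length by what remains. The names `resolve_max_tokens` and `fit_and_clamp` are illustrative only and do not exist in the repository; the real code uses `LLMService.query`, `TenantLLMService.query`, and `message_fit_in`.

```python
# Illustrative sketch only -- mirrors the control flow of the diff above,
# not the actual RAGFlow services. llm_rows / tenant_rows stand in for the
# rows returned by LLMService.query and TenantLLMService.query.
from typing import List


def resolve_max_tokens(llm_rows: List[dict], tenant_rows: List[dict], llm_id: str) -> int:
    """Prefer the globally registered model; fall back to a tenant-registered
    model with a conservative 1024-token window; otherwise fail loudly."""
    if llm_rows:
        return llm_rows[0]["max_tokens"]
    if tenant_rows:
        return 1024  # tenant-added model whose context size is unknown
    raise LookupError("LLM(%s) not found" % llm_id)


def fit_and_clamp(msg_tokens: int, gen_conf: dict, max_tokens: int) -> dict:
    """Reserve 97% of the window for the chat history (message_fit_in trims
    to that budget) and cap the requested completion length by the remainder."""
    budget = int(max_tokens * 0.97)
    used_token_count = min(msg_tokens, budget)
    if "max_tokens" in gen_conf:
        gen_conf["max_tokens"] = min(gen_conf["max_tokens"],
                                     max_tokens - used_token_count)
    return gen_conf


if __name__ == "__main__":
    # A model known only to the tenant gets the 1024-token default window.
    window = resolve_max_tokens([], [{"llm_name": "my-local-llm"}], "my-local-llm")
    print(fit_and_clamp(msg_tokens=600, gen_conf={"max_tokens": 512}, max_tokens=window))
    # -> {'max_tokens': 424}
```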