diff --git a/README.md b/README.md index 7f1e884c80fd8907dff88ba3cfb9de4ded96b139..871c6fc2d022e6eb37aacc3748c4da8fee2675ae 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,7 @@ English | [简体中文](./README_zh.md) If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/) ### OS Setups -Inorder to run [ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html), -you need to check the following command: +Firstly, you need to check the following command: ```bash 121:/ragflow# sysctl vm.max_map_count vm.max_map_count = 262144 @@ -25,23 +24,44 @@ Add or update the following line in the file: vm.max_map_count=262144 ``` -### Here we go! +## Here we go! > If you want to change the basic setups, like port, password .etc., please refer to [.env](./docker/.env) before starting the system. -> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./conf/service_conf.yaml) which is a +> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a > configuration of the back-end service and should be consistent with [.env](./docker/.env). -> - In [service_conf.yaml](./conf/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended. -> In **user_default_llm** of [service_conf.yaml](./conf/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_. +> - In [service_conf.yaml](./docker/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended. +> In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_. > It's O.K if you don't have _API_KEY_ at the moment, you can specify it later at the setting part after starting and logging in the system. 
> - We have supported the flowing LLM factory, and the others is coming soon: > [OpenAI](https://platform.openai.com/login?launch), [通义千问/QWen](https://dashscope.console.aliyun.com/model), -> [智普AI/ZhipuAI](https://open.bigmodel.cn/) +> [智谱AI/ZhipuAI](https://open.bigmodel.cn/) ```bash 121:/ragflow# cd docker -121:/ragflow/docker# docker compose up +121:/ragflow/docker# docker compose up -d ``` -If after a few minutes, it stops screening and halts like following picture, _**Hallelujah!**_ You have successfully launched the system. +After about half a minute, use the following command to check the server status. If you see the following output, +_**Hallelujah!**_ You have successfully launched the system. +```bash +121:/ragflow# docker logs -f ragflow-server + + ____ ______ __ + / __ \ ____ _ ____ _ / ____// /____ _ __ + / /_/ // __ `// __ `// /_ / // __ \| | /| / / + / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ / +/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/ + /____/ + + * Running on all addresses (0.0.0.0) + * Running on http://127.0.0.1:9380 + * Running on http://172.22.0.5:9380 +INFO:werkzeug:Press CTRL+C to quit + +``` +Open your browser, after entering the IP address of your server, if you see the following in your browser, _**Hallelujah**_ again! +> The default serving port is 80. If you want to change that, please refer to [ragflow.conf](./nginx/ragflow.conf), +> and change the *listen* value. 
+ <div align="center" style="margin-top:20px;margin-bottom:20px;"> -<img src="https://github.com/infiniflow/ragflow/assets/12318111/7dc8b73f-7890-41b4-aa09-97a417cfd20b" width="1000"/> +<img src="https://github.com/infiniflow/ragflow/assets/12318111/b24a7a5f-4d1d-4a30-90b1-7b0ec558b79d" width="1000"/> </div> \ No newline at end of file diff --git a/api/apps/user_app.py b/api/apps/user_app.py index da352faa6f3eefc5877517037af0ac144191ebfd..2c1c814e5b9942f66b9e301d1cf529de9436c344 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -70,11 +70,10 @@ def github_callback(): }, headers={"Accept": "application/json"}) res = res.json() if "error" in res: - return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR, - retmsg=res["error_description"]) + return redirect("/?error=%s" % res["error_description"]) if "user:email" not in res["scope"].split(","): - return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='user:email not in scope') + return redirect("/?error=user:email not in scope") session["access_token"] = res["access_token"] session["access_token_from"] = "github" @@ -104,8 +103,9 @@ def github_callback(): except Exception as e: rollback_user_registration(user_id) stat_logger.exception(e) + return redirect("/?error=%s"%str(e)) - return redirect("/knowledge") + return redirect("/?auth=%s"%user_id) def user_info_from_github(access_token): diff --git a/api/db/init_data.py b/api/db/init_data.py index 92dce90117eaf20342c1ac0edbe3a3dd37ac6b04..b3ef43cdac1cb7a0e39c98e20348fc144ca2244b 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -85,7 +85,7 @@ def init_llm_factory(): "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION", "status": "1", },{ - "name": "智普AI", + "name": "智谱AI", "logo": "", "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION", "status": "1", diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 44b48969506dcc63c1da3f9b8d1e3bc95f343510..a0d7a710ff94a9b373b306eeafee79316855d8cb 100644 
--- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# init env. must be the first import import logging import os diff --git a/api/settings.py b/api/settings.py index e0076edf68d6997bbb2f8d4da6904046a01c9fb5..98863dbd39c42625d79254030c0673d1217f3518 100644 --- a/api/settings.py +++ b/api/settings.py @@ -58,7 +58,7 @@ default_llm = { "image2text_model": "gpt-4-vision-preview", "asr_model": "whisper-1", }, - "智普AI": { + "智谱AI": { "chat_model": "glm-3-turbo", "embedding_model": "embedding-2", "image2text_model": "glm-4v", diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index f99aa91dae87cdf408121938b5c444409b8d4d33..7f7c91929c9095dc73a8fb5df8eb807f479f8810 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -24,9 +24,10 @@ logging.getLogger("pdfminer").setLevel(logging.WARNING) class HuParser: def __init__(self): self.ocr = OCR() - if not hasattr(self, "model_speciess"): - self.model_speciess = ParserType.NAIVE.value - self.layouter = LayoutRecognizer("layout."+self.model_speciess) + if hasattr(self, "model_speciess"): + self.layouter = LayoutRecognizer("layout."+self.model_speciess) + else: + self.layouter = LayoutRecognizer("layout") self.tbl_det = TableStructureRecognizer() self.updown_cnt_mdl = xgb.Booster() diff --git a/rag/app/naive.py b/rag/app/naive.py index b14e7bf832e0781a0674a198d958237d401e9848..c40d8542b3d4a1dfa9e7084cd132629ef48dc928 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -30,7 +30,6 @@ class Pdf(PdfParser): from timeit import default_timer as timer start = timer() - start = timer() self._layouts_rec(zoomin) callback(0.5, "Layout analysis finished.") print("paddle layouts:", timer() - start) @@ -102,7 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca raise NotImplementedError("file type not supported yet(docx, pdf, txt 
supported)") parser_config = kwargs.get("parser_config", {"chunk_token_num": 128, "delimiter": "\n!?。;ďĽďĽź"}) - cks = naive_merge(sections, parser_config["chunk_token_num"], parser_config["delimiter"]) + cks = naive_merge(sections, parser_config.get("chunk_token_num", 128), parser_config.get("delimiter", "\n!?。;ďĽďĽź")) # wrap up to es documents for ck in cks: