diff --git a/README.md b/README.md index 71e031691dbd9e76a100a081a74dd74523ef91f6..c3123cce37e012a42136bc229085f2f8f7d39372 100644 --- a/README.md +++ b/README.md @@ -79,10 +79,6 @@ vm.max_map_count=262144 If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/) ## Quick Start -> If you want to change the basic setups, like port, password .etc., please refer to [.env](./docker/.env) before starting the system. - -> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a -> configuration of the back-end service and should be consistent with [.env](./docker/.env). > - In [service_conf.yaml](./docker/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended. > In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_. @@ -91,25 +87,25 @@ If your machine doesn't have *Docker* installed, please refer to [Install Docker > [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model), > [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs) ```bash -121:/# git clone https://github.com/infiniflow/ragflow.git -121:/# cd ragflow/docker -121:/ragflow/docker# docker compose up -d +$ git clone https://github.com/infiniflow/ragflow.git +$ cd ragflow/docker +$ docker compose up -d ``` ### OR ```bash -121:/# git clone https://github.com/infiniflow/ragflow.git -121:/# cd ragflow/ -121:/# docker build -t infiniflow/ragflow:v1.0 . -121:/# cd ragflow/docker -121:/ragflow/docker# docker compose up -d +$ git clone https://github.com/infiniflow/ragflow.git +$ cd ragflow/ +$ docker build -t infiniflow/ragflow:v1.0 . 
+$ cd docker +$ docker compose up -d ``` > The core image is about 15GB, please be patient for the first time After pulling all the images and running up, use the following command to check the server status. If you can have the following outputs, _**Hallelujah!**_ You have successfully launched the system. ```bash -121:/ragflow# docker logs -f ragflow-server +$ docker logs -f ragflow-server ____ ______ __ / __ \ ____ _ ____ _ / ____// /____ _ __ @@ -139,6 +135,11 @@ If you need to change the default setting of the system when you deploy it. Ther Please refer to [README](./docker/README.md) and manually set the configuration. After changing something, please run *docker-compose up -d* again. +> If you want to change the basic setups, such as port, password, etc., please refer to [.env](./docker/.env) before starting the system. + +> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a +> configuration of the back-end service and should be consistent with [.env](./docker/.env). + # RoadMap - [ ] File manager. 
diff --git a/rag/app/naive.py b/rag/app/naive.py index 6dad8a21175cb41ffe7630e343141ae022085367..dc8726ffd6eb8f896cc264c1354ec741a0a54a8c 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -42,7 +42,9 @@ class Pdf(PdfParser): self._text_merge() callback(0.67, "Text merging finished") tbls = self._extract_table_figure(True, zoomin, True, True) - self._naive_vertical_merge() + #self._naive_vertical_merge() + self._concat_downward() + #self._filter_forpages() cron_logger.info("paddle layouts:".format( (timer() - start) / (self.total_page + 0.1))) @@ -79,7 +81,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, elif re.search(r"\.pdf$", filename, re.IGNORECASE): pdf_parser = Pdf( - ) if parser_config["layout_recognize"] else PlainParser() + ) if parser_config.get("layout_recognize", True) else PlainParser() sections, tbls = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback) res = tokenize_table(tbls, doc, eng) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index a3a644f804af98c87aba5446292b2d4ba7cd4242..cc9f533efb1e0115989871a8f7a02ee623ef02a1 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -7,7 +7,6 @@ from elasticsearch_dsl import Q, Search from typing import List, Optional, Dict, Union from dataclasses import dataclass -from api.settings import chat_logger from rag.settings import es_logger from rag.utils import rmSpace from rag.nlp import huqie, query @@ -365,6 +364,7 @@ class Dealer: return ranks def sql_retrieval(self, sql, fetch_size=128, format="json"): + from api.settings import chat_logger sql = re.sub(r"[ ]+", " ", sql) sql = sql.replace("%", "") es_logger.info(f"Get es sql: {sql}")