From 39269d2f794e23530da5107aa3dc77467bff475b Mon Sep 17 00:00:00 2001
From: KevinHuSh <>
Date: Wed, 28 Feb 2024 15:01:12 +0800
Subject: [PATCH] add dockerfile and fix trival bugs (#78)

 Dockerfile                          | 14 +++++++++                           | 48 ++++++++++++++++++++++++++++-                        |  1 +
 api/apps/            | 16 ++++++++++
 api/db/                 | 23 +++++++-------
 api/db/services/ |  9 +++++-
 api/               |  9 ++++++
 api/                     | 36 ++++++++++++++++++----
 api/utils/               |  2 +-
 conf/service_conf.yaml              | 37 ++++++++++------------
 docker/docker-compose.yml           | 18 +++++++++++
 docker/                | 24 +++++++++++++++
 docker/service_conf.yaml            | 36 ++++++++++++++++++++++
 rag/utils/             |  2 +-
 14 files changed, 233 insertions(+), 42 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644
 create mode 100644 docker/
 create mode 100644 docker/service_conf.yaml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..09ee6e1
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM infiniflow/ragflow-base:v1.0
+WORKDIR /ragflow
+COPY . ./
+# Install the dependencies of the package under ./web and run its "build" script.
+# "npm run build" is required here: "npm build" does not execute package scripts.
+RUN cd ./web && npm i && npm run build
+COPY docker/ ./
+RUN chmod +x ./
+ENTRYPOINT ["/bin/bash", "./"]
\ No newline at end of file
diff --git a/ b/
index 21ab966..7f1e884 100644
--- a/
+++ b/
@@ -1 +1,47 @@
-# docgpt
\ No newline at end of file
+English | [简体中文](./
+## System Environment Preparation
+### Install docker
+If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](
+### OS Setups
+In order to run [ElasticSearch](, 
+you need to check the output of the following command:
+121:/ragflow# sysctl vm.max_map_count
+vm.max_map_count = 262144
+If **vm.max_map_count** is less than 262144, please run the following command:
+121:/ragflow# sudo sysctl -w vm.max_map_count=262144
+However, this change is not persistent and will be reset after a system reboot. 
+To make the change permanent, you need to update the **/etc/sysctl.conf** file.
+Add or update the following line in the file: `vm.max_map_count=262144`
+### Here we go!
+> If you want to change the basic settings, such as ports or passwords, please refer to [.env](./docker/.env) before starting the system.
+> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./conf/service_conf.yaml) which is a 
+> configuration of the back-end service and should be consistent with [.env](./docker/.env).
+> - In [service_conf.yaml](./conf/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended. 
+> In **user_default_llm** of [service_conf.yaml](./conf/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
+> It's OK if you don't have an _API_KEY_ at the moment; you can specify it later in the settings after starting and logging in to the system.
+> - We currently support the following LLM factories, and others are coming soon: 
+> [OpenAI](, [通义千问/QWen](, 
+> [智普AI/ZhipuAI](
+121:/ragflow# cd docker
+121:/ragflow/docker# docker compose up 
+If, after a few minutes, the output stops scrolling and halts as in the following picture, _**Hallelujah!**_ You have successfully launched the system.
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="" width="1000"/>
\ No newline at end of file
diff --git a/ b/
new file mode 100644
index 0000000..c43cc56
--- /dev/null
+++ b/
@@ -0,0 +1 @@
+[English](./ | 简体中文
\ No newline at end of file
diff --git a/api/apps/ b/api/apps/
index 9ce7e2b..4a75922 100644
--- a/api/apps/
+++ b/api/apps/
@@ -146,6 +146,21 @@ def list():
         return server_error_response(e)
+@manager.route('/thumbnails', methods=['GET'])
+def thumbnails():
+    doc_ids = request.args.get("doc_ids").split(",")
+    if not doc_ids:
+        return get_json_result(
+            data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR)
+    try:
+        docs = DocumentService.get_thumbnails(doc_ids)
+        return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
+    except Exception as e:
+        return server_error_response(e)
 @manager.route('/change_status', methods=['POST'])
 @validate_request("doc_id", "status")
@@ -263,6 +278,7 @@ def rename():
 @manager.route('/get/<doc_id>', methods=['GET'])
 def get(doc_id):
         e, doc = DocumentService.get_by_id(doc_id)
diff --git a/api/db/ b/api/db/
index 531abf4..efa2d8a 100644
--- a/api/db/
+++ b/api/db/
@@ -56,21 +56,21 @@ def init_superuser():
              "api_key": API_KEY})
     if not**user_info):
-        print("【ERROR】can't init admin.")
+        print("\033[93m【ERROR】\033[0mcan't init admin.")
-    print("【INFO】Super user initialized. user name: admin, password: admin. Changing the password after logining is strongly recomanded.")
+    print("【INFO】Super user initialized. \033[93muser name: admin, password: admin\033[0m. Changing the password after logining is strongly recomanded.")
     chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
     msg ="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
     if msg.find("ERROR: ") == 0:
-        print("【ERROR】: '{}' dosen't work. {}".format(tenant["llm_id"]), msg)
+        print("\33[91m【ERROR】\33[0m: ", "'{}' dosen't work. {}".format(tenant["llm_id"]), msg)
     embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
-    v,c = embd_mdl.encode(["Hello!"])
+    v, c = embd_mdl.encode(["Hello!"])
     if c == 0:
-        print("【ERROR】: '{}' dosen't work...".format(tenant["embd_id"]))
+        print("\33[91m【ERROR】\33[0m:", " '{}' dosen't work!".format(tenant["embd_id"]))
 def init_llm_factory():
@@ -89,12 +89,13 @@ def init_llm_factory():
             "logo": "",
             "status": "1",
-        },{
-            "name": "文心一言",
-            "logo": "",
-            "status": "1",
+        # {
+        #     "name": "文心一言",
+        #     "logo": "",
+        #     "status": "1",
+        # },
     llm_infos = [
         # ---------------------- OpenAI ------------------------
@@ -198,7 +199,7 @@ def init_llm_factory():
             "llm_name": "embedding-2",
             "tags": "TEXT EMBEDDING",
             "max_tokens": 512,
-            "model_type": LLMType.SPEECH2TEXT.value
+            "model_type": LLMType.EMBEDDING.value
     for info in factory_infos:
diff --git a/api/db/services/ b/api/db/services/
index d4d00c1..f58b0e1 100644
--- a/api/db/services/
+++ b/api/db/services/
@@ -107,4 +107,11 @@ class DocumentService(CommonService):
         docs =, on=( == cls.model.kb_id)).where( == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:return
-        return docs[0]["tenant_id"]
\ No newline at end of file
+        return docs[0]["tenant_id"]
+    @classmethod
+    @DB.connection_context()
+    def get_thumbnails(cls, docids):
+        fields = [, cls.model.thumbnail]
+        return list(*fields).where(
diff --git a/api/ b/api/
index f322b4e..44b4896 100644
--- a/api/
+++ b/api/
@@ -33,6 +33,15 @@ from api.db.init_data import init_web_data
 from api.versions import get_versions
 if __name__ == '__main__':
+    print("""
+    ____                 ______ __               
+   / __ \ ____ _ ____ _ / ____// /____  _      __
+  / /_/ // __ `// __ `// /_   / // __ \| | /| / /
+ / _, _// /_/ // /_/ // __/  / // /_/ /| |/ |/ / 
+/_/ |_| \__,_/ \__, //_/    /_/ \____/ |__/|__/  
+              /____/                             
+    """)
         f'project base: {utils.file_utils.get_project_base_directory()}'
diff --git a/api/ b/api/
index 08f7dc7..331a086 100644
--- a/api/
+++ b/api/
@@ -45,12 +45,36 @@ REQUEST_MAX_WAIT_SEC = 300
 USE_REGISTRY = get_base_config("use_registry")
+default_llm = {
+    "通义千问": {
+        "chat_model": "qwen-plus",
+        "embedding_model": "text-embedding-v2",
+        "image2text_model": "qwen-vl-max",
+        "asr_model": "paraformer-realtime-8k-v1",
+    },
+    "OpenAI": {
+        "chat_model": "gpt-3.5-turbo",
+        "embedding_model": "text-embedding-ada-002",
+        "image2text_model": "gpt-4-vision-preview",
+        "asr_model": "whisper-1",
+    },
+    "智普AI": {
+        "chat_model": "glm-3-turbo",
+        "embedding_model": "embedding-2",
+        "image2text_model": "glm-4v",
+        "asr_model": "",
+    },
 LLM = get_base_config("user_default_llm", {})
-LLM_FACTORY=LLM.get("factory", "通义千问")
-CHAT_MDL = LLM.get("chat_model", "qwen-plus")
-EMBEDDING_MDL = LLM.get("embedding_model", "text-embedding-v2")
-ASR_MDL = LLM.get("asr_model", "paraformer-realtime-8k-v1")
-IMAGE2TEXT_MDL = LLM.get("image2text_model", "qwen-vl-max")
+LLM_FACTORY = LLM.get("factory", "通义千问")
+if LLM_FACTORY not in default_llm:
+    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
+    LLM_FACTORY = "通义千问"
+CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
+ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 API_KEY = LLM.get("api_key", "infiniflow API Key")
 PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
@@ -72,7 +96,7 @@ RANDOM_INSTANCE_ID = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("random_inst
 PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
 PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
-DATABASE = decrypt_database_config()
+DATABASE = decrypt_database_config(name="mysql")
 # Logger
 LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
diff --git a/api/utils/ b/api/utils/
index 49ebc5b..9ae6e0c 100644
--- a/api/utils/
+++ b/api/utils/
@@ -264,7 +264,7 @@ def decrypt_database_password(password):
     return pwdecrypt_fun(private_key, password)
-def decrypt_database_config(database=None, passwd_key="passwd", name="database"):
+def decrypt_database_config(database=None, passwd_key="password", name="database"):
     if not database:
         database = get_base_config(name, {})
diff --git a/conf/service_conf.yaml b/conf/service_conf.yaml
index 34b357c..5bf7b78 100644
--- a/conf/service_conf.yaml
+++ b/conf/service_conf.yaml
@@ -1,41 +1,36 @@
-  client:
-    switch: false
-    http_app_key:
-    http_secret_key:
-  site:
-    switch: false
-  switch: false
-  component: false
-  dataset: false
-  # you must set real ip address, and is not supported
   http_port: 9380
   name: 'rag_flow'
   user: 'root'
-  passwd: 'infini_rag_flow'
+  password: 'infini_rag_flow'
   host: ''
   port: 5455
   max_connections: 100
   stale_timeout: 30
   user: 'rag_flow'
-  passwd: 'infini_rag_flow'
+  password: 'infini_rag_flow'
   host: ''
-  hosts: ''
+  hosts: ''
   factory: '通义千问'
-  chat_model: 'qwen-plus'
-  embedding_model: 'text-embedding-v2'
-  asr_model: 'paraformer-realtime-8k-v1'
-  image2text_model: 'qwen-vl-max'
   api_key: 'sk-xxxxxxxxxxxxx'
     client_id: 302129228f0d96055bee
     secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
-    url:
\ No newline at end of file
+    url:
+  client:
+    switch: false
+    http_app_key:
+    http_secret_key:
+  site:
+    switch: false
+  switch: false
+  component: false
+  dataset: false
\ No newline at end of file
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index b01d215..1cd7f03 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -84,6 +84,24 @@ services:
     restart: always
+  # RAGFlow server container; started after the es01, mysql and minio services.
+  ragflow:
+    depends_on:
+      - es01
+      - mysql
+      - minio
+    image: infiniflow/ragflow:v1.0
+    container_name: ragflow-server
+    ports:
+      # Publishes container port 9380 on the host port given by SVR_HTTP_PORT.
+      - ${SVR_HTTP_PORT}:9380
+    volumes:
+      # Host paths next to this compose file are mounted over the container's
+      # service configuration, nginx configuration and log directory.
+      - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
+      - ./nginx.conf:/etc/nginx/nginx.conf
+      - ./ragflow-logs:/ragflow/logs
+    networks:
+      - ragflow
+    restart: always
     driver: local
diff --git a/docker/ b/docker/
new file mode 100644
index 0000000..99661f1
--- /dev/null
+++ b/docker/
@@ -0,0 +1,24 @@
+# Launch the first python service in the background.
+python rag/svr/ &
+# Run the python task under mpirun with 2 processes, restarting it whenever it exits.
+function task_exe(){
+  while [ 1 -eq 1 ];do mpirun -n 2 python rag/svr/ ; done
+}
+# Every 5 seconds, count matching processes in `ps aux` and relaunch the
+# background python service when none is found.
+function watch_broker(){
+  # "[" requires a space before the closing "]"; without it the test fails
+  # with an error and the loop body never runs.
+  while [ 1 -eq 1 ];do
+    # NOTE(review): the grep pattern is empty here, which matches every line,
+    # so the count is never below 1 — confirm the intended process name.
+    C=`ps aux|grep ""|grep -v grep|wc -l`;
+    if [ "$C" -lt 1 ];then
+      python rag/svr/ &
+    fi
+    sleep 5;
+  done
+}
+task_exe &
+# Wait before starting the watchdog so the processes launched above can come up.
+sleep 10;
+watch_broker &
+# Runs in the foreground; the script stays alive as long as this process does.
+python api/
\ No newline at end of file
diff --git a/docker/service_conf.yaml b/docker/service_conf.yaml
new file mode 100644
index 0000000..5bf7b78
--- /dev/null
+++ b/docker/service_conf.yaml
@@ -0,0 +1,36 @@
+  host:
+  http_port: 9380
+  name: 'rag_flow'
+  user: 'root'
+  password: 'infini_rag_flow'
+  host: ''
+  port: 5455
+  max_connections: 100
+  stale_timeout: 30
+  user: 'rag_flow'
+  password: 'infini_rag_flow'
+  host: ''
+  hosts: ''
+  factory: '通义千问'
+  api_key: 'sk-xxxxxxxxxxxxx'
+  github:
+    # NOTE(review): client_id/secret_key look like real OAuth credentials committed
+    # in plain text — confirm, rotate if so, and supply them at deploy time instead.
+    client_id: 302129228f0d96055bee
+    secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
+    url:
+  client:
+    switch: false
+    http_app_key:
+    http_secret_key:
+  site:
+    switch: false
+  switch: false
+  component: false
+  dataset: false
\ No newline at end of file
diff --git a/rag/utils/ b/rag/utils/
index 14a7067..18d3d3b 100644
--- a/rag/utils/
+++ b/rag/utils/
@@ -23,7 +23,7 @@ class HuMinio(object):
             self.conn = Minio(settings.MINIO["host"],
-                              secret_key=settings.MINIO["passwd"],
+                              secret_key=settings.MINIO["password"],
         except Exception as e: