From a7be5d4e8ba48f84fd6c074de06e9bdb49b7630d Mon Sep 17 00:00:00 2001
From: jie yang <55631829+yangjie407@users.noreply.github.com>
Date: Tue, 16 Apr 2024 12:29:58 +0800
Subject: [PATCH] build ragflow image from scratch (#376)

### What problem does this PR solve?

issue: #205

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 Dockerfile                        | 40 +++++++++++------------
 Dockerfile.scratch                | 54 +++++++++++++++++++++++++++++++
 deepdoc/parser/resume/step_two.py |  4 +--
 requirements.txt                  |  2 +-
 4 files changed, 77 insertions(+), 23 deletions(-)
 create mode 100644 Dockerfile.scratch

diff --git a/Dockerfile b/Dockerfile
index c174ccb..c981509 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,20 @@
-FROM swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow-base:v1.0
-USER  root
-
-WORKDIR /ragflow
-
-ADD ./web ./web
-RUN cd ./web && npm i && npm run build
-
-ADD ./api ./api
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
+FROM swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow-base:v1.0
+USER root
+
+WORKDIR /ragflow
+# Copy the web front end and build it inside the image.
+COPY ./web ./web
+RUN cd ./web && npm i && npm run build
+# Application sources and configuration; COPY is used because these are plain local paths.
+COPY ./api ./api
+COPY ./conf ./conf
+COPY ./deepdoc ./deepdoc
+COPY ./rag ./rag
+# Put /ragflow on the Python import path and point Hugging Face downloads at the mirror.
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+COPY docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
\ No newline at end of file
diff --git a/Dockerfile.scratch b/Dockerfile.scratch
new file mode 100644
index 0000000..3e9483e
--- /dev/null
+++ b/Dockerfile.scratch
@@ -0,0 +1,54 @@
+FROM ubuntu:22.04
+USER root
+
+WORKDIR /ragflow
+
+# Every apt package is installed in one layer, right after `apt-get update`; the lists are removed in that same layer.
+RUN apt-get update && \
+    apt-get install -y wget curl build-essential nginx \
+        libopenmpi-dev openmpi-bin openmpi-common libglib2.0-0 libgl1-mesa-glx && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    bash ~/miniconda.sh -b -p /root/miniconda3 && \
+    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc
+ENV PATH=/root/miniconda3/bin:$PATH
+RUN conda create -y --name py11 python=3.11
+ENV CONDA_DEFAULT_ENV=py11
+ENV CONDA_PREFIX=/root/miniconda3/envs/py11
+ENV PATH=$CONDA_PREFIX/bin:$PATH
+
+# Download the NodeSource setup script with -f so an HTTP error fails the build
+# instead of feeding an empty or error body to bash.
+RUN curl -fsSL https://deb.nodesource.com/setup_14.x -o /tmp/nodesource_setup.sh && \
+    bash /tmp/nodesource_setup.sh && \
+    rm /tmp/nodesource_setup.sh && \
+    apt-get install -y nodejs && \
+    rm -rf /var/lib/apt/lists/*
+
+# Python dependencies are installed before the sources are copied, so editing
+# application code does not invalidate these layers.
+ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib
+COPY ./requirements.txt ./requirements.txt
+# NOTE(review): presumably drops conda's bundled linker so pip builds use the system ld - confirm.
+RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
+RUN conda run -n py11 python -m nltk.downloader punkt
+RUN conda run -n py11 python -m nltk.downloader wordnet
+
+COPY ./web ./web
+RUN cd ./web && npm i && npm run build
+
+COPY ./api ./api
+COPY ./conf ./conf
+COPY ./deepdoc ./deepdoc
+COPY ./rag ./rag
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+COPY docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+ENTRYPOINT ["./entrypoint.sh"]
diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py
index 207cc79..20b0223 100644
--- a/deepdoc/parser/resume/step_two.py
+++ b/deepdoc/parser/resume/step_two.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-import re, copy, time, datetime, demjson, \
+import re, copy, time, datetime, demjson3, \
     traceback, signal
 import numpy as np
 from deepdoc.parser.resume.entities import degrees, schools, corporations
@@ -197,7 +197,7 @@ def forProj(cv):
 
 
 def json_loads(line):
-    return demjson.decode(re.sub(r": *(True|False)", r": '\1'", line))
+    return demjson3.decode(re.sub(r": *(True|False)", r": '\1'", line))  # wrap bare True/False values in quotes before decoding
 
 
 def forWork(cv):
diff --git a/requirements.txt b/requirements.txt
index 9e32969..9cf9234 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,7 +19,7 @@ cryptography==42.0.5
 dashscope==1.14.1
 datasets==2.17.1
 datrie==0.8.2
-demjson==2.2.4
+demjson3==3.0.6
 dill==0.3.8
 distro==1.9.0
 elastic-transport==8.12.0
-- 
GitLab