From cacd36c5e1b4d5077cdc02dade1f46bae92fb762 Mon Sep 17 00:00:00 2001 From: KevinHuSh <kevinhu.sh@gmail.com> Date: Wed, 21 Feb 2024 16:32:38 +0800 Subject: [PATCH] use onnx models, new deepdoc (#68) --- api/apps/conversation_app.py | 2 +- api/apps/dialog_app.py | 78 +- api/db/db_models.py | 2 - deepdoc/__init__.py | 0 {rag => deepdoc}/parser/__init__.py | 3 +- {rag => deepdoc}/parser/docx_parser.py | 0 {rag => deepdoc}/parser/excel_parser.py | 0 {rag => deepdoc}/parser/pdf_parser.py | 48 +- deepdoc/visual/__init__.py | 2 + deepdoc/visual/ocr.py | 561 ++ deepdoc/visual/ocr.res | 6623 +++++++++++++++++++++++ deepdoc/visual/operators.py | 710 +++ deepdoc/visual/postprocess.py | 354 ++ deepdoc/visual/recognizer.py | 139 + deepdoc/visual/seeit.py | 83 + rag/app/book.py | 25 +- rag/app/laws.py | 23 +- rag/app/manual.py | 8 +- rag/app/naive.py | 20 +- rag/app/paper.py | 20 +- rag/app/presentation.py | 21 +- rag/app/qa.py | 18 +- rag/app/resume.py | 87 +- rag/app/table.py | 19 +- rag/nlp/huchunk.py | 18 +- rag/svr/task_broker.py | 2 +- 26 files changed, 8730 insertions(+), 136 deletions(-) create mode 100644 deepdoc/__init__.py rename {rag => deepdoc}/parser/__init__.py (99%) rename {rag => deepdoc}/parser/docx_parser.py (100%) rename {rag => deepdoc}/parser/excel_parser.py (100%) rename {rag => deepdoc}/parser/pdf_parser.py (98%) create mode 100644 deepdoc/visual/__init__.py create mode 100644 deepdoc/visual/ocr.py create mode 100644 deepdoc/visual/ocr.res create mode 100644 deepdoc/visual/operators.py create mode 100644 deepdoc/visual/postprocess.py create mode 100644 deepdoc/visual/recognizer.py create mode 100644 deepdoc/visual/seeit.py diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index ad1745a..e6e33d0 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -198,7 +198,7 @@ def chat(dialog, messages, **kwargs): return {"answer": prompt_config["empty_response"], "retrieval": kbinfos} kwargs["knowledge"] = "\n".join(knowledges) - gen_conf = dialog.llm_setting[dialog.llm_setting_type] + gen_conf = dialog.llm_setting msg = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"] used_token_count, msg = message_fit_in(msg, int(llm.max_tokens * 0.97)) if "max_tokens" in gen_conf: diff --git a/api/apps/dialog_app.py b/api/apps/dialog_app.py index 083d412..cc6f981 100644 --- a/api/apps/dialog_app.py +++ b/api/apps/dialog_app.py @@ -33,38 +33,17 @@ def set_dialog(): name = req.get("name", "New Dialog") description = req.get("description", "A helpful Dialog") language = req.get("language", "Chinese") - llm_setting_type = req.get("llm_setting_type", "Precise") + top_n = req.get("top_n", 6) + similarity_threshold = req.get("similarity_threshold", 0.1) + vector_similarity_weight = req.get("vector_similarity_weight", 0.3) llm_setting = req.get("llm_setting", { - "Creative": { - "temperature": 0.9, - "top_p": 0.9, - "frequency_penalty": 0.2, - "presence_penalty": 0.4, - "max_tokens": 512 - }, - "Precise": { - "temperature": 0.1, - "top_p": 0.3, - "frequency_penalty": 0.7, - "presence_penalty": 0.4, - "max_tokens": 215 - }, - "Evenly": { - "temperature": 0.5, - "top_p": 0.5, - "frequency_penalty": 0.7, - "presence_penalty": 0.4, - "max_tokens": 215 - }, - "Custom": { - "temperature": 0.2, - "top_p": 0.3, - "frequency_penalty": 0.6, - "presence_penalty": 0.3, - "max_tokens": 215 - }, + "temperature": 0.1, + "top_p": 0.3, + "frequency_penalty": 0.7, + "presence_penalty": 0.4, + "max_tokens": 215 }) - prompt_config = 
req.get("prompt_config", { + default_prompt = { "system": """ä˝ ćŻä¸€ä¸Şć™şč˝ĺŠ©ć‰‹ďĽŚčŻ·ć€»ç»“知识库的内容来回ç”é—®é˘ďĽŚčŻ·ĺ—举知识库ä¸çš„数据详细回ç”。当所有知识库内容é˝ä¸Žé—®é˘ć— ĺ…łć—¶ďĽŚä˝ çš„ĺ›žç”必须包括“知识库ä¸ćśŞć‰ľĺ°ć‚¨č¦çš„ç”ćˇďĽâ€ťčż™ĺŹĄčŻťă€‚回ç”需č¦č€č™‘čŠĺ¤©ĺŽ†ĺŹ˛ă€‚ 以下ćŻçźĄčŻ†ĺş“: {knowledge} @@ -74,30 +53,40 @@ def set_dialog(): {"key": "knowledge", "optional": False} ], "empty_response": "Sorry! 知识库ä¸ćśŞć‰ľĺ°ç›¸ĺ…łĺ†…容ďĽ" - }) + } + prompt_config = req.get("prompt_config", default_prompt) - if len(prompt_config["parameters"]) < 1: - return get_data_error_result(retmsg="'knowledge' should be in parameters") + if not prompt_config["system"]: prompt_config["system"] = default_prompt["system"] + # if len(prompt_config["parameters"]) < 1: + # prompt_config["parameters"] = default_prompt["parameters"] + # for p in prompt_config["parameters"]: + # if p["key"] == "knowledge":break + # else: prompt_config["parameters"].append(default_prompt["parameters"][0]) for p in prompt_config["parameters"]: - if prompt_config["system"].find("{%s}"%p["key"]) < 0: + if p["optional"]: continue + if prompt_config["system"].find("{%s}" % p["key"]) < 0: return get_data_error_result(retmsg="Parameter '{}' is not used".format(p["key"])) try: e, tenant = TenantService.get_by_id(current_user.id) - if not e:return get_data_error_result(retmsg="Tenant not found!") + if not e: return get_data_error_result(retmsg="Tenant not found!") llm_id = req.get("llm_id", tenant.llm_id) if not dialog_id: + if not req.get("kb_ids"):return get_data_error_result(retmsg="Fail! Please select knowledgebase!") dia = { "id": get_uuid(), "tenant_id": current_user.id, "name": name, + "kb_ids": req["kb_ids"], "description": description, "language": language, "llm_id": llm_id, - "llm_setting_type": llm_setting_type, "llm_setting": llm_setting, - "prompt_config": prompt_config + "prompt_config": prompt_config, + "top_n": top_n, + "similarity_threshold": similarity_threshold, + "vector_similarity_weight": vector_similarity_weight } if not DialogService.save(**dia): return get_data_error_result(retmsg="Fail to new a dialog!") e, dia = DialogService.get_by_id(dia["id"]) @@ -122,7 +111,7 @@ def set_dialog(): def get(): dialog_id = request.args["dialog_id"] try: - e,dia = DialogService.get_by_id(dialog_id) + e, dia = DialogService.get_by_id(dialog_id) if not e: return get_data_error_result(retmsg="Dialog not found!") dia = dia.to_dict() dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"]) @@ -130,20 +119,22 @@ def get(): except Exception as e: return server_error_response(e) + def get_kb_names(kb_ids): ids, nms = [], [] for kid in kb_ids: e, kb = KnowledgebaseService.get_by_id(kid) - if not e or kb.status != StatusEnum.VALID.value:continue + if not e or kb.status != StatusEnum.VALID.value: continue ids.append(kid) nms.append(kb.name) return ids, nms + @manager.route('/list', methods=['GET']) @login_required def list(): try: - diags = DialogService.query(tenant_id=current_user.id, status=StatusEnum.VALID.value) + diags = DialogService.query(tenant_id=current_user.id, status=StatusEnum.VALID.value, reverse=True, order_by=DialogService.model.create_time) diags = [d.to_dict() for d in diags] for d in diags: d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"]) @@ -154,12 +145,11 @@ def list(): @manager.route('/rm', methods=['POST']) @login_required -@validate_request("dialog_id") +@validate_request("dialog_ids") def rm(): req = request.json try: - if not DialogService.update_by_id(req["dialog_id"], {"status": StatusEnum.INVALID.value}): - return 
get_data_error_result(retmsg="Dialog not found!") + DialogService.update_many_by_id([{"id": id, "status": StatusEnum.INVALID.value} for id in req["dialog_ids"]]) return get_json_result(data=True) except Exception as e: - return server_error_response(e) \ No newline at end of file + return server_error_response(e) diff --git a/api/db/db_models.py b/api/db/db_models.py index 282a566..0e032fc 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -529,8 +529,6 @@ class Dialog(DataBaseModel): icon = CharField(max_length=16, null=False, help_text="dialog icon") language = CharField(max_length=32, null=True, default="Chinese", help_text="English|Chinese") llm_id = CharField(max_length=32, null=False, help_text="default llm ID") - llm_setting_type = CharField(max_length=8, null=False, help_text="Creative|Precise|Evenly|Custom", - default="Creative") llm_setting = JSONField(null=False, default={"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7, "presence_penalty": 0.4, "max_tokens": 215}) prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced") diff --git a/deepdoc/__init__.py b/deepdoc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rag/parser/__init__.py b/deepdoc/parser/__init__.py similarity index 99% rename from rag/parser/__init__.py rename to deepdoc/parser/__init__.py index d2b499c..886f4ab 100644 --- a/rag/parser/__init__.py +++ b/deepdoc/parser/__init__.py @@ -1,4 +1,3 @@ -import copy import random from .pdf_parser import HuParser as PdfParser @@ -10,7 +9,7 @@ import re from nltk import word_tokenize from rag.nlp import stemmer, huqie -from ..utils import num_tokens_from_string +from rag.utils import num_tokens_from_string BULLET_PATTERN = [[ r"第[零一二三四五ĺ…ä¸ĺ…«äąťĺŤç™ľ0-9]+(ĺ†?编|é¨ĺ†)", diff --git a/rag/parser/docx_parser.py b/deepdoc/parser/docx_parser.py similarity index 100% rename from rag/parser/docx_parser.py rename to deepdoc/parser/docx_parser.py diff --git a/rag/parser/excel_parser.py b/deepdoc/parser/excel_parser.py similarity index 100% rename from rag/parser/excel_parser.py rename to deepdoc/parser/excel_parser.py diff --git a/rag/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py similarity index 98% rename from rag/parser/pdf_parser.py rename to deepdoc/parser/pdf_parser.py index 79611a7..576687b 100644 --- a/rag/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os import random -from functools import partial import fitz import requests @@ -15,6 +14,7 @@ from PIL import Image import numpy as np from api.db import ParserType +from deepdoc.visual import OCR, Recognizer from rag.nlp import huqie from collections import Counter from copy import deepcopy @@ -26,13 +26,32 @@ logging.getLogger("pdfminer").setLevel(logging.WARNING) class HuParser: def __init__(self): - from paddleocr import PaddleOCR - logging.getLogger("ppocr").setLevel(logging.ERROR) - self.ocr = PaddleOCR(use_angle_cls=False, lang="ch") + self.ocr = OCR() if not hasattr(self, "model_speciess"): self.model_speciess = ParserType.GENERAL.value - self.layouter = partial(self.__remote_call, self.model_speciess) - self.tbl_det = partial(self.__remote_call, "table_component") + self.layout_labels = [ + "_background_", + "Text", + "Title", + "Figure", + "Figure caption", + "Table", + "Table caption", + "Header", + "Footer", + "Reference", + "Equation", + ] + self.tsr_labels = [ + "table", + "table column", + "table row", + "table column header", + "table projected row header", + "table 
spanning cell", + ] + self.layouter = Recognizer(self.layout_labels, "layout", "/data/newpeak/medical-gpt/res/ppdet/") + self.tbl_det = Recognizer(self.tsr_labels, "tsr", "/data/newpeak/medical-gpt/res/ppdet.tbl/") self.updown_cnt_mdl = xgb.Booster() if torch.cuda.is_available(): @@ -56,7 +75,7 @@ class HuParser: token = os.environ.get("INFINIFLOW_TOKEN") if not url or not token: logging.warning("INFINIFLOW_SERVER is not specified. To maximize the effectiveness, please visit https://github.com/infiniflow/ragflow, and sign in the our demo web site to get token. It's FREE! Using 'export' to set both environment variables: INFINIFLOW_SERVER and INFINIFLOW_TOKEN.") - return [] + return [[] for _ in range(len(images))] def convert_image_to_bytes(PILimage): image = BytesIO() @@ -382,7 +401,7 @@ class HuParser: return layouts - def __table_paddle(self, images): + def __table_tsr(self, images): tbls = self.tbl_det(images, thr=0.5) res = [] # align left&right for rows, align top&bottom for columns @@ -452,7 +471,7 @@ class HuParser: assert len(self.page_images) == len(tbcnt) - 1 if not imgs: return - recos = self.__table_paddle(imgs) + recos = self.__table_tsr(imgs) tbcnt = np.cumsum(tbcnt) for i in range(len(tbcnt) - 1): # for page pg = [] @@ -517,8 +536,8 @@ class HuParser: b["H_right"] = spans[ii]["x1"] b["SP"] = ii - def __ocr_paddle(self, pagenum, img, chars, ZM=3): - bxs = self.ocr.ocr(np.array(img), cls=True)[0] + def __ocr(self, pagenum, img, chars, ZM=3): + bxs = self.ocr(np.array(img)) if not bxs: self.boxes.append([]) return @@ -557,11 +576,12 @@ class HuParser: self.boxes.append(bxs) - def _layouts_paddle(self, ZM): + def _layouts_rec(self, ZM): assert len(self.page_images) == len(self.boxes) # Tag layout type boxes = [] layouts = self.layouter(self.page_images) + #save_results(self.page_images, layouts, self.layout_labels, output_dir='output/', threshold=0.7) assert len(self.page_images) == len(layouts) for pn, lts in enumerate(layouts): bxs = self.boxes[pn] @@ -1741,7 +1761,7 @@ class HuParser: # else: # self.page_cum_height.append( # np.max([c["bottom"] for c in chars])) - self.__ocr_paddle(i + 1, img, chars, zoomin) + self.__ocr(i + 1, img, chars, zoomin) if not self.is_english and not any([c for c in self.page_chars]) and self.boxes: bxes = [b for bxs in self.boxes for b in bxs] @@ -1754,7 +1774,7 @@ class HuParser: def __call__(self, fnm, need_image=True, zoomin=3, return_html=False): self.__images__(fnm, zoomin) - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) self._table_transformer_job(zoomin) self._text_merge() self._concat_downward() diff --git a/deepdoc/visual/__init__.py b/deepdoc/visual/__init__.py new file mode 100644 index 0000000..e53762a --- /dev/null +++ b/deepdoc/visual/__init__.py @@ -0,0 +1,2 @@ +from .ocr import OCR +from .recognizer import Recognizer \ No newline at end of file diff --git a/deepdoc/visual/ocr.py b/deepdoc/visual/ocr.py new file mode 100644 index 0000000..65b2c2d --- /dev/null +++ b/deepdoc/visual/ocr.py @@ -0,0 +1,561 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import copy +import time +import os + +from huggingface_hub import snapshot_download + +from .operators import * +import numpy as np +import onnxruntime as ort + +from api.utils.file_utils import get_project_base_directory +from .postprocess import build_post_process +from rag.settings import cron_logger + + +def transform(data, ops=None): + """ transform """ + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + +def create_operators(op_param_list, global_config=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance( + op_param_list, list), ('operator config should be a list') + ops = [] + for operator in op_param_list: + assert isinstance(operator, + dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + if global_config is not None: + param.update(global_config) + op = eval(op_name)(**param) + ops.append(op) + return ops + + +def load_model(model_dir, nm): + model_file_path = os.path.join(model_dir, nm + ".onnx") + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + sess = ort.InferenceSession(model_file_path) + return sess, sess.get_inputs()[0] + + +class TextRecognizer(object): + def __init__(self, model_dir): + self.rec_image_shape = [int(v) for v in "3, 48, 320".split(",")] + self.rec_batch_num = 16 + postprocess_params = { + 'name': 'CTCLabelDecode', + "character_dict_path": os.path.join(get_project_base_directory(), "rag/res", "ocr.res"), + "use_space_char": True + } + self.postprocess_op = build_post_process(postprocess_params) + self.predictor, self.input_tensor = load_model(model_dir, 'rec') + + def resize_norm_img(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + + assert imgC == img.shape[2] + imgW = int((imgH * max_wh_ratio)) + w = self.input_tensor.shape[3:][0] + if isinstance(w, str): + pass + elif w is not None and w > 0: + imgW = w + h, w = img.shape[:2] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def resize_norm_img_vl(self, img, image_shape): + + imgC, imgH, imgW = image_shape + img = img[:, :, ::-1] # bgr2rgb + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + return resized_image + + def resize_norm_img_srn(self, img, image_shape): + imgC, imgH, imgW = image_shape + + img_black = np.zeros((imgH, imgW)) + im_hei = img.shape[0] + im_wid = img.shape[1] + + if im_wid <= im_hei * 1: + img_new = cv2.resize(img, (imgH * 1, imgH)) + elif im_wid <= im_hei * 2: + img_new = cv2.resize(img, (imgH * 2, imgH)) + elif im_wid <= im_hei * 3: + img_new = cv2.resize(img, (imgH * 3, imgH)) + else: + img_new = cv2.resize(img, (imgW, imgH)) + + img_np = np.asarray(img_new) + img_np = 
cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) + img_black[:, 0:img_np.shape[1]] = img_np + img_black = img_black[:, :, np.newaxis] + + row, col, c = img_black.shape + c = 1 + + return np.reshape(img_black, (c, row, col)).astype(np.float32) + + def srn_other_inputs(self, image_shape, num_heads, max_text_length): + + imgC, imgH, imgW = image_shape + feature_dim = int((imgH / 8) * (imgW / 8)) + + encoder_word_pos = np.array(range(0, feature_dim)).reshape( + (feature_dim, 1)).astype('int64') + gsrm_word_pos = np.array(range(0, max_text_length)).reshape( + (max_text_length, 1)).astype('int64') + + gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) + gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( + [-1, 1, max_text_length, max_text_length]) + gsrm_slf_attn_bias1 = np.tile( + gsrm_slf_attn_bias1, + [1, num_heads, 1, 1]).astype('float32') * [-1e9] + + gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( + [-1, 1, max_text_length, max_text_length]) + gsrm_slf_attn_bias2 = np.tile( + gsrm_slf_attn_bias2, + [1, num_heads, 1, 1]).astype('float32') * [-1e9] + + encoder_word_pos = encoder_word_pos[np.newaxis, :] + gsrm_word_pos = gsrm_word_pos[np.newaxis, :] + + return [ + encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, + gsrm_slf_attn_bias2 + ] + + def process_image_srn(self, img, image_shape, num_heads, max_text_length): + norm_img = self.resize_norm_img_srn(img, image_shape) + norm_img = norm_img[np.newaxis, :] + + [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ + self.srn_other_inputs(image_shape, num_heads, max_text_length) + + gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) + gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) + encoder_word_pos = encoder_word_pos.astype(np.int64) + gsrm_word_pos = gsrm_word_pos.astype(np.int64) + + return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, + gsrm_slf_attn_bias2) + + def resize_norm_img_sar(self, img, image_shape, + width_downsample_ratio=0.25): + imgC, imgH, imgW_min, imgW_max = image_shape + h = img.shape[0] + w = img.shape[1] + valid_ratio = 1.0 + # make sure new_width is an integral multiple of width_divisor. 
+ width_divisor = int(1 / width_downsample_ratio) + # resize + ratio = w / float(h) + resize_w = math.ceil(imgH * ratio) + if resize_w % width_divisor != 0: + resize_w = round(resize_w / width_divisor) * width_divisor + if imgW_min is not None: + resize_w = max(imgW_min, resize_w) + if imgW_max is not None: + valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) + resize_w = min(imgW_max, resize_w) + resized_image = cv2.resize(img, (resize_w, imgH)) + resized_image = resized_image.astype('float32') + # norm + if image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + resize_shape = resized_image.shape + padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) + padding_im[:, :, 0:resize_w] = resized_image + pad_shape = padding_im.shape + + return padding_im, resize_shape, pad_shape, valid_ratio + + def resize_norm_img_spin(self, img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + # return padding_im + img = cv2.resize(img, tuple([100, 32]), cv2.INTER_CUBIC) + img = np.array(img, np.float32) + img = np.expand_dims(img, -1) + img = img.transpose((2, 0, 1)) + mean = [127.5] + std = [127.5] + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + mean = np.float32(mean.reshape(1, -1)) + stdinv = 1 / np.float32(std.reshape(1, -1)) + img -= mean + img *= stdinv + return img + + def resize_norm_img_svtr(self, img, image_shape): + + imgC, imgH, imgW = image_shape + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + return resized_image + + def resize_norm_img_abinet(self, img, image_shape): + + imgC, imgH, imgW = image_shape + + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_image = resized_image.astype('float32') + resized_image = resized_image / 255. + + mean = np.array([0.485, 0.456, 0.406]) + std = np.array([0.229, 0.224, 0.225]) + resized_image = ( + resized_image - mean[None, None, ...]) / std[None, None, ...] 
+ resized_image = resized_image.transpose((2, 0, 1)) + resized_image = resized_image.astype('float32') + + return resized_image + + def norm_img_can(self, img, image_shape): + + img = cv2.cvtColor( + img, cv2.COLOR_BGR2GRAY) # CAN only predict gray scale image + + if self.rec_image_shape[0] == 1: + h, w = img.shape + _, imgH, imgW = self.rec_image_shape + if h < imgH or w < imgW: + padding_h = max(imgH - h, 0) + padding_w = max(imgW - w, 0) + img_padded = np.pad(img, ((0, padding_h), (0, padding_w)), + 'constant', + constant_values=(255)) + img = img_padded + + img = np.expand_dims(img, 0) / 255.0 # h,w,c -> c,h,w + img = img.astype('float32') + + return img + + def __call__(self, img_list): + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the recognition process + indices = np.argsort(np.array(width_list)) + rec_res = [['', 0.0]] * img_num + batch_num = self.rec_batch_num + st = time.time() + + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + imgC, imgH, imgW = self.rec_image_shape[:3] + max_wh_ratio = imgW / imgH + # max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + + input_dict = {} + input_dict[self.input_tensor.name] = norm_img_batch + outputs = self.predictor.run(None, input_dict) + preds = outputs[0] + rec_result = self.postprocess_op(preds) + for rno in range(len(rec_result)): + rec_res[indices[beg_img_no + rno]] = rec_result[rno] + + return rec_res, time.time() - st + + +class TextDetector(object): + def __init__(self, model_dir): + pre_process_list = [{ + 'DetResizeForTest': { + 'limit_side_len': 960, + 'limit_type': "max", + } + }, { + 'NormalizeImage': { + 'std': [0.229, 0.224, 0.225], + 'mean': [0.485, 0.456, 0.406], + 'scale': '1./255.', + 'order': 'hwc' + } + }, { + 'ToCHWImage': None + }, { + 'KeepKeys': { + 'keep_keys': ['image', 'shape'] + } + }] + postprocess_params = {"name": "DBPostProcess", "thresh": 0.3, "box_thresh": 0.6, "max_candidates": 1000, + "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"} + + self.postprocess_op = build_post_process(postprocess_params) + self.predictor, self.input_tensor = load_model(model_dir, 'det') + + img_h, img_w = self.input_tensor.shape[2:] + if isinstance(img_h, str) or isinstance(img_w, str): + pass + elif img_h is not None and img_w is not None and img_h > 0 and img_w > 0: + pre_process_list[0] = { + 'DetResizeForTest': { + 'image_shape': [img_h, img_w] + } + } + self.preprocess_op = create_operators(pre_process_list) + + def order_points_clockwise(self, pts): + rect = np.zeros((4, 2), dtype="float32") + s = pts.sum(axis=1) + rect[0] = pts[np.argmin(s)] + rect[2] = pts[np.argmax(s)] + tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0) + diff = np.diff(np.array(tmp), axis=1) + rect[1] = tmp[np.argmin(diff)] + rect[3] = tmp[np.argmax(diff)] + return rect + + def clip_det_res(self, points, img_height, img_width): + for pno in range(points.shape[0]): + points[pno, 0] = 
int(min(max(points[pno, 0], 0), img_width - 1)) + points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) + return points + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + if isinstance(box, list): + box = np.array(box) + box = self.order_points_clockwise(box) + box = self.clip_det_res(box, img_height, img_width) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 3 or rect_height <= 3: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + if isinstance(box, list): + box = np.array(box) + box = self.clip_det_res(box, img_height, img_width) + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def __call__(self, img): + ori_im = img.copy() + data = {'image': img} + + st = time.time() + data = transform(data, self.preprocess_op) + img, shape_list = data + if img is None: + return None, 0 + img = np.expand_dims(img, axis=0) + shape_list = np.expand_dims(shape_list, axis=0) + img = img.copy() + input_dict = {} + input_dict[self.input_tensor.name] = img + outputs = self.predictor.run(None, input_dict) + + post_result = self.postprocess_op({"maps": outputs[0]}, shape_list) + dt_boxes = post_result[0]['points'] + dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) + + return dt_boxes, time.time() - st + + +class OCR(object): + def __init__(self, model_dir=None): + """ + If you have trouble downloading HuggingFace models, -_^ this might help!! + + For Linux: + export HF_ENDPOINT=https://hf-mirror.com + + For Windows: + Good luck + ^_- + + """ + if not model_dir: + model_dir = snapshot_download(repo_id="InfiniFlow/ocr") + + self.text_detector = TextDetector(model_dir) + self.text_recognizer = TextRecognizer(model_dir) + self.drop_score = 0.5 + self.crop_image_res_index = 0 + + def get_rotate_crop_image(self, img, points): + ''' + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + ''' + assert len(points) == 4, "shape of points must be 4*2" + img_crop_width = int( + max( + np.linalg.norm(points[0] - points[1]), + np.linalg.norm(points[2] - points[3]))) + img_crop_height = int( + max( + np.linalg.norm(points[0] - points[3]), + np.linalg.norm(points[1] - points[2]))) + pts_std = np.float32([[0, 0], [img_crop_width, 0], + [img_crop_width, img_crop_height], + [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective( + img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = np.rot90(dst_img) + return dst_img + + def sorted_boxes(self, dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array):detected text boxes with shape [4, 2] + return: + sorted boxes(array) with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = 
list(sorted_boxes) + + for i in range(num_boxes - 1): + for j in range(i, -1, -1): + if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \ + (_boxes[j + 1][0][0] < _boxes[j][0][0]): + tmp = _boxes[j] + _boxes[j] = _boxes[j + 1] + _boxes[j + 1] = tmp + else: + break + return _boxes + + def __call__(self, img, cls=True): + time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0} + + if img is None: + return None, None, time_dict + + start = time.time() + ori_im = img.copy() + dt_boxes, elapse = self.text_detector(img) + time_dict['det'] = elapse + + if dt_boxes is None: + end = time.time() + time_dict['all'] = end - start + return None, None, time_dict + else: + cron_logger.debug("dt_boxes num : {}, elapsed : {}".format( + len(dt_boxes), elapse)) + img_crop_list = [] + + dt_boxes = self.sorted_boxes(dt_boxes) + + for bno in range(len(dt_boxes)): + tmp_box = copy.deepcopy(dt_boxes[bno]) + img_crop = self.get_rotate_crop_image(ori_im, tmp_box) + img_crop_list.append(img_crop) + + rec_res, elapse = self.text_recognizer(img_crop_list) + time_dict['rec'] = elapse + cron_logger.debug("rec_res num : {}, elapsed : {}".format( + len(rec_res), elapse)) + + filter_boxes, filter_rec_res = [], [] + for box, rec_result in zip(dt_boxes, rec_res): + text, score = rec_result + if score >= self.drop_score: + filter_boxes.append(box) + filter_rec_res.append(rec_result) + end = time.time() + time_dict['all'] = end - start + + #for bno in range(len(img_crop_list)): + # print(f"{bno}, {rec_res[bno]}") + + return list(zip([a.tolist() for a in filter_boxes], filter_rec_res)) diff --git a/deepdoc/visual/ocr.res b/deepdoc/visual/ocr.res new file mode 100644 index 0000000..b62de66 --- /dev/null +++ b/deepdoc/visual/ocr.res @@ -0,0 +1,6623 @@ +' +ç–— +绚 +诚 +娇 +ćşś +é˘ +č´ż +者 +ĺ»– +ć›´ +çşł +ĺŠ +奉 +ĺ…¬ +一 +ĺ°± +ć±´ +计 +与 +č·Ż +ćż +原 +妇 +2 +0 +8 +- +7 +ĺ…¶ +> +: +] +, +, +骑 +ĺ +ĺ…¨ +ć¶ +ćŹ +ĺ‚ +安 +äą… +é’ź +ĺ—… +不 +ĺ˝± +处 +é©˝ +čśż +资 +ĺ…ł +椤 +ĺś° +ç¸ +专 +é—® +ĺż– +票 +嫉 +ç‚Ž +éźµ +č¦ +ćś +ç”° +节 +陂 +é„™ +捌 +备 +ć‹ł +伺 +眼 +网 +盎 +大 +ĺ‚Ť +ĺż +东 +愉 +汇 +čąż +科 +每 +业 +里 +čŞ +晏 +ĺ— +ĺął +录 +ĺ… +1 +3 +彤 +鲶 +产 +稍 +督 +č…´ +有 +象 +岳 +注 +绍 +在 +ćłş +ć–‡ +定 +ć ¸ +ĺŤ +ć°´ +过 +ç† +让 +ĺ· +率 +ç‰ +čż™ +发 +” +为 +ĺ« +č‚Ą +é…‰ +相 +鄱 +ä¸ +编 +猥 +é”› +ć—Ą +é•€ +č’‚ +掰 +倒 +辆 +ć ľ +ć — +综 +涩 +ĺ·ž +雌 +滑 +馀 +了 +ćśş +ĺť— +司 +ĺ®° +ç”™ +ĺ…´ +çź˝ +抚 +äżť +用 +沧 +秩 +如 +收 +ćŻ +滥 +页 +ç–‘ +ĺź +! +ďĽ +姥 +异 +ć©ą +é’‡ +ĺ‘ +下 +č·„ +çš„ +椴 +沫 +ĺ›˝ +绥 +çŤ +报 +开 +ć°‘ +蜇 +何 +ĺ† +凇 +é•ż +讥 +č—Ź +掏 +ć–˝ +çľ˝ +ä¸ +讲 +ć´ľ +ĺź +äşş +ćŹ +浼 +é—´ +世 +而 +古 +多 +倪 +唇 +饯 +控 +ĺşš +首 +čµ› +čś“ +ĺ‘ł +ć– +ĺ¶ +觉 +技 +替 +艰 +溢 +ć˝® +夕 +é’ş +外 +ć‘ +ćž‹ +动 +双 +单 +ĺ•® +ć· +枇 +确 +锦 +曜 +ćťś +ć– +č˝ +ć• +éśś +ç›’ +然 +äľ— +电 +ć™ +放 +ćĄ +éą +ć–° +ćť– +čś‚ +ĺ’ +ćż‚ +瞬 +评 +总 +隍 +对 +独 +ĺ +äąź +ćŻ +ĺşś +éť’ +天 +诲 +墙 +组 +ć»´ +级 +é‚€ +ĺ¸ +示 +ĺ·˛ +ć—¶ +骸 +仄 +ćł… +ĺ’Ś +é¨ +ĺş— +雇 +ç–« +ćŚ +ĺ·Ť +踮 +ĺ˘ +只 +亨 +ç›® +鉴 +ĺ´¤ +é—˛ +体 +ćł„ +ćť‚ +作 +č¬ +č˝° +化 +解 +čż‚ +诿 +č› +ç’€ +č…ľ +ĺ‘Š +ç‰ +服 +çś +ĺ¸ +ĺ°Ź +规 +程 +çşż +ćµ· +办 +引 +二 +桧 +牌 +ç ş +ć´„ +裴 +äż® +图 +ç—« +čˇ +许 +犊 +äş‹ +é› +ĺźş +ćź´ +ĺ‘Ľ +食 +ç ” +奶 +ĺľ‹ +蛋 +ĺ› +葆 +察 +ćŹ +褒 +ć’ +再 +李 +éŞ +ĺ·Ą +貂 +油 +éą… +ç« +ĺ•„ +休 +ĺśş +ç»™ +睡 +çş· +豆 +器 +捎 +说 +ć•Ź +ĺ¦ +会 +ćµ’ +设 +诊 +ć Ľ +廓 +查 +来 +éś“ +室 +溆 +ďż +诡 +寥 +ç„• +čś +ćź’ +ç‹ +回 +ćź +ç ľ +厄 +实 +çż© +ĺ°ż +äş” +ĺ…Ą +ĺľ„ +ć +ĺ–ą +股 +宇 +篝 +| +; +美 +ćśź +äş‘ +äąť +祺 +扮 +éť +锝 +槌 +çł» +äĽ +é…° +éŠ +ćš‚ +čš• +ĺż» +č± +本 +çľą +执 +条 +é’¦ +H +獒 +é™ +čż› +ĺŁ +楦 +于 +čŠ +玖 +é“‹ +茯 +未 +ç” +ç˛ +括 +ć · +精 +ć¬ +矢 +甥 +帷 +嵩 +扣 +令 +ä»” +风 +çš +行 +支 +é¨ +蓉 +ĺ® +ç«™ +蜡 +ć•‘ +é’Š +ć±— +ćťľ +ĺ«Ś +ć +可 +. 
[... remaining entries of the 6,623-line ocr.res character dictionary omitted: one character per line (CJK characters, digits, Latin letters and punctuation) used by the CTC label decoder ...]
\ No newline at end of file
diff --git a/deepdoc/visual/operators.py b/deepdoc/visual/operators.py new file mode 100644 index 0000000..a4ef57c --- /dev/null +++ b/deepdoc/visual/operators.py @@ -0,0 +1,710 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +import sys +import six +import cv2 +import numpy as np +import math +from PIL import Image + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, + img_mode='RGB', + channel_first=False, + ignore_orientation=False, + **kwargs): + self.img_mode = img_mode + self.channel_first = channel_first + self.ignore_orientation = ignore_orientation + + def __call__(self, data): + img = data['image'] + if six.PY2: + assert isinstance(img, str) and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert isinstance(img, bytes) and len( + img) > 0, "invalid input 'img' in DecodeImage" + img = np.frombuffer(img, dtype='uint8') + if self.ignore_orientation: + img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION | + cv2.IMREAD_COLOR) + else: + img = cv2.imdecode(img, 1) + if img is None: + return None + if self.img_mode == 'GRAY': + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + elif self.img_mode == 'RGB': + assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( + img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + data['image'] = img + return data + +class StandardizeImage(object): + """normalize image + Args: + mean (list): im - mean + std (list): im / std + is_scale (bool): whether need im / 255 + norm_type (str): type in ['mean_std', 'none'] + """ + + def __init__(self, mean, std, is_scale=True, norm_type='mean_std'): + self.mean = mean + self.std = std + self.is_scale = is_scale + self.norm_type = norm_type + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.astype(np.float32, copy=False) + if self.is_scale: + scale = 1.0 / 255.0 + im *= scale + + if self.norm_type == 'mean_std': + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im -= mean + im /= std + return im, im_info + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): + if isinstance(scale, str): + scale = eval(scale) + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + assert isinstance(img, + np.ndarray), "invalid input 'img' in NormalizeImage" + data['image'] = ( + img.astype('float32') * self.scale - self.mean) / self.std + return data + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self, **kwargs): + pass + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + data['image'] = img.transpose((2, 0, 1)) + return data + + +class Fasttext(object): + def __init__(self, path="None", **kwargs): + import fasttext + self.fast_model = fasttext.load_model(path) + + def __call__(self, data): + label = data['label'] + fast_label = self.fast_model[label] + data['fast_label'] = fast_label + return data + + +class KeepKeys(object): + def 
__init__(self, keep_keys, **kwargs): + self.keep_keys = keep_keys + + def __call__(self, data): + data_list = [] + for key in self.keep_keys: + data_list.append(data[key]) + return data_list + + +class Pad(object): + def __init__(self, size=None, size_div=32, **kwargs): + if size is not None and not isinstance(size, (int, list, tuple)): + raise TypeError("Type of target_size is invalid. Now is {}".format( + type(size))) + if isinstance(size, int): + size = [size, size] + self.size = size + self.size_div = size_div + + def __call__(self, data): + + img = data['image'] + img_h, img_w = img.shape[0], img.shape[1] + if self.size: + resize_h2, resize_w2 = self.size + assert ( + img_h < resize_h2 and img_w < resize_w2 + ), '(h, w) of target size should be greater than (img_h, img_w)' + else: + resize_h2 = max( + int(math.ceil(img.shape[0] / self.size_div) * self.size_div), + self.size_div) + resize_w2 = max( + int(math.ceil(img.shape[1] / self.size_div) * self.size_div), + self.size_div) + img = cv2.copyMakeBorder( + img, + 0, + resize_h2 - img_h, + 0, + resize_w2 - img_w, + cv2.BORDER_CONSTANT, + value=0) + data['image'] = img + return data + + +class LinearResize(object): + """resize image by target_size and max_size + Args: + target_size (int): the target size of image + keep_ratio (bool): whether keep_ratio or not, default true + interp (int): method of resize + """ + + def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + im_info['scale_factor'] = np.array( + [im_scale_y, im_scale_x]).astype('float32') + return im, im_info + + def generate_scale(self, im): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +class Resize(object): + def __init__(self, size=(640, 640), **kwargs): + self.size = size + + def resize_image(self, img): + resize_h, resize_w = self.size + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + return img, [ratio_h, ratio_w] + + def __call__(self, data): + img = data['image'] + if 'polys' in 
data: + text_polys = data['polys'] + + img_resize, [ratio_h, ratio_w] = self.resize_image(img) + if 'polys' in data: + new_boxes = [] + for box in text_polys: + new_box = [] + for cord in box: + new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) + new_boxes.append(new_box) + data['polys'] = np.array(new_boxes, dtype=np.float32) + data['image'] = img_resize + return data + + +class DetResizeForTest(object): + def __init__(self, **kwargs): + super(DetResizeForTest, self).__init__() + self.resize_type = 0 + self.keep_ratio = False + if 'image_shape' in kwargs: + self.image_shape = kwargs['image_shape'] + self.resize_type = 1 + if 'keep_ratio' in kwargs: + self.keep_ratio = kwargs['keep_ratio'] + elif 'limit_side_len' in kwargs: + self.limit_side_len = kwargs['limit_side_len'] + self.limit_type = kwargs.get('limit_type', 'min') + elif 'resize_long' in kwargs: + self.resize_type = 2 + self.resize_long = kwargs.get('resize_long', 960) + else: + self.limit_side_len = 736 + self.limit_type = 'min' + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + if sum([src_h, src_w]) < 64: + img = self.image_padding(img) + + if self.resize_type == 0: + # img, shape = self.resize_image_type0(img) + img, [ratio_h, ratio_w] = self.resize_image_type0(img) + elif self.resize_type == 2: + img, [ratio_h, ratio_w] = self.resize_image_type2(img) + else: + # img, shape = self.resize_image_type1(img) + img, [ratio_h, ratio_w] = self.resize_image_type1(img) + data['image'] = img + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def image_padding(self, im, value=0): + h, w, c = im.shape + im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value + im_pad[:h, :w, :] = im + return im_pad + + def resize_image_type1(self, img): + resize_h, resize_w = self.image_shape + ori_h, ori_w = img.shape[:2] # (h, w, c) + if self.keep_ratio is True: + resize_w = ori_w * resize_h / ori_h + N = math.ceil(resize_w / 32) + resize_w = N * 32 + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + # return img, np.array([ori_h, ori_w]) + return img, [ratio_h, ratio_w] + + def resize_image_type0(self, img): + """ + resize image to a size multiple of 32 which is required by the network + args: + img(array): array with shape [h, w, c] + return(tuple): + img, (ratio_h, ratio_w) + """ + limit_side_len = self.limit_side_len + h, w, c = img.shape + + # limit the max side + if self.limit_type == 'max': + if max(h, w) > limit_side_len: + if h > w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. + elif self.limit_type == 'min': + if min(h, w) < limit_side_len: + if h < w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. 
+ elif self.limit_type == 'resize_long': + ratio = float(limit_side_len) / max(h, w) + else: + raise Exception('not support limit type, image ') + resize_h = int(h * ratio) + resize_w = int(w * ratio) + + resize_h = max(int(round(resize_h / 32) * 32), 32) + resize_w = max(int(round(resize_w / 32) * 32), 32) + + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + img = cv2.resize(img, (int(resize_w), int(resize_h))) + except BaseException: + print(img.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return img, [ratio_h, ratio_w] + + def resize_image_type2(self, img): + h, w, _ = img.shape + + resize_w = w + resize_h = h + + if resize_h > resize_w: + ratio = float(self.resize_long) / resize_h + else: + ratio = float(self.resize_long) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + img = cv2.resize(img, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return img, [ratio_h, ratio_w] + + +class E2EResizeForTest(object): + def __init__(self, **kwargs): + super(E2EResizeForTest, self).__init__() + self.max_side_len = kwargs['max_side_len'] + self.valid_set = kwargs['valid_set'] + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + if self.valid_set == 'totaltext': + im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( + img, max_side_len=self.max_side_len) + else: + im_resized, (ratio_h, ratio_w) = self.resize_image( + img, max_side_len=self.max_side_len) + data['image'] = im_resized + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def resize_image_for_totaltext(self, im, max_side_len=512): + + h, w, _ = im.shape + resize_w = w + resize_h = h + ratio = 1.25 + if h * ratio > max_side_len: + ratio = float(max_side_len) / resize_h + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return im, (ratio_h, ratio_w) + + def resize_image(self, im, max_side_len=512): + """ + resize image to a size multiple of max_stride which is required by the network + :param im: the resized image + :param max_side_len: limit of max image size to avoid out of memory in gpu + :return: the resized image and the resize ratio + """ + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # Fix the longer side + if resize_h > resize_w: + ratio = float(max_side_len) / resize_h + else: + ratio = float(max_side_len) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return im, (ratio_h, ratio_w) + + +class KieResize(object): + def __init__(self, **kwargs): + super(KieResize, self).__init__() + self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ + 'img_scale'][1] + + def __call__(self, data): + img = data['image'] + points = 
data['points'] + src_h, src_w, _ = img.shape + im_resized, scale_factor, [ratio_h, ratio_w + ], [new_h, new_w] = self.resize_image(img) + resize_points = self.resize_boxes(img, points, scale_factor) + data['ori_image'] = img + data['ori_boxes'] = points + data['points'] = resize_points + data['image'] = im_resized + data['shape'] = np.array([new_h, new_w]) + return data + + def resize_image(self, img): + norm_img = np.zeros([1024, 1024, 3], dtype='float32') + scale = [512, 1024] + h, w = img.shape[:2] + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( + scale_factor) + 0.5) + max_stride = 32 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(img, (resize_w, resize_h)) + new_h, new_w = im.shape[:2] + w_scale = new_w / w + h_scale = new_h / h + scale_factor = np.array( + [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + norm_img[:new_h, :new_w, :] = im + return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] + + def resize_boxes(self, im, points, scale_factor): + points = points * scale_factor + img_shape = im.shape[:2] + points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) + points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) + return points + + +class SRResize(object): + def __init__(self, + imgH=32, + imgW=128, + down_sample_scale=4, + keep_ratio=False, + min_ratio=1, + mask=False, + infer_mode=False, + **kwargs): + self.imgH = imgH + self.imgW = imgW + self.keep_ratio = keep_ratio + self.min_ratio = min_ratio + self.down_sample_scale = down_sample_scale + self.mask = mask + self.infer_mode = infer_mode + + def __call__(self, data): + imgH = self.imgH + imgW = self.imgW + images_lr = data["image_lr"] + transform2 = ResizeNormalize( + (imgW // self.down_sample_scale, imgH // self.down_sample_scale)) + images_lr = transform2(images_lr) + data["img_lr"] = images_lr + if self.infer_mode: + return data + + images_HR = data["image_hr"] + label_strs = data["label"] + transform = ResizeNormalize((imgW, imgH)) + images_HR = transform(images_HR) + data["img_hr"] = images_HR + return data + + +class ResizeNormalize(object): + def __init__(self, size, interpolation=Image.BICUBIC): + self.size = size + self.interpolation = interpolation + + def __call__(self, img): + img = img.resize(self.size, self.interpolation) + img_numpy = np.array(img).astype("float32") + img_numpy = img_numpy.transpose((2, 0, 1)) / 255 + return img_numpy + + +class GrayImageChannelFormat(object): + """ + format gray scale image's channel: (3,h,w) -> (1,h,w) + Args: + inverse: inverse gray image + """ + + def __init__(self, inverse=False, **kwargs): + self.inverse = inverse + + def __call__(self, data): + img = data['image'] + img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img_expanded = np.expand_dims(img_single_channel, 0) + + if self.inverse: + data['image'] = np.abs(img_expanded - 1) + else: + data['image'] = img_expanded + + data['src_image'] = img + return data + + +class Permute(object): + """permute image + Args: + to_bgr (bool): whether convert RGB to BGR + channel_first (bool): whether convert HWC to CHW + """ + + def __init__(self, ): + super(Permute, self).__init__() + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im 
(np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.transpose((2, 0, 1)).copy() + return im, im_info + + +class PadStride(object): + """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config + Args: + stride (bool): model with FPN need image shape % stride == 0 + """ + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + coarsest_stride = self.coarsest_stride + if coarsest_stride <= 0: + return im, im_info + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +def decode_image(im_file, im_info): + """read rgb image + Args: + im_file (str|np.ndarray): input can be image path or np.ndarray + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = im_file + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) + return im, im_info + + +def preprocess(im, preprocess_ops): + # process image by preprocess_ops + im_info = { + 'scale_factor': np.array( + [1., 1.], dtype=np.float32), + 'im_shape': None, + } + im, im_info = decode_image(im, im_info) + for operator in preprocess_ops: + im, im_info = operator(im, im_info) + return im, im_info \ No newline at end of file diff --git a/deepdoc/visual/postprocess.py b/deepdoc/visual/postprocess.py new file mode 100644 index 0000000..b3d1ac7 --- /dev/null +++ b/deepdoc/visual/postprocess.py @@ -0,0 +1,354 @@ +import copy + +import numpy as np +import cv2 +import paddle +from shapely.geometry import Polygon +import pyclipper + + +def build_post_process(config, global_config=None): + support_dict = ['DBPostProcess', 'CTCLabelDecode'] + + config = copy.deepcopy(config) + module_name = config.pop('name') + if module_name == "None": + return + if global_config is not None: + config.update(global_config) + assert module_name in support_dict, Exception( + 'post process only support {}'.format(support_dict)) + module_class = eval(module_name)(**config) + return module_class + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). 
+ """ + + def __init__(self, + thresh=0.3, + box_thresh=0.7, + max_candidates=1000, + unclip_ratio=2.0, + use_dilation=False, + score_mode="fast", + box_type='quad', + **kwargs): + self.thresh = thresh + self.box_thresh = box_thresh + self.max_candidates = max_candidates + self.unclip_ratio = unclip_ratio + self.min_size = 3 + self.score_mode = score_mode + self.box_type = box_type + assert score_mode in [ + "slow", "fast" + ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) + + self.dilation_kernel = None if not use_dilation else np.array( + [[1, 1], [1, 1]]) + + def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + boxes = [] + scores = [] + + contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), + cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + + for contour in contours[:self.max_candidates]: + epsilon = 0.002 * cv2.arcLength(contour, True) + approx = cv2.approxPolyDP(contour, epsilon, True) + points = approx.reshape((-1, 2)) + if points.shape[0] < 4: + continue + + score = self.box_score_fast(pred, points.reshape(-1, 2)) + if self.box_thresh > score: + continue + + if points.shape[0] > 2: + box = self.unclip(points, self.unclip_ratio) + if len(box) > 1: + continue + else: + continue + box = box.reshape(-1, 2) + + _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2))) + if sside < self.min_size + 2: + continue + + box = np.array(box) + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.tolist()) + scores.append(score) + return boxes, scores + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, + cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + + boxes = [] + scores = [] + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + if self.score_mode == "fast": + score = self.box_score_fast(pred, points.reshape(-1, 2)) + else: + score = self.box_score_slow(pred, contour) + if self.box_thresh > score: + continue + + box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.astype("int32")) + scores.append(score) + return np.array(boxes, dtype="int32"), scores + + def unclip(self, box, unclip_ratio): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + ''' + box_score_fast: use bbox mean score as the mean score + ''' + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def box_score_slow(self, bitmap, contour): + ''' + box_score_slow: use polyon mean score as the mean score + ''' + h, w = bitmap.shape[:2] + contour = contour.copy() + contour = np.reshape(contour, (-1, 2)) + + xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) + xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) + ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) + ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + + contour[:, 0] = contour[:, 0] - xmin + contour[:, 1] = contour[:, 1] - ymin + + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, shape_list): + pred = outs_dict['maps'] + if isinstance(pred, paddle.Tensor): + pred = pred.numpy() + pred = pred[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] + if self.dilation_kernel is not None: + mask = cv2.dilate( + np.array(segmentation[batch_index]).astype(np.uint8), + self.dilation_kernel) + else: + mask = segmentation[batch_index] + if self.box_type == 'poly': + boxes, scores = self.polygons_from_bitmap(pred[batch_index], + mask, src_w, src_h) + elif self.box_type == 'quad': + boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, + src_w, src_h) + else: + raise ValueError( + "box_type can only be one of ['quad', 'poly']") + + boxes_batch.append({'points': boxes}) + return boxes_batch + + +class BaseRecLabelDecode(object): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False): + self.beg_str = "sos" + self.end_str = "eos" + self.reverse = False + self.character_str = [] + + if character_dict_path is None: + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + else: + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n").strip("\r\n") + self.character_str.append(line) + if use_space_char: + self.character_str.append(" ") + dict_character = list(self.character_str) + if 'arabic' in character_dict_path: + self.reverse = True + + dict_character = self.add_special_char(dict_character) + self.dict = {} + for i, char in 
enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def pred_reverse(self, pred): + pred_re = [] + c_current = '' + for c in pred: + if not bool(re.search('[a-zA-Z0-9 :*./%+-]', c)): + if c_current != '': + pred_re.append(c_current) + pred_re.append(c) + c_current = '' + else: + c_current += c + if c_current != '': + pred_re.append(c_current) + + return ''.join(pred_re[::-1]) + + def add_special_char(self, dict_character): + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. """ + result_list = [] + ignored_tokens = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + selection = np.ones(len(text_index[batch_idx]), dtype=bool) + if is_remove_duplicate: + selection[1:] = text_index[batch_idx][1:] != text_index[ + batch_idx][:-1] + for ignored_token in ignored_tokens: + selection &= text_index[batch_idx] != ignored_token + + char_list = [ + self.character[text_id] + for text_id in text_index[batch_idx][selection] + ] + if text_prob is not None: + conf_list = text_prob[batch_idx][selection] + else: + conf_list = [1] * len(selection) + if len(conf_list) == 0: + conf_list = [0] + + text = ''.join(char_list) + + if self.reverse: # for arabic rec + text = self.pred_reverse(text) + + result_list.append((text, np.mean(conf_list).tolist())) + return result_list + + def get_ignored_tokens(self): + return [0] # for ctc blank + + +class CTCLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(CTCLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, tuple) or isinstance(preds, list): + preds = preds[-1] + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) + if label is None: + return text + label = self.decode(label) + return text, label + + def add_special_char(self, dict_character): + dict_character = ['blank'] + dict_character + return dict_character diff --git a/deepdoc/visual/recognizer.py b/deepdoc/visual/recognizer.py new file mode 100644 index 0000000..09ccbb3 --- /dev/null +++ b/deepdoc/visual/recognizer.py @@ -0,0 +1,139 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import onnxruntime as ort +from huggingface_hub import snapshot_download + +from .operators import * +from rag.settings import cron_logger + + +class Recognizer(object): + def __init__(self, label_list, task_name, model_dir=None): + """ + If you have trouble downloading HuggingFace models, -_^ this might help!! 
+ + For Linux: + export HF_ENDPOINT=https://hf-mirror.com + + For Windows: + Good luck + ^_- + + """ + if not model_dir: + model_dir = snapshot_download(repo_id="InfiniFlow/ocr") + + model_file_path = os.path.join(model_dir, task_name + ".onnx") + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + if ort.get_device() == "GPU": + self.ort_sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider']) + else: + self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider']) + self.label_list = label_list + + def create_inputs(self, imgs, im_info): + """generate input for different model type + Args: + imgs (list(numpy)): list of images (np.ndarray) + im_info (list(dict)): list of image info + Returns: + inputs (dict): input of model + """ + inputs = {} + + im_shape = [] + scale_factor = [] + if len(imgs) == 1: + inputs['image'] = np.array((imgs[0],)).astype('float32') + inputs['im_shape'] = np.array( + (im_info[0]['im_shape'],)).astype('float32') + inputs['scale_factor'] = np.array( + (im_info[0]['scale_factor'],)).astype('float32') + return inputs + + for e in im_info: + im_shape.append(np.array((e['im_shape'],)).astype('float32')) + scale_factor.append(np.array((e['scale_factor'],)).astype('float32')) + + inputs['im_shape'] = np.concatenate(im_shape, axis=0) + inputs['scale_factor'] = np.concatenate(scale_factor, axis=0) + + imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs] + max_shape_h = max([e[0] for e in imgs_shape]) + max_shape_w = max([e[1] for e in imgs_shape]) + padding_imgs = [] + for img in imgs: + im_c, im_h, im_w = img.shape[:] + padding_im = np.zeros( + (im_c, max_shape_h, max_shape_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = img + padding_imgs.append(padding_im) + inputs['image'] = np.stack(padding_imgs, axis=0) + return inputs + + def preprocess(self, image_list): + preprocess_ops = [] + for op_info in [ + {'interp': 2, 'keep_ratio': False, 'target_size': [800, 608], 'type': 'LinearResize'}, + {'is_scale': True, 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'type': 'StandardizeImage'}, + {'type': 'Permute'}, + {'stride': 32, 'type': 'PadStride'} + ]: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + + inputs = [] + for im_path in image_list: + im, im_info = preprocess(im_path, preprocess_ops) + inputs.append({"image": np.array((im,)).astype('float32'), "scale_factor": np.array((im_info["scale_factor"],)).astype('float32')}) + return inputs + + + def __call__(self, image_list, thr=0.7, batch_size=16): + res = [] + imgs = [] + for i in range(len(image_list)): + if not isinstance(image_list[i], np.ndarray): + imgs.append(np.array(image_list[i])) + else: imgs.append(image_list[i]) + + batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size) + for i in range(batch_loop_cnt): + start_index = i * batch_size + end_index = min((i + 1) * batch_size, len(imgs)) + batch_image_list = imgs[start_index:end_index] + inputs = self.preprocess(batch_image_list) + for ins in inputs: + bb = [] + for b in self.ort_sess.run(None, ins)[0]: + clsid, bbox, score = int(b[0]), b[2:], b[1] + if score < thr: + continue + if clsid >= len(self.label_list): + cron_logger.warning(f"bad category id") + continue + bb.append({ + "type": self.label_list[clsid].lower(), + "bbox": [float(t) for t in bbox.tolist()], + "score": float(score) + }) + res.append(bb) + + #seeit.save_results(image_list, res, 
self.label_list, threshold=thr) + + return res diff --git a/deepdoc/visual/seeit.py b/deepdoc/visual/seeit.py new file mode 100644 index 0000000..70e547f --- /dev/null +++ b/deepdoc/visual/seeit.py @@ -0,0 +1,83 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import PIL +from PIL import ImageDraw + + +def save_results(image_list, results, labels, output_dir='output/', threshold=0.5): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + for idx, im in enumerate(image_list): + im = draw_box(im, results[idx], labels, threshold=threshold) + + out_path = os.path.join(output_dir, f"{idx}.jpg") + im.save(out_path, quality=95) + print("save result to: " + out_path) + + +def draw_box(im, result, lables, threshold=0.5): + draw_thickness = min(im.size) // 320 + draw = ImageDraw.Draw(im) + color_list = get_color_map_list(len(lables)) + clsid2color = {n.lower():color_list[i] for i,n in enumerate(lables)} + result = [r for r in result if r["score"] >= threshold] + + for dt in result: + color = tuple(clsid2color[dt["type"]]) + xmin, ymin, xmax, ymax = dt["bbox"] + draw.line( + [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), + (xmin, ymin)], + width=draw_thickness, + fill=color) + + # draw label + text = "{} {:.4f}".format(dt["type"], dt["score"]) + tw, th = imagedraw_textsize_c(draw, text) + draw.rectangle( + [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color) + draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) + return im + + +def get_color_map_list(num_classes): + """ + Args: + num_classes (int): number of class + Returns: + color_map (list): RGB color list + """ + color_map = num_classes * [0, 0, 0] + for i in range(0, num_classes): + j = 0 + lab = i + while lab: + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) + j += 1 + lab >>= 3 + color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] + return color_map + + +def imagedraw_textsize_c(draw, text): + if int(PIL.__version__.split('.')[0]) < 10: + tw, th = draw.textsize(text) + else: + left, top, right, bottom = draw.textbbox((0, 0), text) + tw, th = right - left, bottom - top + + return tw, th diff --git a/rag/app/book.py b/rag/app/book.py index 75b9f08..c9996ae 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -1,15 +1,24 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import copy -import random import re -import numpy as np -from rag.parser import bullets_category, BULLET_PATTERN, is_english, tokenize, remove_contents_table, \ +from deepdoc.parser import bullets_category, is_english, tokenize, remove_contents_table, \ hierarchical_merge, make_colon_as_title, naive_merge, random_choices from rag.nlp import huqie -from rag.parser.docx_parser import HuDocxParser -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser, DocxParser -class Pdf(HuParser): +class Pdf(PdfParser): def __call__(self, filename, binary=None, from_page=0, to_page=100000, zoomin=3, callback=None): self.__images__( @@ -21,7 +30,7 @@ class Pdf(HuParser): from timeit import default_timer as timer start = timer() - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) callback(0.47, "Layout analysis finished") print("paddle layouts:", timer() - start) self._table_transformer_job(zoomin) @@ -53,7 +62,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k sections,tbls = [], [] if re.search(r"\.docx?$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") - doc_parser = HuDocxParser() + doc_parser = DocxParser() # TODO: table of contents need to be removed sections, tbls = doc_parser(binary if binary else filename, from_page=from_page, to_page=to_page) remove_contents_table(sections, eng=is_english(random_choices([t for t,_ in sections], k=200))) diff --git a/rag/app/laws.py b/rag/app/laws.py index 0c4bca1..f249870 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -1,16 +1,27 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import copy import re from io import BytesIO from docx import Document -from rag.parser import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \ +from deepdoc.parser import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \ make_colon_as_title from rag.nlp import huqie -from rag.parser.docx_parser import HuDocxParser -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser, DocxParser from rag.settings import cron_logger -class Docx(HuDocxParser): +class Docx(DocxParser): def __init__(self): pass @@ -35,7 +46,7 @@ class Docx(HuDocxParser): return [l for l in lines if l] -class Pdf(HuParser): +class Pdf(PdfParser): def __call__(self, filename, binary=None, from_page=0, to_page=100000, zoomin=3, callback=None): self.__images__( @@ -47,7 +58,7 @@ class Pdf(HuParser): from timeit import default_timer as timer start = timer() - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) callback(0.77, "Layout analysis finished") cron_logger.info("paddle layouts:".format((timer()-start)/(self.total_page+0.1))) self._naive_vertical_merge() diff --git a/rag/app/manual.py b/rag/app/manual.py index e8a9dad..9b051ec 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -1,12 +1,12 @@ import copy import re -from rag.parser import tokenize +from deepdoc.parser import tokenize from rag.nlp import huqie -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser from rag.utils import num_tokens_from_string -class Pdf(HuParser): +class Pdf(PdfParser): def __call__(self, filename, binary=None, from_page=0, to_page=100000, zoomin=3, callback=None): self.__images__( @@ -18,7 +18,7 @@ class Pdf(HuParser): from timeit import default_timer as timer start = timer() - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) callback(0.5, "Layout analysis finished.") print("paddle layouts:", timer() - start) self._table_transformer_job(zoomin) diff --git a/rag/app/naive.py b/rag/app/naive.py index 8c80d5f..aceb22f 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -1,13 +1,25 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import copy import re from rag.app import laws -from rag.parser import is_english, tokenize, naive_merge +from deepdoc.parser import is_english, tokenize, naive_merge from rag.nlp import huqie -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser from rag.settings import cron_logger -class Pdf(HuParser): +class Pdf(PdfParser): def __call__(self, filename, binary=None, from_page=0, to_page=100000, zoomin=3, callback=None): self.__images__( @@ -19,7 +31,7 @@ class Pdf(HuParser): from timeit import default_timer as timer start = timer() - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) callback(0.77, "Layout analysis finished") cron_logger.info("paddle layouts:".format((timer() - start) / (self.total_page + 0.1))) self._naive_vertical_merge() diff --git a/rag/app/paper.py b/rag/app/paper.py index 4f464ac..ac9afd2 100644 --- a/rag/app/paper.py +++ b/rag/app/paper.py @@ -1,16 +1,28 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import copy import re from collections import Counter from api.db import ParserType -from rag.parser import tokenize +from deepdoc.parser import tokenize from rag.nlp import huqie -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser import numpy as np from rag.utils import num_tokens_from_string -class Pdf(HuParser): +class Pdf(PdfParser): def __init__(self): self.model_speciess = ParserType.PAPER.value super().__init__() @@ -26,7 +38,7 @@ class Pdf(HuParser): from timeit import default_timer as timer start = timer() - self._layouts_paddle(zoomin) + self._layouts_rec(zoomin) callback(0.47, "Layout analysis finished") print("paddle layouts:", timer() - start) self._table_transformer_job(zoomin) diff --git a/rag/app/presentation.py b/rag/app/presentation.py index afcb8f2..2cb6606 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -1,11 +1,22 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import copy import re from io import BytesIO from pptx import Presentation - -from rag.parser import tokenize, is_english +from deepdoc.parser import tokenize, is_english from rag.nlp import huqie -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import PdfParser class Ppt(object): @@ -58,7 +69,7 @@ class Ppt(object): return [(txts[i], imgs[i]) for i in range(len(txts))] -class Pdf(HuParser): +class Pdf(PdfParser): def __init__(self): super().__init__() @@ -74,7 +85,7 @@ class Pdf(HuParser): assert len(self.boxes) == len(self.page_images), "{} vs. 
{}".format(len(self.boxes), len(self.page_images)) res = [] #################### More precisely ################### - # self._layouts_paddle(zoomin) + # self._layouts_rec(zoomin) # self._text_merge() # pages = {} # for b in self.boxes: diff --git a/rag/app/qa.py b/rag/app/qa.py index 9d55d1b..34615a8 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -1,13 +1,25 @@ -import random +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import re from io import BytesIO from nltk import word_tokenize from openpyxl import load_workbook -from rag.parser import is_english, random_choices +from deepdoc.parser import is_english, random_choices from rag.nlp import huqie, stemmer +from deepdoc.parser import ExcelParser -class Excel(object): +class Excel(ExcelParser): def __call__(self, fnm, binary=None, callback=None): if not binary: wb = load_workbook(fnm) diff --git a/rag/app/resume.py b/rag/app/resume.py index fd9dc03..8b4ca01 100644 --- a/rag/app/resume.py +++ b/rag/app/resume.py @@ -1,59 +1,82 @@ -import copy +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import base64 +import datetime import json -import os import re + +import pandas as pd import requests from api.db.services.knowledgebase_service import KnowledgebaseService -from api.settings import stat_logger from rag.nlp import huqie - +from deepdoc.parser.resume import refactor +from deepdoc.parser.resume import step_one, step_two from rag.settings import cron_logger from rag.utils import rmSpace forbidden_select_fields4resume = [ "name_pinyin_kwd", "edu_first_fea_kwd", "degree_kwd", "sch_rank_kwd", "edu_fea_kwd" ] +def remote_call(filename, binary): + q = { + "header": { + "uid": 1, + "user": "kevinhu", + "log_id": filename + }, + "request": { + "p": { + "request_id": "1", + "encrypt_type": "base64", + "filename": filename, + "langtype": '', + "fileori": base64.b64encode(binary.stream.read()).decode('utf-8') + }, + "c": "resume_parse_module", + "m": "resume_parse" + } + } + for _ in range(3): + try: + resume = requests.post("http://127.0.0.1:61670/tog", data=json.dumps(q)) + resume = resume.json()["response"]["results"] + resume = refactor(resume) + for k in ["education", "work", "project", "training", "skill", "certificate", "language"]: + if not resume.get(k) and k in resume: del resume[k] + + resume = step_one.refactor(pd.DataFrame([{"resume_content": json.dumps(resume), "tob_resume_id": "x", + "updated_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}])) + resume = step_two.parse(resume) + return resume + except Exception as e: + cron_logger.error("Resume parser error: "+str(e)) + return {} + def chunk(filename, binary=None, callback=None, **kwargs): """ The supported file formats are pdf, docx and txt. - To maximize the effectiveness, parse the resume correctly, - please visit https://github.com/infiniflow/ragflow, and sign in the our demo web-site - to get token. It's FREE! - Set INFINIFLOW_SERVER and INFINIFLOW_TOKEN in '.env' file or - using 'export' to set both environment variables: INFINIFLOW_SERVER and INFINIFLOW_TOKEN in docker container. + To maximize the effectiveness, parse the resume correctly, please concat us: https://github.com/infiniflow/ragflow """ if not re.search(r"\.(pdf|doc|docx|txt)$", filename, flags=re.IGNORECASE): raise NotImplementedError("file type not supported yet(pdf supported)") - url = os.environ.get("INFINIFLOW_SERVER") - token = os.environ.get("INFINIFLOW_TOKEN") - if not url or not token: - stat_logger.warning( - "INFINIFLOW_SERVER is not specified. To maximize the effectiveness, please visit https://github.com/infiniflow/ragflow, and sign in the our demo web site to get token. It's FREE! 
Using 'export' to set both environment variables: INFINIFLOW_SERVER and INFINIFLOW_TOKEN.") - return [] - if not binary: with open(filename, "rb") as f: binary = f.read() - def remote_call(): - nonlocal filename, binary - for _ in range(3): - try: - res = requests.post(url + "/v1/layout/resume/", files=[(filename, binary)], - headers={"Authorization": token}, timeout=180) - res = res.json() - if res["retcode"] != 0: - raise RuntimeError(res["retmsg"]) - return res["data"] - except RuntimeError as e: - raise e - except Exception as e: - cron_logger.error("resume parsing:" + str(e)) - callback(0.2, "Resume parsing is going on...") - resume = remote_call() + resume = remote_call(filename, binary) if len(resume.keys()) < 7: callback(-1, "Resume is not successfully parsed.") return [] diff --git a/rag/app/table.py b/rag/app/table.py index c80b3fb..6352843 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -1,3 +1,15 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import copy import re from io import BytesIO @@ -8,11 +20,12 @@ from openpyxl import load_workbook from dateutil.parser import parse as datetime_parse from api.db.services.knowledgebase_service import KnowledgebaseService -from rag.parser import is_english, tokenize -from rag.nlp import huqie, stemmer +from deepdoc.parser import is_english, tokenize +from rag.nlp import huqie +from deepdoc.parser import ExcelParser -class Excel(object): +class Excel(ExcelParser): def __call__(self, fnm, binary=None, callback=None): if not binary: wb = load_workbook(fnm) diff --git a/rag/nlp/huchunk.py b/rag/nlp/huchunk.py index ba81a46..bb2d46f 100644 --- a/rag/nlp/huchunk.py +++ b/rag/nlp/huchunk.py @@ -1,3 +1,15 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import re import os import copy @@ -443,13 +455,13 @@ if __name__ == "__main__": import sys sys.path.append(os.path.dirname(__file__) + "/../") if sys.argv[1].split(".")[-1].lower() == "pdf": - from parser import PdfParser + from deepdoc.parser import PdfParser ckr = PdfChunker(PdfParser()) if sys.argv[1].split(".")[-1].lower().find("doc") >= 0: - from parser import DocxParser + from deepdoc.parser import DocxParser ckr = DocxChunker(DocxParser()) if sys.argv[1].split(".")[-1].lower().find("xlsx") >= 0: - from parser import ExcelParser + from deepdoc.parser import ExcelParser ckr = ExcelChunker(ExcelParser()) # ckr.html(sys.argv[1]) diff --git a/rag/svr/task_broker.py b/rag/svr/task_broker.py index e501042..1204713 100644 --- a/rag/svr/task_broker.py +++ b/rag/svr/task_broker.py @@ -21,7 +21,7 @@ from datetime import datetime from api.db.db_models import Task from api.db.db_utils import bulk_insert_into_db from api.db.services.task_service import TaskService -from rag.parser.pdf_parser import HuParser +from deepdoc.parser import HuParser from rag.settings import cron_logger from rag.utils import MINIO from rag.utils import findMaxTm -- GitLab
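
The new deepdoc.visual package ties the ONNX models (fetched from the InfiniFlow/ocr snapshot on Hugging Face) to the preprocessing operators and the Recognizer wrapper added above. The sketch below shows one way to drive the detector directly; it is not part of this patch, and both the label list and the "layout" task name are illustrative assumptions rather than values defined in the change.

    # Minimal usage sketch for deepdoc.visual.Recognizer (assumptions noted inline).
    import numpy as np
    from PIL import Image

    from deepdoc.visual.recognizer import Recognizer

    # Hypothetical label list: it must match the classes the chosen ONNX model was trained on.
    LABELS = ["text", "title", "figure", "table", "header", "footer"]

    # "layout" is an assumed task name; Recognizer loads "<task_name>.onnx" from the
    # downloaded InfiniFlow/ocr snapshot, or from model_dir= if a local directory is given.
    detector = Recognizer(LABELS, "layout")

    # If Hugging Face is unreachable: export HF_ENDPOINT=https://hf-mirror.com (per the docstring above).
    page = np.array(Image.open("page0.png").convert("RGB"))

    # One result list per input image; each detection is {"type", "bbox", "score"}.
    for box in detector([page], thr=0.7)[0]:
        print(box["type"], round(box["score"], 3), box["bbox"])

Detections scoring below thr are dropped inside __call__, and class ids that fall outside the supplied label list are skipped with a warning, so an overly short label list will silently hide boxes.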