diff --git a/pp.py b/pp.py index dd6a41d..dafa580 100755 --- a/pp.py +++ b/pp.py @@ -12,6 +12,7 @@ import requests import numpy as np import warnings import logging +import paddle from PIL import Image, ImageFile from paddleocr import PaddleOCR @@ -128,7 +129,7 @@ def process_images(conn, offset=0) -> int: max_confidence_language = max(confidences, key=confidences.get) languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru} data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]] - print("data:", data) + #print("data:", data) # 转换为字符串存储到索引库 obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) } @@ -137,15 +138,15 @@ def process_images(conn, offset=0) -> int: print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"]) # 转换为 JSON 存储到数据库 - with conn.cursor() as cursor: + with conn.cursor() as c: data = json.dumps(data, ensure_ascii=False, cls=MyEncoder) - cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id)) + c.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id)) conn.commit() return offset+100 def main(): conn = connect_to_mysql() - offset = 1500 + offset = 2000 while True: offset = process_images(conn, offset) time.sleep(0) diff --git a/requirements.txt b/requirements.txt index 18dd039..c21a2e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ -whell==0.45.0 -elasticsearch==8.11.0 +wheel==0.45.0 numpy==1.26.2 oss2==2.18.3 paddleocr==2.7.0.3 -paddleocr.egg==info +paddlepaddle-gpu==2.6.2 Pillow==10.1.0 PyMySQL==1.1.0 python-dotenv==1.0.0