This commit is contained in:
散仙
2024-11-21 18:52:59 +08:00
parent 16163e42c0
commit 6500fd5b92
2 changed files with 7 additions and 7 deletions

9
pp.py
View File

@@ -12,6 +12,7 @@ import requests
import numpy as np import numpy as np
import warnings import warnings
import logging import logging
import paddle
from PIL import Image, ImageFile from PIL import Image, ImageFile
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
@@ -128,7 +129,7 @@ def process_images(conn, offset=0) -> int:
max_confidence_language = max(confidences, key=confidences.get) max_confidence_language = max(confidences, key=confidences.get)
languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru} languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru}
data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]] data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]]
print("data:", data) #print("data:", data)
# 转换为字符串存储到索引库 # 转换为字符串存储到索引库
obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) } obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
@@ -137,15 +138,15 @@ def process_images(conn, offset=0) -> int:
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"]) print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"])
# 转换为 JSON 存储到数据库 # 转换为 JSON 存储到数据库
with conn.cursor() as cursor: with conn.cursor() as c:
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder) data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id)) c.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
conn.commit() conn.commit()
return offset+100 return offset+100
def main(): def main():
conn = connect_to_mysql() conn = connect_to_mysql()
offset = 1500 offset = 2000
while True: while True:
offset = process_images(conn, offset) offset = process_images(conn, offset)
time.sleep(0) time.sleep(0)

View File

@@ -1,9 +1,8 @@
whell==0.45.0 wheel==0.45.0
elasticsearch==8.11.0
numpy==1.26.2 numpy==1.26.2
oss2==2.18.3 oss2==2.18.3
paddleocr==2.7.0.3 paddleocr==2.7.0.3
paddleocr.egg==info paddlepaddle-gpu==2.6.2
Pillow==10.1.0 Pillow==10.1.0
PyMySQL==1.1.0 PyMySQL==1.1.0
python-dotenv==1.0.0 python-dotenv==1.0.0