This commit is contained in:
散仙
2024-11-21 18:52:59 +08:00
parent 16163e42c0
commit 6500fd5b92
2 changed files with 7 additions and 7 deletions

9
pp.py
View File

@@ -12,6 +12,7 @@ import requests
import numpy as np
import warnings
import logging
import paddle
from PIL import Image, ImageFile
from paddleocr import PaddleOCR
@@ -128,7 +129,7 @@ def process_images(conn, offset=0) -> int:
max_confidence_language = max(confidences, key=confidences.get)
languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru}
data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]]
print("data:", data)
#print("data:", data)
# 转换为字符串存储到索引库
obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
@@ -137,15 +138,15 @@ def process_images(conn, offset=0) -> int:
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"])
# 转换为 JSON 存储到数据库
with conn.cursor() as cursor:
with conn.cursor() as c:
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
c.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
conn.commit()
return offset+100
def main():
conn = connect_to_mysql()
offset = 1500
offset = 2000
while True:
offset = process_images(conn, offset)
time.sleep(0)

View File

@@ -1,9 +1,8 @@
whell==0.45.0
elasticsearch==8.11.0
wheel==0.45.0
numpy==1.26.2
oss2==2.18.3
paddleocr==2.7.0.3
paddleocr.egg==info
paddlepaddle-gpu==2.6.2
Pillow==10.1.0
PyMySQL==1.1.0
python-dotenv==1.0.0