debug
This commit is contained in:
9
pp.py
9
pp.py
@@ -12,6 +12,7 @@ import requests
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
|
import paddle
|
||||||
|
|
||||||
from PIL import Image, ImageFile
|
from PIL import Image, ImageFile
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
@@ -128,7 +129,7 @@ def process_images(conn, offset=0) -> int:
|
|||||||
max_confidence_language = max(confidences, key=confidences.get)
|
max_confidence_language = max(confidences, key=confidences.get)
|
||||||
languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru}
|
languages = {'en': en, 'ch': ch, 'jp': jp, 'kr': kr, 'ru': ru}
|
||||||
data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]]
|
data = [{'text': text[0], 'confidence': text[1], 'coordinate': coord} for coord, text in languages[max_confidence_language]]
|
||||||
print("data:", data)
|
#print("data:", data)
|
||||||
|
|
||||||
# 转换为字符串存储到索引库
|
# 转换为字符串存储到索引库
|
||||||
obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
|
obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
|
||||||
@@ -137,15 +138,15 @@ def process_images(conn, offset=0) -> int:
|
|||||||
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"])
|
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else obj["id"], obj["text"])
|
||||||
|
|
||||||
# 转换为 JSON 存储到数据库
|
# 转换为 JSON 存储到数据库
|
||||||
with conn.cursor() as cursor:
|
with conn.cursor() as c:
|
||||||
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
|
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
|
||||||
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
|
c.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
return offset+100
|
return offset+100
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
conn = connect_to_mysql()
|
conn = connect_to_mysql()
|
||||||
offset = 1500
|
offset = 2000
|
||||||
while True:
|
while True:
|
||||||
offset = process_images(conn, offset)
|
offset = process_images(conn, offset)
|
||||||
time.sleep(0)
|
time.sleep(0)
|
||||||
|
@@ -1,9 +1,8 @@
|
|||||||
whell==0.45.0
|
wheel==0.45.0
|
||||||
elasticsearch==8.11.0
|
|
||||||
numpy==1.26.2
|
numpy==1.26.2
|
||||||
oss2==2.18.3
|
oss2==2.18.3
|
||||||
paddleocr==2.7.0.3
|
paddleocr==2.7.0.3
|
||||||
paddleocr.egg==info
|
paddlepaddle-gpu==2.6.2
|
||||||
Pillow==10.1.0
|
Pillow==10.1.0
|
||||||
PyMySQL==1.1.0
|
PyMySQL==1.1.0
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
|
Reference in New Issue
Block a user