This commit is contained in:
2023-12-02 15:18:15 +08:00
parent 0dbd957454
commit d78b9f63b2

16
pp.py
View File

@@ -48,6 +48,7 @@ def save_text(conn, id:int, text:str):
with conn.cursor() as cursor:
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (text, id))
# Module-level OCR engines: one PaddleOCR instance per language code
# ("en", "ch", "japan", "korean"), each created with use_angle_cls=True.
# NOTE(review): every instance is constructed at import time; presumably each
# one loads its own models, so confirm the startup/memory cost is acceptable.
EN = PaddleOCR(use_angle_cls=True, lang="en")
CH = PaddleOCR(use_angle_cls=True, lang="ch")
JP = PaddleOCR(use_angle_cls=True, lang="japan")
KR = PaddleOCR(use_angle_cls=True, lang="korean")
@@ -62,9 +63,18 @@ def process_images(conn, es):
if isinstance(image, Image.Image):
image = np.array(image)
print('---------------------', id, content)
print(CH.ocr(image, cls=True))
print(JP.ocr(image, cls=True))
print(KR.ocr(image, cls=True))
for line in EN.ocr(image, cls=True)[0]:
print('EN', line)
for line in CH.ocr(image, cls=True)[0]:
print('CH', line)
for line in JP.ocr(image, cls=True)[0]:
print('JP', line)
for line in KR.ocr(image, cls=True)[0]:
print('KR', line)
#print(EN.ocr(image, cls=True))
#print(CH.ocr(image, cls=True))
#print(JP.ocr(image, cls=True))
#print(KR.ocr(image, cls=True))
# item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
# text = ' '.join([x['text'] for x in item])
# print(id, text)