From d78b9f63b2e5ea6118897d5eeae0352f2529b3ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A7=89?= Date: Sat, 2 Dec 2023 15:18:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=97=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pp.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pp.py b/pp.py index ecfd4f1..961cbbe 100755 --- a/pp.py +++ b/pp.py @@ -48,6 +48,7 @@ def save_text(conn, id:int, text:str): with conn.cursor() as cursor: cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (text, id)) +EN = PaddleOCR(use_angle_cls=True, lang="en") CH = PaddleOCR(use_angle_cls=True, lang="ch") JP = PaddleOCR(use_angle_cls=True, lang="japan") KR = PaddleOCR(use_angle_cls=True, lang="korean") @@ -62,9 +63,18 @@ def process_images(conn, es): if isinstance(image, Image.Image): image = np.array(image) print('---------------------', id, content) - print(CH.ocr(image, cls=True)) - print(JP.ocr(image, cls=True)) - print(KR.ocr(image, cls=True)) + for line in EN.ocr(image, cls=True)[0]: + print('EN', line) + for line in CH.ocr(image, cls=True)[0]: + print('CH', line) + for line in JP.ocr(image, cls=True)[0]: + print('JP', line) + for line in KR.ocr(image, cls=True)[0]: + print('KR', line) + #print(EN.ocr(image, cls=True)) + #print(CH.ocr(image, cls=True)) + #print(JP.ocr(image, cls=True)) + #print(KR.ocr(image, cls=True)) # item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1] # text = ' '.join([x['text'] for x in item]) # print(id, text)