From d78b9f63b2e5ea6118897d5eeae0352f2529b3ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=A7=89?= <huan0016@gmail.com>
Date: Sat, 2 Dec 2023 15:18:15 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=97=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pp.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/pp.py b/pp.py
index ecfd4f1..961cbbe 100755
--- a/pp.py
+++ b/pp.py
@@ -48,6 +48,7 @@ def save_text(conn, id:int, text:str):
     with conn.cursor() as cursor:
         cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (text, id))
 
+EN = PaddleOCR(use_angle_cls=True, lang="en")
 CH = PaddleOCR(use_angle_cls=True, lang="ch")
 JP = PaddleOCR(use_angle_cls=True, lang="japan")
 KR = PaddleOCR(use_angle_cls=True, lang="korean")
@@ -62,9 +63,18 @@ def process_images(conn, es):
             if isinstance(image, Image.Image):
                 image = np.array(image)
             print('---------------------', id, content)
-            print(CH.ocr(image, cls=True))
-            print(JP.ocr(image, cls=True))
-            print(KR.ocr(image, cls=True))
+            # Run each language model on the same image and tag every
+            # detected line with the model that produced it.
+            engines = (('EN', EN), ('CH', CH), ('JP', JP), ('KR', KR))
+            for tag, engine in engines:
+                # ocr() yields one entry per input image, and PaddleOCR
+                # may return None for an image with no detected text
+                # (version dependent -- verify against the installed
+                # release). Fall back to an empty tuple so that case
+                # prints nothing instead of raising TypeError.
+                detected = engine.ocr(image, cls=True)[0] or ()
+                for line in detected:
+                    print(tag, line)
         #    item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
         #    text = ' '.join([x['text'] for x in item])
         #    print(id, text)