From 1e8be5dd8264e8dfbe0bc9d8e400575f9274a56a Mon Sep 17 00:00:00 2001
From: satori <huan0016@gmail.com>
Date: Sun, 3 Dec 2023 17:34:25 +0800
Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E6=9B=B4=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pp.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/pp.py b/pp.py
index d52c46a..2f266ea 100755
--- a/pp.py
+++ b/pp.py
@@ -31,6 +31,9 @@ class MyEncoder(json.JSONEncoder):
 
 
 def download_image(url: str) -> Image.Image:
+    if url.endswith('.gif') or url.endswith('.GIF'):
+        print(f'跳过GIF {url}')
+        return None
     try:
         if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
             url = url.replace('http://image.gameuiux.cn/',
@@ -65,7 +68,7 @@ RU = PaddleOCR(use_angle_cls=True, lang="ru")
 
 def process_images(conn, es):
     with conn.cursor(pymysql.cursors.SSCursor) as cursor:
-        cursor.execute("SELECT id, content FROM web_images LIMIT 0,10") # WHERE text!=''
+        cursor.execute("SELECT id, content FROM web_images WHERE text='' LIMIT 0,1000")
         for id, content in cursor.fetchall():
             image = download_image(content)
             if image is None:
@@ -79,6 +82,12 @@ def process_images(conn, es):
             kr = KR.ocr(image, cls=True)[0]
             ru = RU.ocr(image, cls=True)[0]
 
+            en = en if en is not None else []
+            ch = ch if ch is not None else []
+            jp = jp if jp is not None else []
+            kr = kr if kr is not None else []
+            ru = ru if ru is not None else []
+
             # 排除字符长度小于2的行
             jp = [x for x in jp if len(x[1][0]) > 1]
             kr = [x for x in kr if len(x[1][0]) > 1]
@@ -175,4 +184,5 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    for _ in range(1000):
+        main()