From 1e8be5dd8264e8dfbe0bc9d8e400575f9274a56a Mon Sep 17 00:00:00 2001 From: satori Date: Sun, 3 Dec 2023 17:34:25 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E6=9B=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pp.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pp.py b/pp.py index d52c46a..2f266ea 100755 --- a/pp.py +++ b/pp.py @@ -31,6 +31,9 @@ class MyEncoder(json.JSONEncoder): def download_image(url: str) -> Image.Image: + if url.endswith('.gif') or url.endswith('.GIF'): + print(f'跳过GIF {url}') + return None try: if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'): url = url.replace('http://image.gameuiux.cn/', @@ -65,7 +68,7 @@ RU = PaddleOCR(use_angle_cls=True, lang="ru") def process_images(conn, es): with conn.cursor(pymysql.cursors.SSCursor) as cursor: - cursor.execute("SELECT id, content FROM web_images LIMIT 0,10") # WHERE text!='' + cursor.execute("SELECT id, content FROM web_images WHERE text='' LIMIT 0,1000") for id, content in cursor.fetchall(): image = download_image(content) if image is None: @@ -79,6 +82,12 @@ def process_images(conn, es): kr = KR.ocr(image, cls=True)[0] ru = RU.ocr(image, cls=True)[0] + en = en if en is not None else [] + ch = ch if ch is not None else [] + jp = jp if jp is not None else [] + kr = kr if kr is not None else [] + ru = ru if ru is not None else [] + # 排除字符长度小于2的行 jp = [x for x in jp if len(x[1][0]) > 1] kr = [x for x in kr if len(x[1][0]) > 1] @@ -175,4 +184,5 @@ def main(): if __name__ == "__main__": - main() + for _ in range(1000): + main()