From d7161c7df1019dea1cd20712520a8b19990afcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=95=A3=E4=BB=99?= Date: Tue, 19 Nov 2024 14:03:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=88=E4=BD=B5=E5=AF=AB=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pp.py | 12 +++++++++--- requirements.txt | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pp.py b/pp.py index 0e95779..dcb4f06 100755 --- a/pp.py +++ b/pp.py @@ -12,6 +12,7 @@ import requests import numpy as np import warnings import logging + from PIL import Image, ImageFile from paddleocr import PaddleOCR @@ -47,11 +48,12 @@ def download_image(url: str, max_size=32767) -> Image.Image: try: if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'): url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '') - oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET']) if os.path.exists(url): + print(f'从本地读取图片 {url}') img = Image.open(url) else: print(f'从OSS下载图片 {url}') + oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET']) img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read())) else: response = requests.get(url) @@ -79,9 +81,12 @@ JP = PaddleOCR(use_angle_cls=True, lang="japan") KR = PaddleOCR(use_angle_cls=True, lang="korean") RU = PaddleOCR(use_angle_cls=True, lang="ru") +offset=1500 + def process_images(conn): + global offset with conn.cursor(pymysql.cursors.SSCursor) as cursor: - cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10") + cursor.execute("SELECT id, content FROM web_images WHERE text='' AND article_category_top_id=22 LIMIT 100 OFFSET %s", (offset,)) for id, content in cursor.fetchall(): image = download_image(content) if image is None: @@ -184,12 +189,13 @@ def process_images(conn): data = json.dumps(data, ensure_ascii=False, cls=MyEncoder) cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id)) conn.commit() + offset+=100 def main(): conn = connect_to_mysql() while True: process_images(conn) - time.sleep(10) + time.sleep(0) if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 2545c47..18dd039 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -whell==0.42.0 +whell==0.45.0 elasticsearch==8.11.0 numpy==1.26.2 oss2==2.18.3