合併寫入

This commit is contained in:
散仙
2024-11-19 14:03:17 +08:00
parent 0d43a639da
commit d7161c7df1
2 changed files with 10 additions and 4 deletions

12
pp.py
View File

@@ -12,6 +12,7 @@ import requests
import numpy as np import numpy as np
import warnings import warnings
import logging import logging
from PIL import Image, ImageFile from PIL import Image, ImageFile
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
@@ -47,11 +48,12 @@ def download_image(url: str, max_size=32767) -> Image.Image:
try: try:
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'): if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '') url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
if os.path.exists(url): if os.path.exists(url):
print(f'从本地读取图片 {url}')
img = Image.open(url) img = Image.open(url)
else: else:
print(f'从OSS下载图片 {url}') print(f'从OSS下载图片 {url}')
oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read())) img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
else: else:
response = requests.get(url) response = requests.get(url)
@@ -79,9 +81,12 @@ JP = PaddleOCR(use_angle_cls=True, lang="japan")
KR = PaddleOCR(use_angle_cls=True, lang="korean") KR = PaddleOCR(use_angle_cls=True, lang="korean")
RU = PaddleOCR(use_angle_cls=True, lang="ru") RU = PaddleOCR(use_angle_cls=True, lang="ru")
offset=1500
def process_images(conn): def process_images(conn):
global offset
with conn.cursor(pymysql.cursors.SSCursor) as cursor: with conn.cursor(pymysql.cursors.SSCursor) as cursor:
cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10") cursor.execute("SELECT id, content FROM web_images WHERE text='' AND article_category_top_id=22 LIMIT 100 OFFSET %s", (offset,))
for id, content in cursor.fetchall(): for id, content in cursor.fetchall():
image = download_image(content) image = download_image(content)
if image is None: if image is None:
@@ -184,12 +189,13 @@ def process_images(conn):
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder) data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id)) cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
conn.commit() conn.commit()
offset+=100
def main(): def main():
conn = connect_to_mysql() conn = connect_to_mysql()
while True: while True:
process_images(conn) process_images(conn)
time.sleep(10) time.sleep(0)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
wheel==0.42.0 wheel==0.45.0
elasticsearch==8.11.0 elasticsearch==8.11.0
numpy==1.26.2 numpy==1.26.2
oss2==2.18.3 oss2==2.18.3