持续运行

This commit is contained in:
2024-11-19 04:26:17 +08:00
parent 77589044c9
commit 0d43a639da

37
pp.py
View File

@@ -1,7 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os
import io import io
import oss2 import oss2
import time
import json import json
import base64 import base64
import dotenv import dotenv
@@ -46,7 +48,11 @@ def download_image(url: str, max_size=32767) -> Image.Image:
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'): if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '') url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET']) oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read())) if os.path.exists(url):
img = Image.open(url)
else:
print(f'从OSS下载图片 {url}')
img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
else: else:
response = requests.get(url) response = requests.get(url)
img = Image.open(io.BytesIO(response.content)) img = Image.open(io.BytesIO(response.content))
@@ -65,10 +71,6 @@ def connect_to_mysql():
return pymysql.connect(host=config['MYSQL_HOST'], user=config['MYSQL_USER'], password=config['MYSQL_PASSWORD'], database=config['MYSQL_NAME'], cursorclass=pymysql.cursors.SSDictCursor) return pymysql.connect(host=config['MYSQL_HOST'], user=config['MYSQL_USER'], password=config['MYSQL_PASSWORD'], database=config['MYSQL_NAME'], cursorclass=pymysql.cursors.SSDictCursor)
def save_text(conn, id: int, text: str):
with conn.cursor() as cursor:
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (text, id))
# 中英日韩俄 # 中英日韩俄
EN = PaddleOCR(use_angle_cls=True, lang="en") EN = PaddleOCR(use_angle_cls=True, lang="en")
@@ -79,7 +81,7 @@ RU = PaddleOCR(use_angle_cls=True, lang="ru")
def process_images(conn): def process_images(conn):
with conn.cursor(pymysql.cursors.SSCursor) as cursor: with conn.cursor(pymysql.cursors.SSCursor) as cursor:
cursor.execute("SELECT id, content FROM web_images WHERE id>222193 AND text='' LIMIT 10") cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10")
for id, content in cursor.fetchall(): for id, content in cursor.fetchall():
image = download_image(content) image = download_image(content)
if image is None: if image is None:
@@ -173,23 +175,22 @@ def process_images(conn):
data.append({'text': 文本[0], 'confidence': 文本[1], 'coordinate': 坐标 }) data.append({'text': 文本[0], 'confidence': 文本[1], 'coordinate': 坐标 })
# 转换为字符串存储到索引库 # 转换为字符串存储到索引库
text = ' '.join([x['text'] for x in data]) obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
res = requests.put(zinc_url, headers=headers, data=json.dumps(obj), proxies={'http': '', 'https': ''})
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else id, text)
# 转换为 JSON 存储到数据库 # 转换为 JSON 存储到数据库
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder) with conn.cursor() as cursor:
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
print(id, text) cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
conn.commit()
save_text(conn, id, data)
res = requests.put(zinc_url, headers=headers, data=json.dumps(data), proxies={'http': '', 'https': ''})
print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else id, text)
conn.commit()
def main(): def main():
conn = connect_to_mysql() conn = connect_to_mysql()
process_images(conn) while True:
process_images(conn)
time.sleep(10)
if __name__ == "__main__": if __name__ == "__main__":
for _ in range(1): main()
main()