合併寫入
This commit is contained in:
12
pp.py
12
pp.py
@@ -12,6 +12,7 @@ import requests
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from PIL import Image, ImageFile
|
from PIL import Image, ImageFile
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
@@ -47,11 +48,12 @@ def download_image(url: str, max_size=32767) -> Image.Image:
|
|||||||
try:
|
try:
|
||||||
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
|
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
|
||||||
url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
|
url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
|
||||||
oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
|
|
||||||
if os.path.exists(url):
|
if os.path.exists(url):
|
||||||
|
print(f'从本地读取图片 {url}')
|
||||||
img = Image.open(url)
|
img = Image.open(url)
|
||||||
else:
|
else:
|
||||||
print(f'从OSS下载图片 {url}')
|
print(f'从OSS下载图片 {url}')
|
||||||
|
oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
|
||||||
img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
|
img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
|
||||||
else:
|
else:
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
@@ -79,9 +81,12 @@ JP = PaddleOCR(use_angle_cls=True, lang="japan")
|
|||||||
KR = PaddleOCR(use_angle_cls=True, lang="korean")
|
KR = PaddleOCR(use_angle_cls=True, lang="korean")
|
||||||
RU = PaddleOCR(use_angle_cls=True, lang="ru")
|
RU = PaddleOCR(use_angle_cls=True, lang="ru")
|
||||||
|
|
||||||
|
offset=1500
|
||||||
|
|
||||||
def process_images(conn):
|
def process_images(conn):
|
||||||
|
global offset
|
||||||
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
|
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
|
||||||
cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10")
|
cursor.execute("SELECT id, content FROM web_images WHERE text='' AND article_category_top_id=22 LIMIT 100 OFFSET %s", (offset,))
|
||||||
for id, content in cursor.fetchall():
|
for id, content in cursor.fetchall():
|
||||||
image = download_image(content)
|
image = download_image(content)
|
||||||
if image is None:
|
if image is None:
|
||||||
@@ -184,12 +189,13 @@ def process_images(conn):
|
|||||||
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
|
data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
|
||||||
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
|
cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
offset+=100
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
conn = connect_to_mysql()
|
conn = connect_to_mysql()
|
||||||
while True:
|
while True:
|
||||||
process_images(conn)
|
process_images(conn)
|
||||||
time.sleep(10)
|
time.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
wheel==0.42.0
|
wheel==0.45.0
|
||||||
elasticsearch==8.11.0
|
elasticsearch==8.11.0
|
||||||
numpy==1.26.2
|
numpy==1.26.2
|
||||||
oss2==2.18.3
|
oss2==2.18.3
|
||||||
|
Reference in New Issue
Block a user