合併寫入
This commit is contained in:
		
							
								
								
									
										12
									
								
								pp.py
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								pp.py
									
									
									
									
									
								
							@@ -12,6 +12,7 @@ import requests
 | 
				
			|||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
import warnings
 | 
					import warnings
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from PIL import Image, ImageFile
 | 
					from PIL import Image, ImageFile
 | 
				
			||||||
from paddleocr import PaddleOCR
 | 
					from paddleocr import PaddleOCR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -47,11 +48,12 @@ def download_image(url: str, max_size=32767) -> Image.Image:
 | 
				
			|||||||
    try:
 | 
					    try:
 | 
				
			||||||
        if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
 | 
					        if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
 | 
				
			||||||
            url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
 | 
					            url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
 | 
				
			||||||
            oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
 | 
					 | 
				
			||||||
            if os.path.exists(url):
 | 
					            if os.path.exists(url):
 | 
				
			||||||
 | 
					                print(f'从本地读取图片 {url}')
 | 
				
			||||||
                img = Image.open(url)
 | 
					                img = Image.open(url)
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                print(f'从OSS下载图片 {url}')
 | 
					                print(f'从OSS下载图片 {url}')
 | 
				
			||||||
 | 
					                oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
 | 
				
			||||||
                img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
 | 
					                img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            response = requests.get(url)
 | 
					            response = requests.get(url)
 | 
				
			||||||
@@ -79,9 +81,12 @@ JP = PaddleOCR(use_angle_cls=True, lang="japan")
 | 
				
			|||||||
KR = PaddleOCR(use_angle_cls=True, lang="korean")
 | 
					KR = PaddleOCR(use_angle_cls=True, lang="korean")
 | 
				
			||||||
RU = PaddleOCR(use_angle_cls=True, lang="ru")
 | 
					RU = PaddleOCR(use_angle_cls=True, lang="ru")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					offset=1500
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def process_images(conn):
 | 
					def process_images(conn):
 | 
				
			||||||
 | 
					    global offset
 | 
				
			||||||
    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
					    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
				
			||||||
        cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10")
 | 
					        cursor.execute("SELECT id, content FROM web_images WHERE text='' AND article_category_top_id=22 LIMIT 100 OFFSET %s", (offset,))
 | 
				
			||||||
        for id, content in cursor.fetchall():
 | 
					        for id, content in cursor.fetchall():
 | 
				
			||||||
            image = download_image(content)
 | 
					            image = download_image(content)
 | 
				
			||||||
            if image is None:
 | 
					            if image is None:
 | 
				
			||||||
@@ -184,12 +189,13 @@ def process_images(conn):
 | 
				
			|||||||
                data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
 | 
					                data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
 | 
				
			||||||
                cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
 | 
					                cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
 | 
				
			||||||
                conn.commit()
 | 
					                conn.commit()
 | 
				
			||||||
 | 
					    offset+=100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    conn = connect_to_mysql()
 | 
					    conn = connect_to_mysql()
 | 
				
			||||||
    while True:
 | 
					    while True:
 | 
				
			||||||
        process_images(conn)
 | 
					        process_images(conn)
 | 
				
			||||||
        time.sleep(10)
 | 
					        time.sleep(0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
whell==0.42.0
 | 
					whell==0.45.0
 | 
				
			||||||
elasticsearch==8.11.0
 | 
					elasticsearch==8.11.0
 | 
				
			||||||
numpy==1.26.2
 | 
					numpy==1.26.2
 | 
				
			||||||
oss2==2.18.3
 | 
					oss2==2.18.3
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user