持续运行
This commit is contained in:
		
							
								
								
									
										37
									
								
								pp.py
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								pp.py
									
									
									
									
									
								
							@@ -1,7 +1,9 @@
 | 
				
			|||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
import io
 | 
					import io
 | 
				
			||||||
import oss2
 | 
					import oss2
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import base64
 | 
					import base64
 | 
				
			||||||
import dotenv
 | 
					import dotenv
 | 
				
			||||||
@@ -46,7 +48,11 @@ def download_image(url: str, max_size=32767) -> Image.Image:
 | 
				
			|||||||
        if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
 | 
					        if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
 | 
				
			||||||
            url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
 | 
					            url = url.replace('http://image.gameuiux.cn/', '').replace('https://image.gameuiux.cn/', '')
 | 
				
			||||||
            oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
 | 
					            oss_auth = oss2.Auth(config['OSS_ACCESS_KEY_ID'], config['OSS_ACCESS_KEY_SECRET'])
 | 
				
			||||||
            img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
 | 
					            if os.path.exists(url):
 | 
				
			||||||
 | 
					                img = Image.open(url)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                print(f'从OSS下载图片 {url}')
 | 
				
			||||||
 | 
					                img = Image.open(io.BytesIO(oss2.Bucket(oss_auth, f'http://{config["OSS_HOST"]}', config['OSS_BUCKET_NAME']).get_object(url).read()))
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            response = requests.get(url)
 | 
					            response = requests.get(url)
 | 
				
			||||||
            img = Image.open(io.BytesIO(response.content))
 | 
					            img = Image.open(io.BytesIO(response.content))
 | 
				
			||||||
@@ -65,10 +71,6 @@ def connect_to_mysql():
 | 
				
			|||||||
    return pymysql.connect(host=config['MYSQL_HOST'], user=config['MYSQL_USER'], password=config['MYSQL_PASSWORD'], database=config['MYSQL_NAME'], cursorclass=pymysql.cursors.SSDictCursor)
 | 
					    return pymysql.connect(host=config['MYSQL_HOST'], user=config['MYSQL_USER'], password=config['MYSQL_PASSWORD'], database=config['MYSQL_NAME'], cursorclass=pymysql.cursors.SSDictCursor)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_text(conn, id: int, text: str):
 | 
					 | 
				
			||||||
    with conn.cursor() as cursor:
 | 
					 | 
				
			||||||
        cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (text, id))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# 中英日韩俄
 | 
					# 中英日韩俄
 | 
				
			||||||
EN = PaddleOCR(use_angle_cls=True, lang="en")
 | 
					EN = PaddleOCR(use_angle_cls=True, lang="en")
 | 
				
			||||||
@@ -79,7 +81,7 @@ RU = PaddleOCR(use_angle_cls=True, lang="ru")
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def process_images(conn):
 | 
					def process_images(conn):
 | 
				
			||||||
    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
					    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
				
			||||||
        cursor.execute("SELECT id, content FROM web_images WHERE id>222193 AND text='' LIMIT 10")
 | 
					        cursor.execute("SELECT id, content FROM web_images WHERE text='' AND text!='[]' AND article_category_top_id=22 LIMIT 10")
 | 
				
			||||||
        for id, content in cursor.fetchall():
 | 
					        for id, content in cursor.fetchall():
 | 
				
			||||||
            image = download_image(content)
 | 
					            image = download_image(content)
 | 
				
			||||||
            if image is None:
 | 
					            if image is None:
 | 
				
			||||||
@@ -173,23 +175,22 @@ def process_images(conn):
 | 
				
			|||||||
                data.append({'text': 文本[0], 'confidence': 文本[1], 'coordinate': 坐标 })
 | 
					                data.append({'text': 文本[0], 'confidence': 文本[1], 'coordinate': 坐标 })
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            # 转换为字符串存储到索引库
 | 
					            # 转换为字符串存储到索引库
 | 
				
			||||||
            text = ' '.join([x['text'] for x in data])
 | 
					            obj = { "_id": str(id), "text": ' '.join([x['text'] for x in data]) }
 | 
				
			||||||
 | 
					            res = requests.put(zinc_url, headers=headers, data=json.dumps(obj), proxies={'http': '', 'https': ''})
 | 
				
			||||||
 | 
					            print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else id, text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # 转换为 JSON 存储到数据库
 | 
					            # 转换为 JSON 存储到数据库
 | 
				
			||||||
            data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
 | 
					            with conn.cursor() as cursor:
 | 
				
			||||||
            
 | 
					                data = json.dumps(data, ensure_ascii=False, cls=MyEncoder)
 | 
				
			||||||
            print(id, text)
 | 
					                cursor.execute("UPDATE web_images SET text = %s WHERE id = %s", (data, id))
 | 
				
			||||||
 | 
					                conn.commit()
 | 
				
			||||||
            save_text(conn, id, data)
 | 
					 | 
				
			||||||
            res = requests.put(zinc_url, headers=headers, data=json.dumps(data), proxies={'http': '', 'https': ''})
 | 
					 | 
				
			||||||
            print("\033[1;32m{}\033[0m".format(id) if json.loads(res.text)['message'] == 'ok' else id, text)
 | 
					 | 
				
			||||||
            conn.commit()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    conn = connect_to_mysql()
 | 
					    conn = connect_to_mysql()
 | 
				
			||||||
    process_images(conn)
 | 
					    while True:
 | 
				
			||||||
 | 
					        process_images(conn)
 | 
				
			||||||
 | 
					        time.sleep(10)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    for _ in range(1):
 | 
					    main()
 | 
				
			||||||
        main()
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user