同步
This commit is contained in:
12
pp.py
12
pp.py
@@ -7,11 +7,14 @@ import pymysql
|
||||
import json
|
||||
import numpy as np
|
||||
import warnings
|
||||
import logging
|
||||
from PIL import Image, ImageFile
|
||||
from dotenv import dotenv_values
|
||||
from elasticsearch import Elasticsearch
|
||||
from paddleocr import PaddleOCR
|
||||
|
||||
logging.disable(logging.DEBUG) # 关闭DEBUG日志的打印
|
||||
logging.disable(logging.WARNING) # 关闭WARNING日志的打印
|
||||
warnings.filterwarnings("ignore")
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
config = dotenv_values(".env")
|
||||
@@ -34,8 +37,8 @@ def download_image(url:str) -> Image.Image:
|
||||
else:
|
||||
response = requests.get(url)
|
||||
return Image.open(io.BytesIO(response.content))
|
||||
except Exception:
|
||||
print(f'图片从{url}下载失败')
|
||||
except Exception as e:
|
||||
print(f'图片从{url}下载失败,错误信息为:{e}')
|
||||
return None
|
||||
|
||||
def connect_to_mysql():
|
||||
@@ -51,11 +54,14 @@ KR = PaddleOCR(use_angle_cls=True, lang="korean")
|
||||
|
||||
def process_images(conn, es):
|
||||
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
|
||||
cursor.execute("SELECT id, content FROM web_images WHERE text='' LIMIT 10")
|
||||
cursor.execute("SELECT id, content FROM web_images WHERE text!='' LIMIT 10")
|
||||
for id, content in cursor.fetchall():
|
||||
image = download_image(content)
|
||||
if image is None:
|
||||
continue
|
||||
if isinstance(image, Image.Image):
|
||||
image = np.array(image)
|
||||
print('---------------------', id, content)
|
||||
print(CH.ocr(image, cls=True))
|
||||
print(JP.ocr(image, cls=True))
|
||||
print(KR.ocr(image, cls=True))
|
||||
|
Reference in New Issue
Block a user