同步更改
This commit is contained in:
14
pp.py
14
pp.py
@@ -31,6 +31,9 @@ class MyEncoder(json.JSONEncoder):
|
|||||||
|
|
||||||
|
|
||||||
def download_image(url: str) -> Image.Image:
|
def download_image(url: str) -> Image.Image:
|
||||||
|
if url.endswith('.gif') or url.endswith('.GIF'):
|
||||||
|
print(f'跳过GIF {url}')
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
|
if url.startswith('http://image.gameuiux.cn/') or url.startswith('https://image.gameuiux.cn/'):
|
||||||
url = url.replace('http://image.gameuiux.cn/',
|
url = url.replace('http://image.gameuiux.cn/',
|
||||||
@@ -65,7 +68,7 @@ RU = PaddleOCR(use_angle_cls=True, lang="ru")
|
|||||||
|
|
||||||
def process_images(conn, es):
|
def process_images(conn, es):
|
||||||
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
|
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
|
||||||
cursor.execute("SELECT id, content FROM web_images LIMIT 0,10") # WHERE text!=''
|
cursor.execute("SELECT id, content FROM web_images WHERE text='' LIMIT 0,1000")
|
||||||
for id, content in cursor.fetchall():
|
for id, content in cursor.fetchall():
|
||||||
image = download_image(content)
|
image = download_image(content)
|
||||||
if image is None:
|
if image is None:
|
||||||
@@ -79,6 +82,12 @@ def process_images(conn, es):
|
|||||||
kr = KR.ocr(image, cls=True)[0]
|
kr = KR.ocr(image, cls=True)[0]
|
||||||
ru = RU.ocr(image, cls=True)[0]
|
ru = RU.ocr(image, cls=True)[0]
|
||||||
|
|
||||||
|
en = en if en is not None else []
|
||||||
|
ch = ch if ch is not None else []
|
||||||
|
jp = jp if jp is not None else []
|
||||||
|
kr = kr if kr is not None else []
|
||||||
|
ru = ru if ru is not None else []
|
||||||
|
|
||||||
# 排除字符长度小于2的行
|
# 排除字符长度小于2的行
|
||||||
jp = [x for x in jp if len(x[1][0]) > 1]
|
jp = [x for x in jp if len(x[1][0]) > 1]
|
||||||
kr = [x for x in kr if len(x[1][0]) > 1]
|
kr = [x for x in kr if len(x[1][0]) > 1]
|
||||||
@@ -175,4 +184,5 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
for _ in range(1000):
|
||||||
|
main()
|
||||||
|
Reference in New Issue
Block a user