移除 text 预览

This commit is contained in:
2023-12-01 02:39:13 +08:00
parent dfb7041746
commit 92921f99eb
2 changed files with 7 additions and 4 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"editor.inlineSuggest.showToolbar": "onHover"
}

View File

@@ -45,15 +45,15 @@ def save_text(conn, id:int, text:str):
# Run OCR over a batch of rows from the web_images table, persist the filtered
# OCR items through save_text, and index the joined text into Elasticsearch.
# NOTE(review): this block comes from a rendered diff whose +/- markers and
# indentation were lost. Each pair of adjacent near-duplicate lines below looks
# like a removed line followed by its replacement -- confirm against the commit.
def process_images(conn, ocr, es):
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
# Presumably the pre-change query and loop header: the `text` column was also
# selected and unpacked here.
cursor.execute("SELECT id, content, text FROM web_images WHERE text!='' LIMIT 10")
for id, content, text in cursor.fetchall():
# Presumably the post-change query and loop header: `text` is no longer selected.
# The query returns at most 10 rows, restricted to rows whose `text` is not empty.
# NOTE(review): text!='' picks rows that already have text -- confirm this is
# intended rather than text=''.
# NOTE(review): the loop variable `id` shadows the Python builtin of the same name.
cursor.execute("SELECT id, content FROM web_images WHERE text!='' LIMIT 10")
for id, content in cursor.fetchall():
# Rows for which download_image() returns None are skipped.
image = download_image(content)
if image is None:
continue
# Keep only OCR results whose 'text' is non-empty, is not made up solely of
# digits, and is longer than one character.
item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
# Store the kept items as a JSON string (non-ASCII characters left unescaped,
# MyEncoder used as the encoder class) for this row id.
save_text(conn, id, json.dumps(item, ensure_ascii=False, cls=MyEncoder))
# Presumably the pre-change lines: the space-joined text was held in `texts`.
texts = ' '.join([x['text'] for x in item])
es.index(index='web_images', id=id, body={'content': texts})
# Presumably the post-change lines: same logic with the local renamed to `text`.
# The space-joined text is indexed into 'web_images' under the 'content' key,
# using the row id as the document id.
text = ' '.join([x['text'] for x in item])
es.index(index='web_images', id=id, body={'content': text})
def main():
es = Elasticsearch(config['ELASTICSEARCH_HOST'], basic_auth=(config['ELASTICSEARCH_USERNAME'], config['ELASTICSEARCH_PASSWORD']), verify_certs=False)