diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..14f6030
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "editor.inlineSuggest.showToolbar": "onHover"
+}
\ No newline at end of file
diff --git a/main.py b/main.py
index d0a10b6..5804cd3 100755
--- a/main.py
+++ b/main.py
@@ -45,15 +45,15 @@ def save_text(conn, id:int, text:str):
 
 def process_images(conn, ocr, es):
     with conn.cursor(pymysql.cursors.SSCursor) as cursor:
-        cursor.execute("SELECT id, content, text FROM web_images WHERE text!='' LIMIT 10")
-        for id, content, text in cursor.fetchall():
+        cursor.execute("SELECT id, content FROM web_images WHERE text!='' LIMIT 10")
+        for id, content in cursor.fetchall():
             image = download_image(content)
             if image is None:
                 continue
             item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
             save_text(conn, id, json.dumps(item, ensure_ascii=False, cls=MyEncoder))
-            texts = ' '.join([x['text'] for x in item])
-            es.index(index='web_images', id=id, body={'content': texts})
+            text = ' '.join([x['text'] for x in item])
+            es.index(index='web_images', id=id, body={'content': text})
 
 def main():
     es = Elasticsearch(config['ELASTICSEARCH_HOST'], basic_auth=(config['ELASTICSEARCH_USERNAME'], config['ELASTICSEARCH_PASSWORD']), verify_certs=False)