移除 text 预览
This commit is contained in:
		
							
								
								
									
										3
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					    "editor.inlineSuggest.showToolbar": "onHover"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										8
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								main.py
									
									
									
									
									
								
							@@ -45,15 +45,15 @@ def save_text(conn, id:int, text:str):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def process_images(conn, ocr, es):
 | 
					def process_images(conn, ocr, es):
 | 
				
			||||||
    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
					    with conn.cursor(pymysql.cursors.SSCursor) as cursor:
 | 
				
			||||||
        cursor.execute("SELECT id, content, text FROM web_images WHERE text!='' LIMIT 10")
 | 
					        cursor.execute("SELECT id, content FROM web_images WHERE text!='' LIMIT 10")
 | 
				
			||||||
        for id, content, text in cursor.fetchall():
 | 
					        for id, content in cursor.fetchall():
 | 
				
			||||||
            image = download_image(content)
 | 
					            image = download_image(content)
 | 
				
			||||||
            if image is None:
 | 
					            if image is None:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
 | 
					            item = [x for x in ocr.ocr(image) if x['text'] and not x['text'].isdigit() and len(x['text']) > 1]
 | 
				
			||||||
            save_text(conn, id, json.dumps(item, ensure_ascii=False, cls=MyEncoder))
 | 
					            save_text(conn, id, json.dumps(item, ensure_ascii=False, cls=MyEncoder))
 | 
				
			||||||
            texts = ' '.join([x['text'] for x in item])
 | 
					            text = ' '.join([x['text'] for x in item])
 | 
				
			||||||
            es.index(index='web_images', id=id, body={'content': texts})
 | 
					            es.index(index='web_images', id=id, body={'content': text})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    es = Elasticsearch(config['ELASTICSEARCH_HOST'], basic_auth=(config['ELASTICSEARCH_USERNAME'], config['ELASTICSEARCH_PASSWORD']), verify_certs=False)
 | 
					    es = Elasticsearch(config['ELASTICSEARCH_HOST'], basic_auth=(config['ELASTICSEARCH_USERNAME'], config['ELASTICSEARCH_PASSWORD']), verify_certs=False)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user