修正圖像初始化錯誤
This commit is contained in:
12
bin/main.go
12
bin/main.go
@@ -543,7 +543,11 @@ func main() {
|
||||
return
|
||||
}
|
||||
var img models.Image
|
||||
img.Init(content)
|
||||
if err := img.Init(content); err != nil {
|
||||
log.Println("初始化图片失败", format, err)
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
data, err := img.ToWebP(width, height, fit)
|
||||
if err != nil {
|
||||
log.Println("转换图片失败", err)
|
||||
@@ -575,7 +579,11 @@ func main() {
|
||||
return
|
||||
}
|
||||
var img models.Image
|
||||
img.Init(content)
|
||||
if err := img.Init(content); err != nil {
|
||||
log.Println("初始化图片失败", version, format, err)
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
data, err := img.ToWebP(width, height, fit)
|
||||
if err != nil {
|
||||
log.Println("转换图片失败", err)
|
||||
|
68
bin/ocr.py
Normal file
68
bin/ocr.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# python 實現圖片文字辨識(ocr)
|
||||
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
|
||||
# Path to the tesseract executable (Linux).
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Chinese and English are selected through the `lang` argument of the
# pytesseract call further down.

# Load the input image.
image = Image.open('data/tt.jpeg')

# Convert the image to grayscale.
image = image.convert('L')

# Binarize: gray levels below the threshold map to 0, all others to 1.
threshold = 127
table = [0 if level < threshold else 1 for level in range(256)]
image = image.point(table, '1')

# Image to plain text (disabled).
#text = pytesseract.image_to_string(image, lang='chi_sim+eng')
#print(text)
#
#data = pytesseract.image_to_boxes(image, lang='chi_sim+eng')
#print(data)

# Coordinates (full data table, disabled).
#data = pytesseract.image_to_data(image, lang='chi_sim+eng')
#print(data)

# Run OCR and collect the result as ALTO XML.
data = pytesseract.image_to_alto_xml(image, lang='chi_sim+eng')
|
||||
#print(data)
|
||||
|
||||
## 打印格式化的XML
|
||||
#from xml.dom.minidom import parseString
|
||||
#dom = parseString(data)
|
||||
#print(dom.toprettyxml())
|
||||
|
||||
# 打印格式化的JSON
|
||||
import json
|
||||
from xmljson import badgerfish as bf
|
||||
from xml.etree.ElementTree import fromstring
|
||||
|
||||
# 過濾掉@CONTENT爲空的數據和爲數字的數據
|
||||
def filter_data(data):
    """Recursively drop nodes whose '@CONTENT' is empty or purely numeric.

    Args:
        data: A parsed tree made of dicts, lists and scalar values. Dicts
            are modified in place.

    Returns:
        None when ``data`` is a dict whose '@CONTENT' is an empty string, a
        digit-only string, or a non-negative int. Otherwise ``data`` itself
        (the same object for dicts), with filtered children removed from
        nested dicts and lists. Non-dict input is returned unchanged.
    """
    if not isinstance(data, dict):
        return data

    if '@CONTENT' in data:
        content = data['@CONTENT']
        if isinstance(content, str):
            if content == '' or content.isdigit():
                return None
        # XML-to-dict converters such as xmljson may already have turned
        # numeric text into an int; calling .isdigit() on it would raise
        # AttributeError. bool is excluded because it subclasses int, and
        # negatives are kept because '-5'.isdigit() is False as well.
        elif (isinstance(content, int) and not isinstance(content, bool)
                and content >= 0):
            return None

    # Iterate over a snapshot so filtered keys can be deleted safely.
    for key, value in list(data.items()):
        if isinstance(value, dict):
            kept = filter_data(value)
            if kept is None:
                del data[key]
            else:
                data[key] = kept
        elif isinstance(value, list):
            # Remove filtered entries instead of leaving None placeholders.
            filtered = (filter_data(item) for item in value)
            data[key] = [item for item in filtered if item is not None]
    return data
|
||||
|
||||
# Parse the OCR output held in `data` into an element tree.
xml = fromstring(data)
# Serialize using the BadgerFish convention. The result gets its own name so
# the imported `json` module is not rebound to a string.
# NOTE(review): filter_data is defined above but never applied here; confirm
# whether the tree was meant to be filtered before printing.
json_text = json.dumps(bf.data(xml), indent=4, ensure_ascii=False)
print(json_text)
|
||||
|
Reference in New Issue
Block a user