Files
webp/bin/main.go
2023-10-25 04:31:18 +08:00

636 lines
21 KiB
Go

package main
import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"runtime"
"strings"
"time"
"regexp"
"strconv"
"encoding/json"
"git.satori.love/gameui/webp/models"
_ "github.com/go-sql-driver/mysql"
"github.com/milvus-io/milvus-sdk-go/v2/entity"
"github.com/spf13/viper"
)
// stringToInt parses str as a base-10 integer. It returns defaultValue when
// str is empty or cannot be parsed.
func stringToInt(str string, defaultValue int) int {
	if len(str) == 0 {
		return defaultValue
	}
	if parsed, err := strconv.Atoi(str); err == nil {
		return parsed
	}
	return defaultValue
}
// LogComponent writes one coloured access-log line for request r: the HTTP
// method, the request URL and the milliseconds elapsed since startTime.
//
// startTime is a Unix timestamp in nanoseconds (time.Now().UnixNano() taken
// when the handler started). The ANSI colour of the duration depends on how
// slow the request was.
func LogComponent(startTime int64, r *http.Request) {
	elapsed := (time.Now().UnixNano() - startTime) / 1000000

	var durationFormat string
	switch {
	case elapsed > 800:
		durationFormat = "\033[1;31m%dms\033[0m" // bold red
	case elapsed > 500:
		durationFormat = "\033[1;33m%dms\033[0m" // bold yellow
	case elapsed > 300:
		durationFormat = "\033[1;32m%dms\033[0m" // bold green
	case elapsed > 200:
		durationFormat = "\033[1;34m%dms\033[0m" // bold blue
	case elapsed > 100:
		durationFormat = "\033[1;35m%dms\033[0m" // bold magenta
	default:
		durationFormat = "\033[1;36m%dms\033[0m" // bold cyan
	}

	log.Println(
		fmt.Sprintf("\033[1;32m%s\033[0m", r.Method), // bold green
		fmt.Sprintf("\033[1;34m%s\033[0m", r.URL),    // bold blue
		fmt.Sprintf(durationFormat, elapsed),
	)
}
// User is the JSON shape of a member attached to an image.
// The /images handler fills it from the web_member table (id, user_name,
// update_time, create_time) and converts both timestamps to UTC.
type User struct {
// Id is the member's primary key; images reference it through user_id.
Id int `json:"id"`
// UserName is read from the user_name column.
UserName string `json:"user_name"`
// CreateTime and UpdateTime are serialized with snake_case keys.
CreateTime time.Time `json:"create_time"`
UpdateTime time.Time `json:"update_time"`
}
// Article is the JSON shape of the image set ("article") an image belongs to.
// The /images handler fills it from the web_article table (id, title, tags,
// update_time, create_time) and converts both timestamps to UTC.
type Article struct {
// Id is the article's primary key; images reference it through article_id.
Id int `json:"id"`
Title string `json:"title"`
// Tags is kept as the raw string stored in the tags column.
// NOTE(review): the delimiter/format of this string is not visible here.
Tags string `json:"tags"`
CreateTime time.Time `json:"create_time"`
UpdateTime time.Time `json:"update_time"`
}
// Image is the JSON shape of one row of the web_images table, enriched by the
// /images handler with the owning User and the Article it belongs to.
type Image struct {
// Id is the image's primary key. It is also the id looked up in the vector
// store by GetSimilarImagesIdList.
Id int `json:"id"`
Width int `json:"width"`
Height int `json:"height"`
// Content is the value of the content column; the /images handler rewrites
// "http:" to "https:" in it before returning it.
Content string `json:"content"`
ArticleCategoryTopId int `json:"article_category_top_id"`
PraiseCount int `json:"praise_count"`
CollectCount int `json:"collect_count"`
// NOTE: unlike every other struct in this file, the two timestamps below are
// serialized with camelCase keys ("createTime"/"updateTime").
CreateTime time.Time `json:"createTime"`
UpdateTime time.Time `json:"updateTime"`
// User initially carries only Id (from user_id); the remaining fields are
// filled in when a matching web_member row is found.
User User `json:"user"`
// Article initially carries only Id (from article_id); the remaining fields
// are filled in when a matching web_article row is found.
Article Article `json:"article"`
}
// Tag is the JSON shape of one row of the web_tags table, as returned by the
// /tags handler (columns id, name, update_time, create_time).
type Tag struct {
Id int `json:"id"`
Name string `json:"name"`
CreateTime time.Time `json:"create_time"`
UpdateTime time.Time `json:"update_time"`
}
// History is the JSON shape of a single browsing-history record.
// NOTE(review): nothing in the visible part of this file constructs or returns
// a History yet; presumably it is reserved for the /history endpoint.
type History struct {
// Type names the kind of record; the set of valid values is not defined here.
Type string `json:"type"`
CreateTime time.Time `json:"create_time"`
// Data is an arbitrary payload whose concrete type depends on the record.
Data interface{} `json:"data"`
}
// ListView is the paginated JSON envelope returned by the /images and /tags
// handlers.
type ListView struct {
// Code is never assigned in the visible handlers, so it is serialized as 0.
Code int `json:"code"`
// Page is the 1-based page number taken from the "page" request parameter.
Page int `json:"page"`
// PageSize is the number of rows per page, taken from "pageSize".
PageSize int `json:"page_size"`
// Total is the result of SELECT COUNT(*) with the same filter as the list.
Total int `json:"total"`
// Next reports whether Total exceeds Page*PageSize, i.e. more pages exist.
Next bool `json:"next"`
// List holds the page's items (Image or Tag values) boxed as interface{}.
List []interface{} `json:"list"`
}
// Process-wide connections shared by every HTTP handler. Both are zero values
// here and are initialised by main through their Init methods.
var (
	mysqlConnection  models.MysqlConnection
	milvusConnection models.MilvusConnection
)
// GetNetWorkEmbedding asks the embedding service for the feature vector of
// the image with the given id.
//
// It sends "PUT http://{embedding.host}:{embedding.port}/reverse/{id}" (host
// and port come from viper) and expects a JSON object with the fields "code",
// "message" and "feature". A "code" of 0 means success.
//
// It returns the "feature" array, or nil when the request fails, the response
// cannot be decoded, or the service reports a non-zero code. Failures are
// logged, not returned.
func GetNetWorkEmbedding(id int) (embedding []float32) {
	host := viper.GetString("embedding.host")
	port := viper.GetInt("embedding.port")

	// Bound the call so a stuck embedding service cannot hang the handler.
	// NOTE(review): 30s is a guess at how long embedding generation may take.
	httpClient := &http.Client{Timeout: 30 * time.Second}
	request, err := http.NewRequest("PUT", fmt.Sprintf("http://%s:%d/reverse/%d", host, port, id), nil)
	if err != nil {
		log.Println("请求失败:", err)
		return nil
	}
	response, err := httpClient.Do(request)
	if err != nil {
		log.Println("请求失败:", err)
		return nil
	}
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		log.Println("请求失败:", response.Status)
		return nil
	}

	// Decode into typed fields. With map[string]interface{} every JSON number
	// becomes float64 and every array becomes []interface{}, so comparing
	// "code" with the int 0 is always unequal and asserting "feature" to
	// []float32 panics.
	var result struct {
		Code    float64     `json:"code"`
		Message interface{} `json:"message"`
		Feature []float32   `json:"feature"`
	}
	if err := json.NewDecoder(response.Body).Decode(&result); err != nil {
		log.Println("解析失败:", err)
		return nil
	}
	if result.Code != 0 {
		log.Println("请求失败:", result.Message)
		return nil
	}
	return result.Feature
}
// GetSimilarImagesIdList returns the ids of the images most similar to image
// in the Milvus collection collection_name, in the order returned by the
// search.
//
// It first reads the image's stored "embedding" vector from the collection.
// If that query fails or yields no vector, it asks the embedding service via
// GetNetWorkEmbedding. The vector is then used for an L2 search limited to
// the top 1000 hits.
//
// It returns nil when no vector can be obtained or the search fails. Failures
// are logged, not returned.
func (image *Image) GetSimilarImagesIdList(collection_name string) (ids []int64) {
	ctx := context.Background()

	// Step 1: try to read the stored vector from Milvus.
	var embedding []float32
	result, err := milvusConnection.Client.Query(ctx, collection_name, nil, fmt.Sprintf("id in [%d]", image.Id), []string{"embedding"})
	if err != nil {
		log.Println("查詢向量失敗:", err)
	} else {
		for _, item := range result {
			if item.Name() == "embedding" {
				// Use the nil-safe getter: GetFloatVector may return nil and
				// reading .Data through it would panic.
				embedding = item.FieldData().GetVectors().GetFloatVector().GetData()
				break
			}
		}
	}

	// Step 2: fall back to the embedding service, both when the query failed
	// and when it succeeded but the image has no stored vector.
	if len(embedding) == 0 {
		embedding = GetNetWorkEmbedding(image.Id)
	}
	if len(embedding) == 0 {
		log.Println("向量不存在, 也未能重新生成")
		return nil
	}

	// Step 3: search for the nearest neighbours of the vector.
	topk := 1000
	sp, err := entity.NewIndexIvfFlatSearchParam(64)
	if err != nil {
		log.Println("搜索相似失敗:", err)
		return nil
	}
	vectors := []entity.Vector{entity.FloatVector(embedding)}
	resultx, err := milvusConnection.Client.Search(ctx, collection_name, nil, "", []string{"id", "article_id"}, vectors, "embedding", entity.L2, topk, sp)
	if err != nil {
		log.Println("搜索相似失敗:", err)
		return nil
	}

	// Only one vector is searched, so at most one result set is expected; the
	// last non-empty one wins, as before.
	for _, item := range resultx {
		if item.IDs == nil {
			continue
		}
		ids = item.IDs.FieldData().GetScalars().GetLongData().GetData()
	}
	return ids
}
// main initialises the MySQL and Milvus connections, loads the "default"
// Milvus collection, registers every HTTP route on the default mux and then
// serves on the port configured under "server.port".
//
// Routes:
//
//	/             liveness text
//	/favicon.ico  always 404
//	/history      browsing history (stub: validates parameters only)
//	/images       paginated image list with optional filters / similarity
//	/tags         paginated tag list with optional filters
//	/img/         on-the-fly WebP conversion, size given as query parameters
//	/webp/        on-the-fly WebP conversion, size encoded in the path
//	/video        resolves a video URL to its m3u8 links
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())
	mysqlConnection.Init()
	milvusConnection.Init()
	err := milvusConnection.Client.LoadCollection(context.Background(), "default", false)
	if err != nil {
		log.Println("Milvus load collection failed:", err)
		return
	}

	// test ocr
	//client := gosseract.NewClient()
	//defer client.Close()
	//client.SetLanguage("chi_sim")
	//client.SetImage("/home/stori/webp/data/tt.jpeg")
	//text, _ := client.Text()
	//fmt.Println(text)
	//fmt.Println("=======================================")

	// Path patterns are compiled once at startup instead of on every request.
	// The dot before the extension is escaped so it only matches a literal ".".
	imgPath := regexp.MustCompile(`^/img/([0-9a-zA-Z]+)-([0-9a-zA-Z]+)\.(jpg|jpeg|png|webp)$`)
	webpPath := regexp.MustCompile(`^/webp/([0-9a-zA-Z]+)-([0-9a-zA-Z]+)-([0-9a-zA-Z]+)-([0-9]+)-([0-9]+)-([a-zA-Z]+)\.(jpg|jpeg|png|webp)$`)

	// splitCSV returns the non-empty items of a comma-separated parameter.
	splitCSV := func(raw string) (list []string) {
		for _, item := range strings.Split(raw, ",") {
			if item != "" {
				list = append(list, item)
			}
		}
		return list
	}

	// appendIn adds "column IN (?, ?, ...)" to where and the matching values to
	// args. Values are always bound as parameters, never spliced into the SQL
	// text, so request input cannot alter the statement.
	appendIn := func(where []string, args []interface{}, column string, values []string) ([]string, []interface{}) {
		if len(values) == 0 {
			return where, args
		}
		marks := make([]string, len(values))
		for i, value := range values {
			marks[i] = "?"
			args = append(args, value)
		}
		return append(where, column+" IN ("+strings.Join(marks, ",")+")"), args
	}

	// whereClause joins the collected predicates into " WHERE a AND b ...".
	whereClause := func(where []string) string {
		if len(where) == 0 {
			return ""
		}
		return " WHERE " + strings.Join(where, " AND ")
	}

	// pageParams parses page/pageSize and falls back to the defaults (1, 10)
	// for missing, malformed or non-positive values, so the LIMIT offset can
	// never be negative.
	pageParams := func(page, pageSize string) (int, int) {
		p, s := stringToInt(page, 1), stringToInt(pageSize, 10)
		if p < 1 {
			p = 1
		}
		if s < 1 {
			s = 10
		}
		return p, s
	}

	// joinInts renders integer ids as "1,2,3" for an IN (...) list. The values
	// are integers read from the database, so inlining them is safe.
	joinInts := func(values []int) string {
		parts := make([]string, len(values))
		for i, value := range values {
			parts[i] = strconv.Itoa(value)
		}
		return strings.Join(parts, ",")
	}

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns
		w.Write([]byte("Hello World!"))
	})

	// Suppress /favicon.ico
	http.HandleFunc("/favicon.ico", func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "Not Found", http.StatusNotFound)
	})

	// Browsing history
	http.HandleFunc("/history", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns

		// Planned filters: by session, user, time, type and data.
		//
		// Planned logger:
		// session records, also read back from the SDK's local data
		// URL changes or newly opened pages [URL, referrer, time]
		// events mounted on certain components [component, event, time]
		// user behaviour [user, action, time]
		// viewed [images, articles, collections, users]
		// requested API endpoints
		// data that was displayed
		// displayed then clicked (+positive feedback)
		// displayed then collected
		// displayed then shared
		// displayed then commented
		// displayed then liked
		// displayed then downloaded
		// displayed then ignored (+negative feedback)
		// displayed then blocked (+blocking feature)

		// User id
		userId := stringToInt(r.URL.Query().Get("user_id"), 0)
		if userId == 0 {
			http.Error(w, "user_id is required", http.StatusBadRequest)
			return
		}

		// Image id list (a JSON array of integers)
		var ids []int64
		if err := json.Unmarshal([]byte(r.URL.Query().Get("ids")), &ids); err != nil {
			http.Error(w, "ids is required", http.StatusBadRequest)
			return
		}

		// Image details: the lookup is not implemented yet, so the list is
		// always empty.
		var images []Image
		//mysqlConnection.DB.Where("id in (?)", ids).Find(&images)

		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(images); err != nil {
			log.Println("encode history failed:", err)
		}
	})

	// Paginated image list
	http.HandleFunc("/images", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns

		// Planned: private scope (own images, articles, collections, likes),
		// conditional queries (fuzzy search, time range, author, tag, category,
		// collection, state, pinned, fuzzy weight), set operations, sorting.
		query := r.URL.Query()

		// Build the filter with bound parameters.
		var where []string
		var args []interface{}
		where, args = appendIn(where, args, "author", splitCSV(query.Get("authors")))
		where, args = appendIn(where, args, "tag", splitCSV(query.Get("tags")))
		where, args = appendIn(where, args, "categorie", splitCSV(query.Get("categories")))
		where, args = appendIn(where, args, "sets", splitCSV(query.Get("sets")))

		// Optional similarity filter: restrict the result to the neighbours of
		// the first id given in "similar".
		var ids []int64
		similar := splitCSV(query.Get("similar"))
		if len(similar) > 0 {
			id, err := strconv.Atoi(similar[0])
			if err != nil {
				log.Println("strconv.Atoi failed:", err)
				http.Error(w, "similar must be an image id", http.StatusBadRequest)
				return
			}
			ids = (&Image{Id: id}).GetSimilarImagesIdList("default")
			if len(ids) > 0 {
				idsStr := make([]string, len(ids))
				for i, v := range ids {
					idsStr[i] = strconv.FormatInt(v, 10)
				}
				where = append(where, "id IN ("+strings.Join(idsStr, ",")+")")
			} else {
				// No neighbours found: the result must be empty, and the total
				// must not fall back to the size of the whole table.
				where = append(where, "1 = 0")
			}
		}
		conditions := whereClause(where)

		// Fetch the requested page.
		var images ListView
		var image_list []Image
		images.Page, images.PageSize = pageParams(query.Get("page"), query.Get("pageSize"))
		listArgs := append(append([]interface{}{}, args...), (images.Page-1)*images.PageSize, images.PageSize)
		rows, err := mysqlConnection.Database.Query("SELECT id, width, height, content, update_time, create_time, user_id, article_id, article_category_top_id, praise_count, collect_count FROM web_images"+conditions+" LIMIT ?, ?", listArgs...)
		if err != nil {
			log.Println("获取图片列表失败", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		defer rows.Close()
		for rows.Next() {
			var image Image
			if err := rows.Scan(&image.Id, &image.Width, &image.Height, &image.Content, &image.UpdateTime, &image.CreateTime, &image.User.Id, &image.Article.Id, &image.ArticleCategoryTopId, &image.PraiseCount, &image.CollectCount); err != nil {
				// Skip rows that cannot be read rather than returning a
				// half-populated image.
				log.Println("scan image row failed:", err)
				continue
			}
			image.UpdateTime = image.UpdateTime.UTC()
			image.CreateTime = image.CreateTime.UTC()
			image.Content = strings.Replace(image.Content, "http:", "https:", -1)
			image_list = append(image_list, image)
		}
		if err := rows.Err(); err != nil {
			log.Println("获取图片列表失败", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}

		// With a similarity filter, restore the order of the similarity result.
		// The id list may contain ids that are not on the current page.
		if len(similar) > 0 {
			byID := make(map[int]Image, len(image_list))
			for _, image := range image_list {
				byID[image.Id] = image
			}
			sorted := make([]Image, 0, len(image_list))
			for _, id := range ids {
				if image, ok := byID[int(id)]; ok {
					sorted = append(sorted, image)
				}
			}
			image_list = sorted
		}

		// Attach user info (step 1: collect the user ids).
		var user_ids []int
		for _, image := range image_list {
			user_ids = append(user_ids, image.User.Id)
		}
		// Attach user info (step 2: load the users).
		users := make(map[int]User, len(user_ids))
		if len(user_ids) > 0 {
			rows, err := mysqlConnection.Database.Query("SELECT id, user_name, update_time, create_time FROM web_member WHERE id IN (" + joinInts(user_ids) + ")")
			if err != nil {
				log.Println("获取用户列表失败", err)
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return
			}
			defer rows.Close()
			for rows.Next() {
				var user User
				if err := rows.Scan(&user.Id, &user.UserName, &user.UpdateTime, &user.CreateTime); err != nil {
					log.Println("scan user row failed:", err)
					continue
				}
				user.UpdateTime = user.UpdateTime.UTC()
				user.CreateTime = user.CreateTime.UTC()
				users[user.Id] = user
			}
			if err := rows.Err(); err != nil {
				log.Println("获取用户列表失败", err)
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return
			}
		}
		// Attach user info (step 3: merge into the images).
		for i := range image_list {
			if user, ok := users[image_list[i].User.Id]; ok {
				image_list[i].User = user
			}
		}

		// Attach article info (step 1: collect the article ids).
		var article_ids []int
		for _, image := range image_list {
			article_ids = append(article_ids, image.Article.Id)
		}
		// Attach article info (step 2: load the articles).
		articles := make(map[int]Article, len(article_ids))
		if len(article_ids) > 0 {
			rows, err := mysqlConnection.Database.Query("SELECT id, title, tags, update_time, create_time FROM web_article WHERE id IN (" + joinInts(article_ids) + ")")
			if err != nil {
				log.Println("获取图片集列表失败", err)
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return
			}
			defer rows.Close()
			for rows.Next() {
				var article Article
				if err := rows.Scan(&article.Id, &article.Title, &article.Tags, &article.UpdateTime, &article.CreateTime); err != nil {
					log.Println("scan article row failed:", err)
					continue
				}
				article.UpdateTime = article.UpdateTime.UTC()
				article.CreateTime = article.CreateTime.UTC()
				articles[article.Id] = article
			}
			if err := rows.Err(); err != nil {
				log.Println("获取图片集列表失败", err)
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return
			}
		}
		// Attach article info (step 3: merge into the images).
		for i := range image_list {
			if article, ok := articles[image_list[i].Article.Id]; ok {
				image_list[i].Article = article
			}
		}

		// Convert []Image to []interface{} (non-nil, so it encodes as []).
		images.List = make([]interface{}, len(image_list))
		for i, v := range image_list {
			images.List[i] = v
		}

		// Total number of rows matching the same filter.
		err = mysqlConnection.Database.QueryRow("SELECT COUNT(*) FROM web_images"+conditions, args...).Scan(&images.Total)
		if err != nil {
			log.Println("获取图片总数失败", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		// Is there a next page?
		images.Next = images.Total > images.Page*images.PageSize

		// Indented JSON output.
		data, err := json.MarshalIndent(images, "", " ")
		if err != nil {
			log.Println("转换图片列表失败", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json; charset=UTF-8")
		w.Write(data)
	})

	// Tag list
	http.HandleFunc("/tags", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns

		// Design notes:
		// 1. Tags are collected from the tag field of articles.
		// 2. A tag's name is resolved through its id.
		// Planned hot-ranking: weight = (article count squared) * recent growth,
		// where recent growth is the relative change in article count.
		// Tags are a virtual table: OCR-extracted data carries probabilities on
		// several dimensions; after rule filtering, the product of dynamically
		// tuned exponents is the weight, and only data above a threshold counts
		// as a tag.

		// Build the filter with bound parameters. The previous code appended
		// " AND ..." without a WHERE and spliced raw request values into the SQL.
		var where []string
		var args []interface{}
		where, args = appendIn(where, args, "author", splitCSV(r.FormValue("authors")))
		where, args = appendIn(where, args, "tag", splitCSV(r.FormValue("tags")))
		where, args = appendIn(where, args, "categorie", splitCSV(r.FormValue("categories")))
		conditions := whereClause(where)

		// Fetch the requested page.
		var tags ListView
		tags.List = make([]interface{}, 0) // encode an empty page as [] like /images
		tags.Page, tags.PageSize = pageParams(r.FormValue("page"), r.FormValue("pageSize"))
		listArgs := append(append([]interface{}{}, args...), (tags.Page-1)*tags.PageSize, tags.PageSize)
		rows, err := mysqlConnection.Database.Query("SELECT id, name, update_time, create_time FROM web_tags"+conditions+" ORDER BY id DESC LIMIT ?, ?", listArgs...)
		if err != nil {
			log.Println(err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		defer rows.Close()
		for rows.Next() {
			var tag Tag
			if err := rows.Scan(&tag.Id, &tag.Name, &tag.UpdateTime, &tag.CreateTime); err != nil {
				log.Println(err)
				continue
			}
			tags.List = append(tags.List, tag)
		}
		if err := rows.Err(); err != nil {
			log.Println(err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}

		// Total number of rows matching the same filter.
		if err := mysqlConnection.Database.QueryRow("SELECT COUNT(*) FROM web_tags"+conditions, args...).Scan(&tags.Total); err != nil {
			log.Println(err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		// Is there a next page?
		tags.Next = tags.Total > tags.Page*tags.PageSize

		// Indented JSON output.
		data, err := json.MarshalIndent(tags, "", " ")
		if err != nil {
			log.Println(err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(data)
	})

	// URL format: /img/{type}-{id}.{format}?width=320&height=320&fit=cover
	http.HandleFunc("/img/", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns
		matches := imgPath.FindStringSubmatch(r.URL.Path)
		if len(matches) != 4 {
			http.Error(w, "URL 格式错误", http.StatusNotFound)
			return
		}
		query := r.URL.Query()
		group, id, format := matches[1], matches[2], matches[3]
		width, height, fit := stringToInt(query.Get("width"), 0), stringToInt(query.Get("height"), 0), query.Get("fit")
		content, err := mysqlConnection.GetImageContent(group, id)
		if err != nil {
			log.Println("获取图片失败", format, err)
			http.Error(w, err.Error(), http.StatusNotFound)
			return
		}
		var img models.Image
		if err := img.Init(content); err != nil {
			log.Println("初始化图片失败", format, err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		data, err := img.ToWebP(width, height, fit)
		if err != nil {
			log.Println("转换图片失败", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "image/webp")
		w.Header().Set("Cache-Control", "max-age=31536000")
		w.Write(data)
	})

	// URL format: /webp/{type}-{id}-{version}-{width}-{height}-{fit}.{format}
	http.HandleFunc("/webp/", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns
		matches := webpPath.FindStringSubmatch(r.URL.Path)
		if len(matches) != 8 {
			log.Println("URL 格式错误", matches)
			w.WriteHeader(http.StatusNotFound)
			return
		}
		group, id, version := matches[1], matches[2], matches[3]
		width, height := stringToInt(matches[4], 0), stringToInt(matches[5], 0)
		fit, format := matches[6], matches[7]
		content, err := mysqlConnection.GetImageContent(group, id)
		if err != nil {
			log.Println("获取图片失败", version, format, err)
			w.WriteHeader(http.StatusNotFound)
			return
		}
		var img models.Image
		if err := img.Init(content); err != nil {
			log.Println("初始化图片失败", version, format, err)
			w.WriteHeader(http.StatusNotFound)
			return
		}
		data, err := img.ToWebP(width, height, fit)
		if err != nil {
			log.Println("转换图片失败", err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "image/webp")
		w.Header().Set("Cache-Control", "max-age=31536000")
		w.Write(data)
	})

	// Resolve a video URL to its converted m3u8 links
	http.HandleFunc("/video", func(w http.ResponseWriter, r *http.Request) {
		defer LogComponent(time.Now().UnixNano(), r) // log when the handler returns
		queryParam := r.URL.Query().Get("url")
		// NOTE(review): Query().Get already percent-decodes the value, so this
		// decodes a second time. Kept because callers may double-encode;
		// confirm against the client.
		safeParam, err := url.QueryUnescape(queryParam)
		if err != nil {
			log.Println("解码URL失败", err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		log.Println("safeParam", safeParam)
		urls, err := models.GetVideoM3U8(safeParam)
		if err != nil {
			log.Println("获取视频链接失败", err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		// Indented JSON output.
		data, err := json.MarshalIndent(urls, "", " ")
		if err != nil {
			log.Println("encode video links failed:", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(data)
	})

	// Read the listen port from Viper and serve until the listener fails.
	port := viper.GetString("server.port")
	log.Println("Server is running at http://localhost:" + port)
	if err := http.ListenAndServe(":"+port, nil); err != nil {
		log.Fatalln("http server stopped:", err)
	}
}