Files
webp/bin/main.go
2024-07-20 10:30:48 +08:00

693 lines
22 KiB
Go

package main
import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"runtime"
"strings"
"time"
"regexp"
"strconv"
"encoding/json"
"git.satori.love/gameui/webp/api"
"git.satori.love/gameui/webp/models"
_ "github.com/go-sql-driver/mysql"
"github.com/graphql-go/handler"
"github.com/milvus-io/milvus-sdk-go/v2/entity"
"github.com/spf13/viper"
lru "github.com/hashicorp/golang-lru/v2"
)
// stringToInt parses str as a base-10 integer. It returns defaultValue when
// str is empty or is not a valid integer.
func stringToInt(str string, defaultValue int) int {
	// strconv.Atoi already rejects the empty string, so a single check
	// covers both the "missing" and the "malformed" case.
	if parsed, err := strconv.Atoi(str); err == nil {
		return parsed
	}
	return defaultValue
}
// LogComponent writes one colorized access-log line for r through the
// standard logger: the request method, the request URL and the elapsed time
// in milliseconds.
//
// startTime is the request start as Unix nanoseconds (time.Now().UnixNano());
// handlers call this via defer so the elapsed time covers the whole handler.
// The elapsed time is colored by how slow the request was.
func LogComponent(startTime int64, r *http.Request) {
	ms := (time.Now().UnixNano() - startTime) / 1000000

	// Pick the ANSI color for the latency bucket, slowest bucket first.
	var format string
	switch {
	case ms > 800:
		format = "\033[1;31m%dms\033[0m" // bold red
	case ms > 500:
		format = "\033[1;33m%dms\033[0m" // bold yellow
	case ms > 300:
		format = "\033[1;32m%dms\033[0m" // bold green
	case ms > 200:
		format = "\033[1;34m%dms\033[0m" // bold blue
	case ms > 100:
		format = "\033[1;35m%dms\033[0m" // bold magenta
	default:
		format = "\033[1;36m%dms\033[0m" // bold cyan
	}

	method := fmt.Sprintf("\033[1;32m%s\033[0m", r.Method) // bold green
	target := fmt.Sprintf("\033[1;34m%s\033[0m", r.URL)    // bold blue
	log.Println(method, target, fmt.Sprintf(format, ms))
}
// Image is the JSON shape of one image returned by the /api/images handler:
// the columns read from web_images plus the user and article records that the
// handler attaches afterwards.
// NOTE(review): the db tags on CreateTime/UpdateTime name camelCase columns,
// while the /api/images query selects create_time/update_time and scans by
// position — confirm whether the db tags are consumed anywhere.
type Image struct {
Id int `json:"id" db:"id"`
Width int `json:"width" db:"width"`
Height int `json:"height" db:"height"`
// Content is rewritten from "http:" to "https:" by the /api/images handler before it is returned.
Content string `json:"content" db:"content"`
ArticleCategoryTopId int `json:"article_category_top_id" db:"article_category_top_id"`
PraiseCount int `json:"praise_count" db:"praise_count"`
CollectCount int `json:"collect_count" db:"collect_count"`
// CreateTime and UpdateTime are converted to UTC by the /api/images handler.
CreateTime time.Time `json:"createTime" db:"createTime"`
UpdateTime time.Time `json:"updateTime" db:"updateTime"`
// UserID is not filled by the /api/images row scan, which stores the user_id column in User.ID instead.
UserID int `json:"user_id" db:"user_id"`
// User and Article first receive only their ids from the row scan and are then replaced by the matching records from models.QueryUserList / models.QueryArticleList.
User models.User `json:"user" db:"user"`
Article models.Article `json:"article" db:"article"`
}
// Tag is the JSON shape of one row of the web_tags table as returned by the
// /tags handler (columns id, name, update_time, create_time).
type Tag struct {
Id int `json:"id"`
Name string `json:"name"`
CreateTime time.Time `json:"create_time"`
UpdateTime time.Time `json:"update_time"`
}
// History describes one history entry: a type label, its creation time and an
// arbitrary payload. It is not referenced by any code visible in this file;
// presumably it is intended for the /history endpoint — TODO confirm.
type History struct {
Type string `json:"type"`
CreateTime time.Time `json:"create_time"`
// Data is untyped; its concrete shape per Type is not defined in this file.
Data interface{} `json:"data"`
}
// ListView is the paginated JSON envelope returned by the list endpoints
// (/api/images and /tags).
type ListView struct {
// Code is never assigned by the handlers in this file, so it is serialized as 0.
Code int `json:"code"`
// Page and PageSize echo the "page" and "pageSize" request parameters (defaults 1 and 20).
Page int `json:"page"`
PageSize int `json:"pageSize"`
// Total is the number of matching items across all pages.
Total int `json:"total"`
// Next is true when Total is greater than Page*PageSize.
Next bool `json:"next"`
// List holds the items of the current page (Image or Tag values).
List []interface{} `json:"list"`
}
// mysqlConnection is the process-wide MySQL connection used by the HTTP
// handlers; main calls its Init method before any route is served.
var mysqlConnection models.MysqlConnection
// milvusConnection is the process-wide Milvus client used for vector lookup
// and similarity search; main calls its Init method before any route is served.
var milvusConnection models.MilvusConnection
// embeddingHTTPClient is shared by every embedding request so connections are
// reused. The timeout keeps a stalled embedding service from blocking the
// calling request handler forever (the zero-value client never times out).
var embeddingHTTPClient = &http.Client{Timeout: 30 * time.Second}

// GetNetWorkEmbedding asks the embedding service configured under
// "embedding.host" / "embedding.port" for the feature vector of image id by
// sending PUT /api/default/{id}.
//
// The service is expected to answer with JSON of the form
// {"code": 0, "message": "...", "feature": [...]}. The function returns the
// "feature" array on success and nil when the request cannot be built or
// sent, the service answers with a non-2xx status, the body is not valid
// JSON, or "code" is non-zero. Every failure is logged.
func GetNetWorkEmbedding(id int) (embedding []float32) {
	host := viper.GetString("embedding.host")
	port := viper.GetInt("embedding.port")
	endpoint := fmt.Sprintf("http://%s:%d/api/default/%d", host, port, id)

	request, err := http.NewRequest(http.MethodPut, endpoint, nil)
	if err != nil {
		log.Println("请求失败1:", err)
		return nil
	}
	response, err := embeddingHTTPClient.Do(request)
	if err != nil {
		log.Println("请求失败2:", err)
		return nil
	}
	defer response.Body.Close()

	// A non-2xx answer is not guaranteed to carry the JSON envelope, so report
	// the status instead of a misleading decode error.
	if response.StatusCode < 200 || response.StatusCode > 299 {
		log.Println("请求失败2: unexpected status", response.Status)
		return nil
	}

	var result struct {
		Code    int       `json:"code"`
		Message string    `json:"message"`
		Feature []float32 `json:"feature"`
	}
	if err := json.NewDecoder(response.Body).Decode(&result); err != nil {
		log.Println("解析失败:", err)
		return nil
	}
	if result.Code != 0 {
		log.Println("请求失败3:", result.Message)
		return nil
	}
	return result.Feature
}
// lruCache memoizes similar-image id lists, keyed by the source image id and
// bounded to 1000 entries. The key does not include the collection name, so
// results for different collections would share entries; every caller in
// this file uses the "default" collection.
// The error from lru.New is discarded; presumably it can only fail for a
// non-positive size — verify against the golang-lru documentation.
var lruCache, _ = lru.New[int, []int64](1000)

// GetSimilarImagesIdList returns the ids of up to 200 images most similar to
// image in the Milvus collection collection_name, using L2 distance on the
// "embedding" field.
//
// Results are served from lruCache when present. Otherwise the image's vector
// is read from Milvus; if Milvus fails or holds no vector, the vector is
// requested from the embedding service via GetNetWorkEmbedding. The returned
// slice is nil when no vector can be obtained or the search fails. The slice
// is shared with the cache, so callers must not modify its elements.
func (image *Image) GetSimilarImagesIdList(collection_name string) (ids []int64) {
	// Serve from the cache first.
	if cached, ok := lruCache.Get(image.Id); ok {
		return cached
	}

	ctx := context.Background()

	// Look up the stored vector of this image.
	var embedding []float32
	columns, err := milvusConnection.Client.Query(ctx, collection_name, nil, fmt.Sprintf("id in [%d]", image.Id), []string{"embedding"})
	if err != nil {
		log.Println("查詢向量失敗:", err)
	} else {
		for _, column := range columns {
			if column.Name() == "embedding" {
				// GetData is nil-safe like the other generated getters; reading
				// the Data field directly would panic on a non-float vector column.
				embedding = column.FieldData().GetVectors().GetFloatVector().GetData()
			}
		}
	}

	// Fall back to the embedding service both when the query failed and when
	// it succeeded without returning a vector.
	if len(embedding) == 0 {
		embedding = GetNetWorkEmbedding(image.Id)
	}
	if len(embedding) == 0 {
		log.Println("向量不存在, 也未能重新生成")
		return nil
	}

	// Search for the nearest neighbours of the vector.
	const topk = 200
	searchParam, err := entity.NewIndexIvfFlatSearchParam(64)
	if err != nil {
		log.Println("搜索相似失敗:", err)
		return nil
	}
	vectors := []entity.Vector{entity.FloatVector(embedding)}
	results, err := milvusConnection.Client.Search(ctx, collection_name, nil, "", []string{"id", "article_id"}, vectors, "embedding", entity.L2, topk, searchParam)
	if err != nil {
		log.Println("搜索相似失敗:", err)
		return nil
	}

	// One query vector is sent, so at most one result set is expected; as in
	// the original loop, the last result set wins.
	for _, result := range results {
		if result.IDs == nil {
			continue
		}
		ids = result.IDs.FieldData().GetScalars().GetLongData().GetData()
	}

	lruCache.Add(image.Id, ids)
	return ids
}
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
viper.SetConfigFile("./data/config.yaml")
if err := viper.ReadInConfig(); err != nil {
log.Println("读取配置文件失败", err)
}
config := viper.GetViper()
models.InitConfig(config)
models.ZincInit()
mysqlConnection.Init()
milvusConnection.Init()
err := milvusConnection.Client.LoadCollection(context.Background(), "default", false)
if err != nil {
log.Println("Milvus load collection failed:", err)
return
}
// graphql Schema
schema, err := api.NewSchema(api.Config{
Mysql: api.ConfigMysql{
Host: config.GetString("mysql.host"),
Port: config.GetInt("mysql.port"),
Database: config.GetString("mysql.database"),
UserName: config.GetString("mysql.user"),
Password: config.GetString("mysql.password"),
},
})
if err != nil {
log.Fatalf("failed to create new schema, error: %v", err)
}
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
w.Write([]byte("Hello World!"))
})
http.Handle("/api", handler.New(&handler.Config{
Schema: &schema,
Playground: true,
Pretty: false,
}))
//http.HandleFunc("/api", func(w http.ResponseWriter, r *http.Request) {
// defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
// query := r.URL.Query().Get("query")
// params := graphql.Params{Schema: schema, RequestString: query}
// result := graphql.Do(params)
// if len(result.Errors) > 0 {
// fmt.Printf("failed to execute graphql operation, errors: %+v", result.Errors)
// http.Error(w, result.Errors[0].Error(), 500)
// return
// }
// rJSON, _ := json.MarshalIndent(result.Data, "", " ")
// w.Write(rJSON)
//})
http.HandleFunc("/favicon.ico", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
http.Error(w, "Not Found", http.StatusNotFound)
})
http.HandleFunc("/api/default", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
http.Error(w, "Not Found", http.StatusNotFound)
})
// 获取浏览记录
http.HandleFunc("/history", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
// 按会话过滤
// 按用户过滤
// 按时间过滤
// 按类型过滤
// 按数据过滤
// 日志记录器:
// 会话记录, 调取也从SDK本地取数据
// URL变化或新开 [URL, 来源, 时间]
// 针对某些组件挂载的事件 [组件, 事件, 时间]
// 记录用户行为 [用户, 行为, 时间]
// 查看过的[图片, 文章, 精选集, 用户]
// 请求过的API连接
// 展示了的数据
// 展示后被点击的数据(+正反馈)
// 展示后被收藏的数据
// 展示后被分享的数据
// 展示后被评论的数据
// 展示后被点赞的数据
// 展示后被下载的数据
// 展示后被忽略的数据(+负反馈)
// 展示后被屏蔽的数据(+屏蔽功能)
// 获取用户id
userId := stringToInt(r.URL.Query().Get("user_id"), 0)
if userId == 0 {
w.Write([]byte("user_id is required"))
return
}
// 获取图片id列表
var ids []int64
err := json.Unmarshal([]byte(r.URL.Query().Get("ids")), &ids)
if err != nil {
w.Write([]byte("ids is required"))
return
}
// 获取图片信息列表
var images []Image
//mysqlConnection.DB.Where("id in (?)", ids).Find(&images)
// 返回结果
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(images)
})
// 获取图片信息列表(分页)
http.HandleFunc("/api/images", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
// 私域: (自己的图片, 自己的文章, 自己的精选集, 点赞收藏精选集)
// 条件查询(模糊搜索, 时间区间, 作者, 标签, 分类, 精选集, 状态, 置顶, 模糊权重)(权重规则:权重指数)
// 条件筛选(交集, 并集, 差集, 子集)
// 文字搜索支持翻页
// 文字搜索支持与按颜色筛选混合
// 获取查询条件(忽略空值)
QueryConditions := func(key string) (list []string) {
for _, item := range strings.Split(r.URL.Query().Get(key), ",") {
if item != "" {
list = append(list, fmt.Sprintf("'%s'", item))
}
}
return list
}
// 拼接基本查询条件
var addCondition = func(conditions *strings.Builder, key, column string) {
if values := QueryConditions(key); len(values) > 0 {
if conditions.Len() > 0 {
conditions.WriteString(" AND")
} else {
conditions.WriteString(" WHERE")
}
conditions.WriteString(fmt.Sprintf(" %s IN (%s)", column, strings.Join(values, ",")))
}
}
var conditions strings.Builder
// 如果是查询 text, 直接从 Elasticsearch 返回结果
var text_ids []string
if text := QueryConditions("text"); len(text) > 0 {
text_ids := models.ElasticsearchSearch(strings.Join(text, " ")).GetIDList(0, 0, 0, 0)
if len(text_ids) > 0 {
conditions.WriteString(fmt.Sprintf(" WHERE id IN (%s)", strings.Trim(strings.Replace(fmt.Sprint(text_ids), " ", ",", -1), "[]")))
} else {
// 直接返回空列表
var images ListView
images.Page, images.PageSize = stringToInt(r.URL.Query().Get("page"), 1), stringToInt(r.URL.Query().Get("pageSize"), 20)
images.Total = 0
images.Next = false
images.List = make([]interface{}, 0)
// 将对象转换为有缩进的JSON输出
data, _ := json.MarshalIndent(images, "", " ")
w.Header().Set("Content-Type", "application/json; charset=UTF-8")
w.Write(data)
return
}
if conditions.Len() > 1024 {
log.Println("查询条件过长")
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
} else {
addCondition(&conditions, "authors", "author")
addCondition(&conditions, "tags", "tag")
addCondition(&conditions, "categories", "categorie")
addCondition(&conditions, "sets", "sets")
}
// 获取图片列表
var images ListView
var image_list []Image
images.Page, images.PageSize = stringToInt(r.URL.Query().Get("page"), 1), stringToInt(r.URL.Query().Get("pageSize"), 20)
var ids []int64
if similar := QueryConditions("similar"); len(similar) > 0 {
id, err := strconv.Atoi(strings.Trim(similar[0], "'"))
if err != nil {
log.Println("strconv.Atoi failed:", err)
return
}
// 如果指定以某个图片为基准的相似图片列表范围, 获取相似图片ID的列表
ids = (&Image{Id: id}).GetSimilarImagesIdList("default")
images.Total = len(ids)
// 按照分页取相应的图片ID
if len(ids) > images.Page*images.PageSize {
ids = ids[(images.Page-1)*images.PageSize : images.Page*images.PageSize]
} else {
ids = ids[(images.Page-1)*images.PageSize:]
}
idsStr := make([]string, len(ids))
for i, v := range ids {
idsStr[i] = strconv.FormatInt(v, 10)
}
if len(idsStr) > 0 {
if conditions.Len() > 0 {
conditions.WriteString(" AND")
} else {
conditions.WriteString(" WHERE")
}
conditions.WriteString(fmt.Sprintf(" id IN (%s)", strings.Join(idsStr, ","))) // 拼接查询条件
}
}
sql := fmt.Sprintf("SELECT id, width, height, content, update_time, create_time, user_id, article_id, article_category_top_id, praise_count, collect_count FROM web_images %s", conditions.String())
rows, err := mysqlConnection.Database.Query(sql)
if err != nil {
log.Println("获取图片列表失败", err)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
defer rows.Close()
for rows.Next() {
var image Image
rows.Scan(&image.Id, &image.Width, &image.Height, &image.Content, &image.UpdateTime, &image.CreateTime, &image.User.ID, &image.Article.Id, &image.ArticleCategoryTopId, &image.PraiseCount, &image.CollectCount)
image.UpdateTime = image.UpdateTime.UTC()
image.CreateTime = image.CreateTime.UTC()
image.Content = regexp.MustCompile(`http:`).ReplaceAllString(image.Content, "https:")
image_list = append(image_list, image)
}
// 如果使用了相似图片列表范围, 需要按照相似图片ID原本的顺序重新排序, 需要注意的是, 相似图片ID列表中可能会包含不在当前页的图片ID
if similar := QueryConditions("similar"); len(similar) > 0 {
var image_list_sorted []Image
for _, id := range ids {
for _, image := range image_list {
if image.Id == int(id) {
image_list_sorted = append(image_list_sorted, image)
}
}
}
image_list = image_list_sorted
}
// 如果使用了图像文字检索, 需要按照图像文字检索的相似度重新排序 text_ids
if len(text_ids) > 0 {
var image_list_sorted []Image
for _, id := range text_ids {
for _, image := range image_list {
if id == strconv.Itoa(image.Id) {
image_list_sorted = append(image_list_sorted, image)
}
}
}
image_list = image_list_sorted
}
// 用户ID, 文章ID
var user_ids []int
var article_ids []int
for _, image := range image_list {
user_ids = append(user_ids, image.User.ID)
article_ids = append(article_ids, image.Article.Id)
}
// 附加用户信息
users := models.QueryUserList(user_ids)
for i, image := range image_list {
for _, user := range users {
if image.User.ID == user.ID {
image_list[i].User = user
}
}
}
// 附加文章信息
articles := models.QueryArticleList(article_ids)
for i, image := range image_list {
for _, article := range articles {
if image.Article.Id == article.Id {
image_list[i].Article = article
}
}
}
// 将 []Image 转换为 []interface{}
images.List = make([]interface{}, len(image_list))
for i, v := range image_list {
images.List[i] = v
}
// 如果不是获取相似图像固定数量, 则从mysql获取总数
if similar := QueryConditions("similar"); len(similar) > 0 {
// 固定数量
} else {
// 获取总数
err = mysqlConnection.Database.QueryRow("SELECT COUNT(*) FROM web_images" + conditions.String()).Scan(&images.Total)
if err != nil {
log.Println("获取图片总数失败", err)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
}
// 是否有下一页
images.Next = images.Total > images.Page*images.PageSize
// 将对象转换为有缩进的JSON输出
data, err := json.MarshalIndent(images, "", " ")
if err != nil {
log.Println("转换图片列表失败", err)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
w.Header().Set("Content-Type", "application/json; charset=UTF-8")
w.Write(data)
})
// 获取标签列表
http.HandleFunc("/tags", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
// 标签的原理
// 1. 通过文章的 tag 字段, 获取所有的标签
// 2. 通过标签的 id, 获取标签的名称
// 热门权重指数的标签排序
// 1. 标签的权重指数 = (标签的文章数 * 标签的文章数) * 近期增幅
// 2. 标签的近期增幅 = (标签的文章数 - 标签的文章数) / 标签的文章数
// 标签是一个虚拟表, ORC 提取的数据都带有多个维度的比重概率(分布概率, 对比度概率, 文字大小, 文字重量, 词频概率, 词性概率, 词长概率, 词序概率)
// 经过规则过滤后, 用动态调参的指数计算乘积作为权重, 权重仍达到某个阈值的数据才会被视为标签
// 获取查询条件(忽略空值), 超级简洁写法
QueryConditions := func(key string) (list []string) {
if r.FormValue(key) != "" {
list = strings.Split(r.FormValue(key), ",")
}
return
}
// 拼接查询条件, 超级简洁写法
conditions := ""
if authors := QueryConditions("authors"); len(authors) > 0 {
conditions += fmt.Sprintf(" AND author IN (%s)", strings.Join(authors, ","))
}
if tags := QueryConditions("tags"); len(tags) > 0 {
conditions += fmt.Sprintf(" AND tag IN (%s)", strings.Join(tags, ","))
}
if categories := QueryConditions("categories"); len(categories) > 0 {
conditions += fmt.Sprintf(" AND categorie IN (%s)", strings.Join(categories, ","))
}
// 获取标签列表
var tags ListView
tags.Page, tags.PageSize = stringToInt(r.FormValue("page"), 1), stringToInt(r.FormValue("pageSize"), 20)
rows, err := mysqlConnection.Database.Query("SELECT id, name, update_time, create_time FROM web_tags"+conditions+" ORDER BY id DESC LIMIT ?, ?", (tags.Page-1)*tags.PageSize, tags.PageSize)
if err != nil {
log.Println(err)
return
}
defer rows.Close()
for rows.Next() {
var tag Tag
if err := rows.Scan(&tag.Id, &tag.Name, &tag.UpdateTime, &tag.CreateTime); err != nil {
log.Println(err)
continue
}
tags.List = append(tags.List, tag)
}
if err := rows.Err(); err != nil {
log.Println(err)
return
}
// 获取总数
if err := mysqlConnection.Database.QueryRow("SELECT COUNT(*) FROM web_tags" + conditions).Scan(&tags.Total); err != nil {
log.Println(err)
return
}
// 是否有下一页
tags.Next = tags.Total > tags.Page*tags.PageSize
// 将对象转换为有缩进的JSON输出
json, err := json.MarshalIndent(tags, "", " ")
if err != nil {
log.Println(err)
return
}
// 输出JSON
w.Header().Set("Content-Type", "application/json")
w.Write(json)
})
// URL 格式: /img/{type}-{id}.{format}?width=320&height=320&fit=cover
http.HandleFunc("/img/", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
reg := regexp.MustCompile(`^/img/([0-9a-zA-Z]+)-([0-9a-zA-Z]+).(jpg|jpeg|png|webp)$`)
matches := reg.FindStringSubmatch(r.URL.Path)
if len(matches) != 4 {
http.Error(w, "URL 格式错误", http.StatusNotFound)
return
}
group, id, format, width, height, fit := matches[1], matches[2], matches[3], stringToInt(r.URL.Query().Get("width"), 0), stringToInt(r.URL.Query().Get("height"), 0), r.URL.Query().Get("fit")
content, err := mysqlConnection.GetImageContent(group, id)
if err != nil {
log.Println("获取图片失败", format, err)
http.Error(w, err.Error(), http.StatusNotFound)
return
}
var img models.Image
if err := img.Init(content); err != nil {
log.Println("初始化图片失败", format, err)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
data, err := img.ToWebP(width, height, fit)
if err != nil {
log.Println("转换图片失败", err)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
w.Header().Set("Content-Type", "image/webp")
w.Header().Set("Cache-Control", "max-age=31536000")
w.Write(data)
})
// URL 格式: /webp/{type}-{id}-{version}-{width}-{height}-{fit}.{format}
http.HandleFunc("/webp/", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
reg := regexp.MustCompile(`^/webp/([0-9a-zA-Z]+)-([0-9a-zA-Z]+)-([0-9a-zA-Z]+)-([0-9]+)-([0-9]+)-([a-zA-Z]+).(jpg|jpeg|png|webp)$`)
matches := reg.FindStringSubmatch(r.URL.Path)
if len(matches) != 8 {
log.Println("URL 格式错误", matches)
w.WriteHeader(http.StatusNotFound)
return
}
group, id, version, width, height, fit, format := matches[1], matches[2], matches[3], stringToInt(matches[4], 0), stringToInt(matches[5], 0), matches[6], matches[7]
content, err := mysqlConnection.GetImageContent(group, id)
if err != nil {
log.Println("获取图片失败", version, format, err)
w.WriteHeader(http.StatusNotFound)
return
}
var img models.Image
if err := img.Init(content); err != nil {
log.Println("初始化图片失败", version, format, err)
w.WriteHeader(http.StatusNotFound)
return
}
data, err := img.ToWebP(width, height, fit)
if err != nil {
log.Println("转换图片失败", err)
w.WriteHeader(http.StatusBadRequest)
return
}
w.Header().Set("Content-Type", "image/webp")
w.Header().Set("Cache-Control", "max-age=31536000")
w.Write(data)
})
// 获取转换后的m3u8视频链接
http.HandleFunc("/video", func(w http.ResponseWriter, r *http.Request) {
defer LogComponent(time.Now().UnixNano(), r) // 最后打印日志
queryParam := r.URL.Query().Get("url")
safeParam, err := url.QueryUnescape(queryParam)
if err != nil {
log.Println("解码URL失败", err)
w.WriteHeader(http.StatusBadRequest)
return
}
fmt.Println("safeParam", safeParam)
urls, err := models.GetVideoM3U8(safeParam)
fmt.Println("urls", urls, err)
if err != nil {
log.Println("获取视频链接失败", err)
w.WriteHeader(http.StatusBadRequest)
return
}
// 将对象转换为有缩进的JSON输出
json, _ := json.MarshalIndent(urls, "", " ")
w.Header().Set("Content-Type", "application/json")
w.Write(json)
})
// 从Viper中读取配置
port := viper.GetString("server.port")
log.Println("Server is running at http://localhost:" + port)
http.ListenAndServe(":"+port, nil)
}