feat：增加多模态ai测试接口

2026-01-15 14:13:00 +08:00 · 2026-01-15 14:13:00 +08:00 · 222b294101
commit 222b294101
parent 9170c77e32
5 changed files with 200 additions and 0 deletions
--- a/pkg/common/qwen/qwen_vl.go
+++ b/pkg/common/qwen/qwen_vl.go
@ -0,0 +1,89 @@
 package qwen
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	modelQwen "fonchain-fiee/pkg/model/qwen"
 	"fonchain-fiee/pkg/utils"
 	"go.uber.org/zap"
 )
 // VL calls the Qwen vision-language chat-completions endpoint
 // (modelQwen.DashscopeVLURL) with a single "user" message built from the
 // given videos, images and text, and returns the decoded response.
 //
 // Parameters:
 //   - videoURLs: video URLs, each sent as a "video_url" part with a fixed fps of 2.
 //   - imageURLs: image URLs, each sent as an "image_url" part.
 //   - text: optional prompt, appended last as a "text" part when non-empty.
 //   - model: model name; "qwen3-vl-plus" is used when empty.
 //
 // Empty URL entries are skipped. An error is returned when nothing is left to
 // send, when the request cannot be serialized or posted, when the response
 // body is not valid JSON for VLResponse, or when the response has no choices.
 func VL(videoURLs []string, imageURLs []string, text string, model string) (resp *modelQwen.VLResponse, err error) {
 	// Fall back to the default model when the caller does not pick one.
 	if model == "" {
 		model = "qwen3-vl-plus"
 	}
 	// Frame-sampling rate attached to every video part; not caller-configurable yet.
 	const defaultFPS = 2
 	// Message parts are ordered: videos, then images, then the text prompt.
 	content := make([]modelQwen.VLContent, 0, len(videoURLs)+len(imageURLs)+1)
 	for _, videoURL := range videoURLs {
 		// An empty URL can never be fetched by the API; drop it instead of
 		// failing the whole request remotely.
 		if videoURL == "" {
 			continue
 		}
 		content = append(content, modelQwen.VLContent{
 			Type:     "video_url",
 			VideoURL: &modelQwen.VideoURL{URL: videoURL},
 			FPS:      defaultFPS,
 		})
 	}
 	for _, imageURL := range imageURLs {
 		if imageURL == "" {
 			continue
 		}
 		content = append(content, modelQwen.VLContent{
 			Type:     "image_url",
 			ImageURL: &modelQwen.ImageURL{URL: imageURL},
 		})
 	}
 	if text != "" {
 		content = append(content, modelQwen.VLContent{
 			Type: "text",
 			Text: text,
 		})
 	}
 	// Avoid a pointless round trip with an empty message.
 	if len(content) == 0 {
 		return nil, errors.New("请求内容不能为空")
 	}
 	req := modelQwen.VLRequest{
 		Model: model,
 		Messages: []modelQwen.VLMessage{
 			{
 				Role:    "user",
 				Content: content,
 			},
 		},
 	}
 	jsonData, err := json.Marshal(req)
 	if err != nil {
 		// The cause is logged; the caller only gets a generic message.
 		zap.L().Error("VL Marshal failed", zap.Error(err))
 		return nil, errors.New("序列化请求失败")
 	}
 	body, err := utils.PostBytes(modelQwen.DashscopeVLURL, map[string]interface{}{
 		"Authorization": "Bearer " + modelQwen.DashscopeAPIKey,
 		"Content-Type":  "application/json",
 	}, jsonData)
 	if err != nil {
 		zap.L().Error("VL Post failed", zap.Error(err))
 		return nil, errors.New("请求视觉AI失败")
 	}
 	var result modelQwen.VLResponse
 	if err = json.Unmarshal(body, &result); err != nil {
 		zap.L().Error("VL Unmarshal failed", zap.Error(err), zap.String("body", string(body)))
 		// %w keeps the message identical while letting callers unwrap the cause.
 		return nil, fmt.Errorf("解析响应失败: %w", err)
 	}
 	// A body that decodes but carries no choices (for example an API error
 	// envelope) must not be reported as success.
 	// NOTE(review): confirm whether utils.PostBytes already fails on non-2xx statuses.
 	if len(result.Choices) == 0 {
 		zap.L().Error("VL response has no choices", zap.String("body", string(body)))
 		return nil, errors.New("视觉AI未返回结果")
 	}
 	return &result, nil
 }
--- a/pkg/model/qwen/image.go
+++ b/pkg/model/qwen/image.go
@ -4,6 +4,7 @@ const (
 	DashscopeAPIKey        string = "sk-5ae9df5d3bcf4755ad5d12012058a2e7"
 	DashscopeText2ImageURL string = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis"
 	DashscopeEditImageURL  string = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis"
 	DashscopeVLURL         string = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
 )
 // QwenImageRequest 通义千问文生图请求
--- a/pkg/model/qwen/qwen_vl.go
+++ b/pkg/model/qwen/qwen_vl.go
@ -0,0 +1,47 @@
 package qwen
 // VLContent is one part of a multimodal message: text, an image URL or a
 // video URL. Type names the kind of part; the remaining fields are optional
 // and are left out of the JSON when they hold their zero value.
 type VLContent struct {
 	Type     string    `json:"type"`                // one of: text, image_url, video_url
 	Text     string    `json:"text,omitempty"`      // used when type=text
 	ImageURL *ImageURL `json:"image_url,omitempty"` // used when type=image_url
 	VideoURL *VideoURL `json:"video_url,omitempty"` // used when type=video_url
 	FPS      int       `json:"fps,omitempty"`       // optional when type=video_url: video frame rate
 }
 // VideoURL wraps the URL of a video referenced by a VLContent part.
 type VideoURL struct {
 	URL string `json:"url"`
 }
 // VLRequest is the JSON request body for the vision-language chat endpoint.
 // Seed and EnableSearch are omitted from the JSON when left at their zero value.
 type VLRequest struct {
 	Model        string      `json:"model"`                   // model name, e.g. qwen3-vl-plus
 	Messages     []VLMessage `json:"messages"`                // message list
 	Seed         int64       `json:"seed,omitempty"`          // random seed
 	EnableSearch bool        `json:"enable_search,omitempty"` // whether to enable search
 }
 // VLMessage is a single chat message whose content is a list of multimodal parts.
 type VLMessage struct {
 	Role    string      `json:"role"`    // user, assistant, system
 	Content []VLContent `json:"content"` // content parts; may mix text, images and videos
 }
 // VLResponse is the decoded JSON response of the vision-language chat endpoint.
 // Only the choices, model and id fields are captured.
 type VLResponse struct {
 	Choices []VLChoice `json:"choices"`
 	Model   string     `json:"model,omitempty"`
 	ID      string     `json:"id,omitempty"`
 }
 // VLChoice is one entry of VLResponse.Choices: the returned message together
 // with its finish reason and index.
 type VLChoice struct {
 	// Message holds the returned message fields as decoded from JSON.
 	Message struct {
 		Content          string `json:"content"`
 		ReasoningContent string `json:"reasoning_content"`
 		Role             string `json:"role"`
 	} `json:"message"`
 	FinishReason string `json:"finish_reason"`
 	Index        int    `json:"index,omitempty"`
 }
--- a/pkg/router/media.go
+++ b/pkg/router/media.go
@ -86,6 +86,7 @@ func MediaRouter(r *gin.RouterGroup) {
 	{
 		aiNoAuth.POST("image-generate", serviceAI.AIImageGenerate)
 		aiNoAuth.POST("text-generate", serviceAI.AIChat)
 		aiNoAuth.POST("video-vl", serviceAI.AIVideoVL)
 	}
 	aiAuth := auth.Group("ai")
 	{
--- a/pkg/service/ai/video_vl.go
+++ b/pkg/service/ai/video_vl.go
@ -0,0 +1,62 @@
 package ai
 import (
 	"errors"
 	"fonchain-fiee/pkg/common/qwen"
 	"fonchain-fiee/pkg/service"
 	"fonchain-fiee/pkg/utils"
 	"github.com/gin-gonic/gin"
 )
 // VideoVLRequest is the JSON body accepted by the AIVideoVL handler.
 type VideoVLRequest struct {
 	Videos []string `json:"videos"` // list of video URLs
 	Images []string `json:"images"` // list of image URLs
 	Text   string   `json:"text"`   // optional text prompt
 	Model  string   `json:"model"`  // optional model name; qwen3-vl-plus is used by default
 }
 // AIVideoVL is the gin handler for AI video/image understanding.
 //
 // It binds a VideoVLRequest from the JSON body, drops empty URL entries,
 // requires at least one remaining video or image, rejects any video larger
 // than 55MB, and then forwards the request to qwen.VL. The result is written
 // with service.Success; every failure is written with service.Error.
 //
 // NOTE(review): this handler issues a remote size lookup for each
 // caller-supplied video URL; if the route is reachable without authentication
 // that is an SSRF surface — consider restricting schemes/hosts and capping the
 // number of URLs.
 func AIVideoVL(ctx *gin.Context) {
 	var req VideoVLRequest
 	if err := ctx.ShouldBindJSON(&req); err != nil {
 		service.Error(ctx, errors.New("参数错误"))
 		return
 	}
 	// Filter before validating so that a list made only of "" entries does not
 	// count as input and blank URLs are never sent to the model.
 	videos := dropEmptyURLs(req.Videos)
 	images := dropEmptyURLs(req.Images)
 	if len(videos) == 0 && len(images) == 0 {
 		service.Error(ctx, errors.New("至少需要提供一个视频或图片"))
 		return
 	}
 	// Per-video size limit in MB.
 	const maxVideoSizeMB = 55
 	for _, videoURL := range videos {
 		// Size is treated as MB, per the original author's comment on this call.
 		sizeMB, err := utils.GetRemoteFileSize(videoURL)
 		if err != nil {
 			service.Error(ctx, errors.New("获取视频大小失败: "+err.Error()))
 			return
 		}
 		if sizeMB > maxVideoSizeMB {
 			// The message refers to the video size; it previously said "count".
 			service.Error(ctx, errors.New("视频大小不能超过55MB"))
 			return
 		}
 	}
 	result, err := qwen.VL(videos, images, req.Text, req.Model)
 	if err != nil {
 		service.Error(ctx, err)
 		return
 	}
 	service.Success(ctx, result)
 }
 // dropEmptyURLs returns the non-empty entries of urls, preserving their order.
 func dropEmptyURLs(urls []string) []string {
 	kept := make([]string, 0, len(urls))
 	for _, u := range urls {
 		if u != "" {
 			kept = append(kept, u)
 		}
 	}
 	return kept
 }