// fonchain-fiee/pkg/service/ai/video_vl.go

package ai

import (
	"errors"
	"fonchain-fiee/pkg/common/qwen"
	"fonchain-fiee/pkg/service"
	"strings"

	"github.com/gin-gonic/gin"
)

// VideoVLRequest is the JSON request body accepted by the video/image
// understanding endpoint.
type VideoVLRequest struct {
	// Videos is the list of video URLs to analyse.
	Videos []string `json:"videos"`

	// Images is the list of image URLs to analyse.
	Images []string `json:"images"`

	// Text is an optional text prompt.
	// NOTE(review): AIVideoVL in this file binds this field but does not read it.
	Text string `json:"text"`

	// Model is an optional model name. The original note states that
	// qwen3-vl-plus is used when it is empty; that default is not applied in
	// this file — presumably resolved by qwen.VL, TODO confirm.
	Model string `json:"model"`
}

// AIVideoVL is the gin handler that asks the vision-language model to describe
// the supplied video and/or images.
//
// The JSON body is bound into a VideoVLRequest. The request is rejected when
// the body cannot be bound, when neither a video nor an image is supplied, or
// when more than one video is supplied. On success the value returned by
// qwen.VL is handed to service.Success unchanged; every failure is reported
// through service.Error with a fixed user-facing message (the underlying error
// text is not exposed to the client).
//
// NOTE(review): req.Text is bound but never read here; the prompt sent to the
// model is always the fixed string below. Confirm whether that is intended.
func AIVideoVL(ctx *gin.Context) {
	var req VideoVLRequest
	if err := ctx.ShouldBindJSON(&req); err != nil {
		service.Error(ctx, errors.New("参数错误"))
		return
	}

	switch {
	case len(req.Videos) == 0 && len(req.Images) == 0:
		// At least one video or image must be provided.
		service.Error(ctx, errors.New("至少需要提供一个视频或图片"))
		return
	case len(req.Videos) > 1:
		// Only a single video is accepted per request.
		service.Error(ctx, errors.New("当前只能选一个视频"))
		return
	}

	// Fixed instruction sent to the model for every request.
	const prompt = "请你详细描述视频和图片中的内容分别是什么"

	result, err := qwen.VL(req.Videos, req.Images, prompt, req.Model)
	if err != nil {
		// A timeout is reported to the user as the content being too large.
		// Matching "timed out" (case-insensitively) also covers the more
		// specific "Download multimodal file timed out" message, so a single
		// check is sufficient.
		msg := "ai分析帖子内容失败"
		if contains(err.Error(), "timed out") {
			msg = "报错内容过大，请重新选择"
		}
		service.Error(ctx, errors.New(msg))
		return
	}

	// Return the model's result as-is.
	service.Success(ctx, result)
}

// contains reports whether substr occurs within s, ignoring letter case.
// Both arguments are lower-cased with strings.ToLower before the comparison.
func contains(s, substr string) bool {
	haystack := strings.ToLower(s)
	needle := strings.ToLower(substr)
	return strings.Contains(haystack, needle)
}