// fonchain-fiee/pkg/common/qwen/qwen_vl.go

package qwen

import (
	"encoding/json"
	"errors"
	"fmt"
	modelQwen "fonchain-fiee/pkg/model/qwen"
	"fonchain-fiee/pkg/utils"

	"go.uber.org/zap"
)

// VL calls the Tongyi Qianwen (Qwen) vision-language API with a single "user"
// message assembled from the given videos, images and text.
//
// Parameters:
//   - videoURLs: each URL is sent as a "video_url" content part with FPS set to 2.
//   - imageURLs: each URL is sent as an "image_url" content part.
//   - text: sent as a trailing "text" content part; skipped when empty.
//   - model: model name; falls back to "qwen3-vl-plus" when empty.
//
// Content parts are appended in the order videos, images, text.
//
// It returns the decoded response on HTTP 200. An error is returned when no
// content at all was supplied, when the request cannot be serialized or sent,
// when the API answers with a non-200 status (the API's own error message is
// returned if the body carries one), or when the response body cannot be
// decoded (the underlying JSON error is wrapped and reachable via errors.As).
func VL(videoURLs []string, imageURLs []string, text string, model string) (resp *modelQwen.VLResponse, err error) {
	// FPS value attached to every video part; it is fixed here and cannot be
	// overridden by callers.
	const defaultVideoFPS = 2

	if model == "" {
		model = "qwen3-vl-plus"
	}

	// A message without any content part is meaningless; fail fast instead of
	// spending a network round trip on it.
	if len(videoURLs) == 0 && len(imageURLs) == 0 && text == "" {
		return nil, errors.New("请求内容不能为空")
	}

	// Final length is known up front: one part per video, per image, plus the
	// optional text part.
	content := make([]modelQwen.VLContent, 0, len(videoURLs)+len(imageURLs)+1)

	for _, videoURL := range videoURLs {
		content = append(content, modelQwen.VLContent{
			Type:     "video_url",
			VideoURL: &modelQwen.VideoURL{URL: videoURL},
			FPS:      defaultVideoFPS,
		})
	}

	for _, imageURL := range imageURLs {
		content = append(content, modelQwen.VLContent{
			Type:     "image_url",
			ImageURL: &modelQwen.ImageURL{URL: imageURL},
		})
	}

	if text != "" {
		content = append(content, modelQwen.VLContent{
			Type: "text",
			Text: text,
		})
	}

	req := modelQwen.VLRequest{
		Model: model,
		Messages: []modelQwen.VLMessage{
			{Role: "user", Content: content},
		},
	}

	jsonData, err := json.Marshal(req)
	if err != nil {
		// Details go to the log; the caller gets a generic message.
		zap.L().Error("VL Marshal failed", zap.Error(err))
		return nil, errors.New("序列化请求失败")
	}

	// PostBytesHeader yields the HTTP status code alongside the raw body so
	// that error payloads can be inspected below.
	statusCode, body, err := utils.PostBytesHeader(modelQwen.DashscopeVLURL, map[string]interface{}{
		"Authorization": "Bearer " + modelQwen.DashscopeAPIKey,
		"Content-Type":  "application/json",
	}, jsonData)
	if err != nil {
		zap.L().Error("VL Post failed", zap.Error(err))
		return nil, errors.New("请求视觉AI失败")
	}

	if statusCode != 200 {
		// Try to pull a structured error message out of the body.
		var errorResp struct {
			Error struct {
				Message string `json:"message"`
				Type    string `json:"type"`
				Code    string `json:"code"`
			} `json:"error"`
		}
		// A distinct name avoids shadowing the named result err.
		if decodeErr := json.Unmarshal(body, &errorResp); decodeErr == nil && errorResp.Error.Message != "" {
			zap.L().Error("VL API error", zap.Int("status", statusCode), zap.String("message", errorResp.Error.Message))
			return nil, errors.New(errorResp.Error.Message)
		}
		// Body was not a recognizable error payload: log it raw and return a
		// generic error.
		zap.L().Error("VL API error", zap.Int("status", statusCode), zap.String("body", string(body)))
		return nil, errors.New("接口返回错误")
	}

	var result modelQwen.VLResponse
	if err = json.Unmarshal(body, &result); err != nil {
		zap.L().Error("VL Unmarshal failed", zap.Error(err), zap.String("body", string(body)))
		// %w keeps the message text unchanged while preserving the error chain.
		return nil, fmt.Errorf("解析响应失败: %w", err)
	}

	return &result, nil
}