Calling SiliconFlow's vision-language model from Python
Reference: https://docs.siliconflow.cn/cn/userguide/capabilities/vision
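The script below uses the OpenAI-compatible `openai` SDK and Pillow (`PIL`) for image handling; both are available on PyPI (for example, pip install openai pillow).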
import base64
import json
from openai import OpenAI
from PIL import Image
import io

# Initialize the OpenAI-compatible client pointed at the SiliconFlow endpoint
client = OpenAI(
    api_key="sk-**********",  # Replace with your actual API key
    base_url="https://api.siliconflow.cn/v1"
)

def convert_image_to_webp_base64(input_image_path: str) -> str:
    """Convert a local image to a WebP-encoded Base64 string."""
    try:
        with Image.open(input_image_path) as img:
            # Re-encode as WebP to reduce payload size
            byte_arr = io.BytesIO()
            img.save(byte_arr, format='WEBP', quality=85)  # Adjust quality to balance size and clarity
            byte_arr = byte_arr.getvalue()
            return base64.b64encode(byte_arr).decode('utf-8')
    except Exception as e:
        print(f"Image conversion error: {e}")
        return None

# 1. Convert the local image
input_image_path = "7125e2e3.jpeg"  # Replace with your actual image path
base64_image = convert_image_to_webp_base64(input_image_path)
if not base64_image:
    print("Image conversion failed; check the path and format")
    exit()

# 2. Create a streaming request
response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/webp;base64,{base64_image}",  # WebP data URL
                        "detail": "high"  # Higher accuracy at the cost of speed
                    }
                },
                {
                    "type": "text",
                    "text": "Use OCR to recognize the text in the image and output it"  # Replace with your own prompt
                }
            ]
        }
    ],
    stream=True,
    max_tokens=1000  # Limit response length
)

# 3. Process the streaming response
print("Model response:")
full_response = ""
for chunk in response:
    # Some chunks may carry no choices or an empty delta, so guard before reading
    if chunk.choices and chunk.choices[0].delta.content:
        text_chunk = chunk.choices[0].delta.content
        print(text_chunk, end='', flush=True)
        full_response += text_chunk

print("\n\nFull response received")
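If streaming output is not needed, the same request can be made without stream=True and the full reply read from the response object in one step. A minimal sketch, reusing the client, base64_image, and prompt defined above:

# Non-streaming variant: the complete reply arrives in a single response object.
# Assumes `client` and `base64_image` were created as in the script above.
response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url",
                 "image_url": {"url": f"data:image/webp;base64,{base64_image}"}},
                {"type": "text", "text": "Use OCR to recognize the text in the image and output it"}
            ]
        }
    ],
    max_tokens=1000
)
print(response.choices[0].message.content)

If the image is already hosted online, the OpenAI-compatible message format generally also accepts a plain https URL in the image_url field, which skips the Base64 step; check the SiliconFlow docs linked above for the formats the service supports.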