
vLLM service is unstable #1800

@Yison69

Description


With vLLM enabled, I only change the text to be synthesized, yet some texts generate normally and some do not. For example, "我爱你呀宝宝" synthesizes fine, while "我爱你" does not. I have tried many examples and so far cannot see any pattern; the failing cases usually produce a 0-second audio file. There are also cases where prompt audio A works but prompt audio B does not, and sometimes the generated audio ends with unnecessary trailing silence. The server script is below:
```python
import sys
import os
import io
import uuid
import torch
import torchaudio
import uvicorn
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional

os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"

# --- 1. Environment and dependencies ---
sys.path.append('third_party/Matcha-TTS')
from vllm import ModelRegistry
from cosyvoice.vllm.cosyvoice2 import CosyVoice2ForCausalLM
from cosyvoice.cli.cosyvoice import AutoModel

ModelRegistry.register_model("CosyVoice2ForCausalLM", CosyVoice2ForCausalLM)

app = FastAPI(title="CosyVoice3 Server-Side Save")

# --- 2. Resource directories ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
AUDIO_DIR = os.path.join(BASE_DIR, "asset", "audios")
TEXT_DIR = os.path.join(BASE_DIR, "asset", "texts")

# 🔥 New: server-side output directory
OUTPUT_DIR = os.path.join(BASE_DIR, "asset", "outputs")

os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(TEXT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

model = None


class GenerateRequest(BaseModel):
    mode: str = "zero_shot"
    text: str
    persona: str
    instruction: Optional[str] = None
    stream: bool = False


@app.on_event("startup")
async def load_model():
    global model
    print("🚀 [Startup] Loading CosyVoice3 (vLLM)...")
    try:
        model = AutoModel(
            model_dir='pretrained_models/Fun-CosyVoice3-0.5B',
            load_trt=True,
            load_vllm=True,
            fp16=False
        )
        print("✅ [Startup] Service ready!")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        raise e


@app.post("/generate")
async def generate(req: GenerateRequest):
    if not model:
        raise HTTPException(status_code=500, detail="Model not loaded")

    # 1. Resource check
    prompt_wav_path = os.path.join(AUDIO_DIR, f"{req.persona}.wav")
    prompt_text_path = os.path.join(TEXT_DIR, f"{req.persona}.txt")

    if not os.path.exists(prompt_wav_path):
        raise HTTPException(status_code=400, detail=f"Persona audio not found: {req.persona}.wav")

    raw_prompt_text = ""
    if os.path.exists(prompt_text_path):
        with open(prompt_text_path, 'r', encoding='utf-8') as f:
            raw_prompt_text = f.read().strip()

    try:
        output_generator = None

        # 2. Inference logic
        if req.mode == "zero_shot":
            if not raw_prompt_text:
                raise HTTPException(status_code=400, detail="Zero-shot mode requires a reference text")

            final_prompt_text = f"You are a helpful assistant.<|endofprompt|>{raw_prompt_text}"
            print(f"🎙️ [Server] Zero-shot generating: {req.text[:20]}...")

            output_generator = model.inference_zero_shot(
                req.text, final_prompt_text, prompt_wav_path, stream=False
            )

        elif req.mode == "instruct":
            if not req.instruction:
                raise HTTPException(status_code=400, detail="Instruct mode requires an instruction")

            final_instruction = f"You are a helpful assistant. {req.instruction}<|endofprompt|>"
            print("🎙️ [Server] Instruct generating...")

            output_generator = model.inference_instruct2(
                req.text, final_instruction, prompt_wav_path, stream=False
            )
        else:
            raise HTTPException(status_code=400, detail="Unsupported mode")

        # 3. [Core change] Concatenate and save on the server side (following example.py)
        all_audio_chunks = []
        for result in output_generator:
            all_audio_chunks.append(result['tts_speech'])

        if not all_audio_chunks:
            raise HTTPException(status_code=500, detail="Empty generation result")

        # Concatenate the tensors
        final_audio_tensor = torch.cat(all_audio_chunks, dim=1)

        # Build a unique filename
        filename = f"{req.persona}_{uuid.uuid4().hex[:8]}.wav"
        save_path = os.path.join(OUTPUT_DIR, filename)

        # 🔥 Save directly on the server
        torchaudio.save(save_path, final_audio_tensor, model.sample_rate)
        print(f"✅ [Server] Audio saved to: {save_path}")

        # Return JSON telling the client where the file is
        return JSONResponse(content={
            "status": "success",
            "server_path": save_path,
            "filename": filename
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/add_persona")
async def add_persona(
    persona_name: str = Form(...),
    audio_file: UploadFile = File(...),
    text_content: str = Form(...)
):
    try:
        save_wav_path = os.path.join(AUDIO_DIR, f"{persona_name}.wav")
        save_txt_path = os.path.join(TEXT_DIR, f"{persona_name}.txt")
        with open(save_wav_path, "wb") as f:
            f.write(await audio_file.read())
        with open(save_txt_path, "w", encoding="utf-8") as f:
            f.write(text_content.strip())
        return JSONResponse(content={"status": "success", "persona": persona_name})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=6006)
```
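
For reference, this is a minimal client sketch of how I hit the endpoint to reproduce the problem. The server address and the persona name `demo` are placeholders; the route and payload fields match the `GenerateRequest` model above.

```python
# Minimal repro sketch (assumptions: the server above is running on
# localhost:6006 and a persona named "demo" was registered via /add_persona).
import requests

SERVER = "http://127.0.0.1:6006"

# The first text synthesizes normally; the second returns a ~0-second wav.
for text in ["我爱你呀宝宝", "我爱你"]:
    resp = requests.post(f"{SERVER}/generate", json={
        "mode": "zero_shot",
        "text": text,
        "persona": "demo",  # placeholder persona name
    })
    print(text, resp.status_code, resp.json())
```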
