-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Description
启动vllm,仅仅修改我要生成的text,有些能正常生成有些不可以。比如“我爱你呀宝宝”可以生成,“我爱你”就不行。试了很多个例子,目前没有看出什么规律。不行的例子一般都是生成0s音频。还有些情况是A音频可以B音频就不行,还有就是生成的音频后面会有不必要的静音。
import sys
import os

# Must be set BEFORE importing torch/vllm: once CUDA is initialized,
# changing CUDA_VISIBLE_DEVICES has no effect on device visibility.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"

import io
import uuid

import torch
import torchaudio
import uvicorn
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional

# --- 1. Environment & dependencies ---
# Matcha-TTS is vendored; make it importable for cosyvoice internals.
sys.path.append('third_party/Matcha-TTS')
from vllm import ModelRegistry
from cosyvoice.vllm.cosyvoice2 import CosyVoice2ForCausalLM
from cosyvoice.cli.cosyvoice import AutoModel

# Register the CosyVoice2 LM so vLLM can instantiate it by name.
ModelRegistry.register_model("CosyVoice2ForCausalLM", CosyVoice2ForCausalLM)
app = FastAPI(title="CosyVoice3 Server-Side Save")

# --- 2. Resource directories ---
# BUG FIX: the original used the bare name `file` (a NameError);
# the module-path dunder is `__file__`.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
AUDIO_DIR = os.path.join(BASE_DIR, "asset", "audios")    # reference speaker wavs
TEXT_DIR = os.path.join(BASE_DIR, "asset", "texts")      # reference transcripts
# Server-side output directory for generated audio.
OUTPUT_DIR = os.path.join(BASE_DIR, "asset", "outputs")

os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(TEXT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Populated once by load_model() at startup; None until then.
model = None
class GenerateRequest(BaseModel):
    """Request payload for the /generate endpoint."""

    mode: str = "zero_shot"            # "zero_shot" or "instruct"
    text: str                          # text to synthesize
    persona: str                       # persona name; must match asset files
    instruction: Optional[str] = None  # required when mode == "instruct"
    stream: bool = False               # accepted, but generation is non-streaming
@app.on_event("startup")
async def load_model():
    """Load the CosyVoice3 model once at server startup.

    Stores the loaded model in the module-level ``model`` global for the
    request handlers. Re-raises on failure so the server does not come up
    with a half-initialized model.
    """
    global model
    print("🚀 [Startup] 正在加载 CosyVoice3 (vLLM)...")
    try:
        model = AutoModel(
            model_dir='pretrained_models/Fun-CosyVoice3-0.5B',
            load_trt=True,
            load_vllm=True,
            fp16=False
        )
        print("✅ [Startup] 服务就绪!")
    except Exception as e:
        print(f"❌ 模型加载失败: {e}")
        # Bare `raise` preserves the original traceback (vs `raise e`).
        raise
@app.post("/generate")
async def generate(req: GenerateRequest):
    """Synthesize speech for ``req.text`` in the given persona's voice.

    Supports "zero_shot" (voice cloning from a reference wav + transcript)
    and "instruct" (style driven by a natural-language instruction). All
    generated chunks are concatenated and saved server-side; the response
    JSON tells the client where the file was written.
    """
    if not model:
        raise HTTPException(status_code=500, detail="模型未加载")

    # 1. Resource checks: reference wav is required, transcript is optional
    #    here (but required for zero_shot below).
    prompt_wav_path = os.path.join(AUDIO_DIR, f"{req.persona}.wav")
    prompt_text_path = os.path.join(TEXT_DIR, f"{req.persona}.txt")
    if not os.path.exists(prompt_wav_path):
        raise HTTPException(status_code=400, detail=f"角色音频不存在: {req.persona}.wav")

    raw_prompt_text = ""
    if os.path.exists(prompt_text_path):
        with open(prompt_text_path, 'r', encoding='utf-8') as f:
            raw_prompt_text = f.read().strip()

    try:
        # 2. Inference dispatch by mode.
        if req.mode == "zero_shot":
            if not raw_prompt_text:
                raise HTTPException(status_code=400, detail="Zero-shot模式必须有参考文本")
            final_prompt_text = f"You are a helpful assistant.<|endofprompt|>{raw_prompt_text}"
            print(f"🎙️ [Server] Zero-Shot 生成中: {req.text[:20]}...")
            # NOTE: req.stream is currently ignored; generation is non-streaming.
            output_generator = model.inference_zero_shot(
                req.text, final_prompt_text, prompt_wav_path, stream=False
            )
        elif req.mode == "instruct":
            if not req.instruction:
                raise HTTPException(status_code=400, detail="Instruct模式必须提供指令")
            final_instruction = f"You are a helpful assistant. {req.instruction}<|endofprompt|>"
            print(f"🎙️ [Server] Instruct 生成中...")
            output_generator = model.inference_instruct2(
                req.text, final_instruction, prompt_wav_path, stream=False
            )
        else:
            raise HTTPException(status_code=400, detail="不支持的模式")

        # 3. Concatenate all chunks server-side and save as one wav.
        all_audio_chunks = [result['tts_speech'] for result in output_generator]
        if not all_audio_chunks:
            raise HTTPException(status_code=500, detail="生成结果为空")
        # Chunks are concatenated along dim=1 — presumably (channels, samples);
        # TODO confirm against CosyVoice's output layout.
        final_audio_tensor = torch.cat(all_audio_chunks, dim=1)

        # Unique filename so concurrent requests never clobber each other.
        filename = f"{req.persona}_{uuid.uuid4().hex[:8]}.wav"
        save_path = os.path.join(OUTPUT_DIR, filename)
        torchaudio.save(save_path, final_audio_tensor, model.sample_rate)
        print(f"✅ [Server] 音频已保存至: {save_path}")

        return JSONResponse(content={
            "status": "success",
            "server_path": save_path,
            "filename": filename
        })
    except HTTPException:
        # BUG FIX: without this clause the 400-level HTTPExceptions raised
        # inside the try were swallowed by the generic handler below and
        # re-surfaced to the client as opaque 500s.
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/add_persona")
async def add_persona(
    persona_name: str = Form(...),
    audio_file: UploadFile = File(...),
    text_content: str = Form(...)
):
    """Register a new persona: store its reference wav and transcript.

    ``persona_name`` is untrusted client input that is joined into
    filesystem paths, so it is validated to block path traversal
    (e.g. "../../etc/passwd").
    """
    # SECURITY FIX: reject empty names and names containing path
    # separators or dot components.
    if (not persona_name
            or persona_name != os.path.basename(persona_name)
            or persona_name in (".", "..")):
        raise HTTPException(status_code=400, detail="非法的角色名称")
    try:
        save_wav_path = os.path.join(AUDIO_DIR, f"{persona_name}.wav")
        save_txt_path = os.path.join(TEXT_DIR, f"{persona_name}.txt")
        with open(save_wav_path, "wb") as f:
            f.write(await audio_file.read())
        with open(save_txt_path, "w", encoding="utf-8") as f:
            f.write(text_content.strip())
        return JSONResponse(content={"status": "success", "persona": persona_name})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# BUG FIX: the paste stripped the dunders — the guard must compare
# `__name__` against "__main__", not `name` against "main".
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=6006)