@@ -75,6 +75,13 @@ def _load_srt_modules(self):
 
     @classmethod
     def INPUT_TYPES(cls):
+        # Import language models for dropdown
+        try:
+            from chatterbox.language_models import get_available_languages
+            available_languages = get_available_languages()
+        except ImportError:
+            available_languages = ["English"]
+
         return {
             "required": {
                 "srt_content": ("STRING", {
@@ -92,6 +99,10 @@ def INPUT_TYPES(cls):
 The audio will match these exact timings.""",
                     "tooltip": "The SRT subtitle content. Each entry defines a text segment and its precise start and end times."
                 }),
+                "language": (available_languages, {
+                    "default": "English",
+                    "tooltip": "Language model to use for text-to-speech generation. Local models are preferred over remote downloads."
+                }),
                 "device": (["auto", "cuda", "cpu"], {"default": "auto", "tooltip": "The device to run the TTS model on (auto, cuda, or cpu)."}),
                 "exaggeration": ("FLOAT", {
                     "default": 0.5,
@@ -214,8 +225,9 @@ def _safe_generate_tts_audio(self, text, audio_prompt, exaggeration, temperature
             raise
 
     def _generate_tts_with_pause_tags(self, text: str, audio_prompt, exaggeration: float,
-                                      temperature: float, cfg_weight: float, enable_pause_tags: bool = True,
-                                      character: str = "narrator", seed: int = 0, enable_cache: bool = True,
+                                      temperature: float, cfg_weight: float, language: str = "English",
+                                      enable_pause_tags: bool = True, character: str = "narrator",
+                                      seed: int = 0, enable_cache: bool = True,
                                       crash_protection_template: str = "hmm ,, {seg} hmm ,,",
                                       stable_audio_component: str = None) -> torch.Tensor:
         """
@@ -247,7 +259,7 @@ def _generate_tts_with_pause_tags(self, text: str, audio_prompt, exaggeration: f
         audio_component = stable_audio_component if stable_audio_component else (getattr(audio_prompt, 'name', str(audio_prompt)) if audio_prompt else "")
         cache_key = self._generate_segment_cache_key(
             f"{character}:{processed_text}", exaggeration, temperature, cfg_weight, seed,
-            audio_component, self.model_manager.get_model_source("tts"), self.device
+            audio_component, self.model_manager.get_model_source("tts"), self.device, language
         )
 
         # Try cache first
@@ -278,7 +290,7 @@ def tts_generate_func(text_content: str) -> torch.Tensor:
             # Use protected text for BOTH lookup and caching to ensure consistency
             cache_key = self._generate_segment_cache_key(
                 f"{character}:{protected_text}", exaggeration, temperature, cfg_weight, seed,
-                audio_component, self.model_manager.get_model_source("tts"), self.device
+                audio_component, self.model_manager.get_model_source("tts"), self.device, language
             )
 
             # Try cache first
@@ -306,7 +318,7 @@ def tts_generate_func(text_content: str) -> torch.Tensor:
 
     def _generate_segment_cache_key(self, subtitle_text: str, exaggeration: float, temperature: float,
                                     cfg_weight: float, seed: int, audio_prompt_component: str,
-                                    model_source: str, device: str) -> str:
+                                    model_source: str, device: str, language: str = "English") -> str:
         """Generate cache key for a single audio segment based on generation parameters."""
         cache_data = {
             'text': subtitle_text,
@@ -316,7 +328,9 @@ def _generate_segment_cache_key(self, subtitle_text: str, exaggeration: float, t
             'seed': seed,
             'audio_prompt_component': audio_prompt_component,
             'model_source': model_source,
-            'device': device
+            'device': device,
+            'language': language,
+            'engine': 'chatterbox_srt'
         }
         cache_string = str(sorted(cache_data.items()))
         cache_key = hashlib.md5(cache_string.encode()).hexdigest()
@@ -339,7 +353,7 @@ def _detect_overlaps(self, subtitles: List) -> bool:
                 return True
         return False
 
-    def generate_srt_speech(self, srt_content, device, exaggeration, temperature, cfg_weight, seed,
+    def generate_srt_speech(self, srt_content, language, device, exaggeration, temperature, cfg_weight, seed,
                             timing_mode, reference_audio=None, audio_prompt_path="",
                             enable_audio_cache=True, fade_for_StretchToFit=0.01,
                             max_stretch_ratio=2.0, min_stretch_ratio=0.5, timing_tolerance=2.0,
@@ -351,7 +365,7 @@ def _process():
                 raise ImportError("SRT support not available - missing required modules")
 
             # Load TTS model
-            self.load_tts_model(device)
+            self.load_tts_model(device, language)
 
             # Set seed for reproducibility
             self.set_seed(seed)
@@ -456,8 +470,8 @@ def _process():
 
                     # Generate new audio for this character segment with pause tag support (includes internal caching)
                     char_wav = self._generate_tts_with_pause_tags(
-                        processed_segment_text, char_audio, exaggeration, temperature, cfg_weight, True,
-                        character=char, seed=seed, enable_cache=enable_audio_cache,
+                        processed_segment_text, char_audio, exaggeration, temperature, cfg_weight, language,
+                        True, character=char, seed=seed, enable_cache=enable_audio_cache,
                         crash_protection_template=crash_protection_template,
                         stable_audio_component=stable_audio_prompt_component
                     )
@@ -482,8 +496,8 @@ def _process():
 
                 # Generate new audio with pause tag support (includes internal caching)
                 wav = self._generate_tts_with_pause_tags(
-                    processed_subtitle_text, audio_prompt, exaggeration, temperature, cfg_weight, True,
-                    character="narrator", seed=seed, enable_cache=enable_audio_cache,
+                    processed_subtitle_text, audio_prompt, exaggeration, temperature, cfg_weight, language,
+                    True, character="narrator", seed=seed, enable_cache=enable_audio_cache,
                     crash_protection_template=crash_protection_template,
                     stable_audio_component=stable_audio_prompt_component
                 )
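
For reference, a minimal standalone sketch of the per-segment cache key after this change. The free function and the parameter keys between `'text'` and `'seed'` are inferred from the method signature shown above and are not part of the diff; in the node itself this logic lives in `_generate_segment_cache_key`.

```python
# Sketch only (not the node's actual method): the new 'language' and 'engine'
# fields become part of the hashed payload, so segments generated with
# different language models no longer collide in the audio cache.
import hashlib

def segment_cache_key(subtitle_text: str, exaggeration: float, temperature: float,
                      cfg_weight: float, seed: int, audio_prompt_component: str,
                      model_source: str, device: str, language: str = "English") -> str:
    cache_data = {
        'text': subtitle_text,
        'exaggeration': exaggeration,   # assumed key: this part of the dict
        'temperature': temperature,     # is outside the diff hunks shown above
        'cfg_weight': cfg_weight,
        'seed': seed,
        'audio_prompt_component': audio_prompt_component,
        'model_source': model_source,
        'device': device,
        'language': language,           # new field added by this change
        'engine': 'chatterbox_srt',     # new field added by this change
    }
    # Sorting the items keeps the key independent of dict insertion order.
    return hashlib.md5(str(sorted(cache_data.items())).encode()).hexdigest()

# Same text, different language -> different cache keys, so cached English
# audio is never reused for another language.
k_en = segment_cache_key("narrator:Hello", 0.5, 0.8, 0.5, 0, "", "local", "cuda", "English")
k_de = segment_cache_key("narrator:Hello", 0.5, 0.8, 0.5, 0, "", "local", "cuda", "German")
assert k_en != k_de
```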