Skip to content
This repository was archived by the owner on Aug 29, 2025. It is now read-only.

Commit ec06f63

Browse files
committed
Add multilanguage support to ChatterBox SRT TTS node
Features:
- Add language dropdown with dynamic model discovery
- Support for German, Norwegian, and English SRT generation
- Language-aware cache keys to prevent model conflicts
- Maintain full backward compatibility with existing workflows

Technical Changes:
- Updated INPUT_TYPES to include language selection dropdown
- Enhanced generate_srt_speech function with language parameter
- Modified _generate_tts_with_pause_tags to support language parameter
- Updated cache key generation to include language in hash
- All function calls updated to pass language parameter correctly

SRT Node Benefits:
- Subtitle timing with multilanguage voice models
- Character switching works with all supported languages
- Proper caching isolation between different language models
- Seamless integration with existing SRT timing features
1 parent ffe8f45 commit ec06f63

File tree

1 file changed

+26
-12
lines changed

1 file changed

+26
-12
lines changed

nodes/srt_tts_node.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ def _load_srt_modules(self):
7575

7676
@classmethod
7777
def INPUT_TYPES(cls):
78+
# Import language models for dropdown
79+
try:
80+
from chatterbox.language_models import get_available_languages
81+
available_languages = get_available_languages()
82+
except ImportError:
83+
available_languages = ["English"]
84+
7885
return {
7986
"required": {
8087
"srt_content": ("STRING", {
@@ -92,6 +99,10 @@ def INPUT_TYPES(cls):
9299
The audio will match these exact timings.""",
93100
"tooltip": "The SRT subtitle content. Each entry defines a text segment and its precise start and end times."
94101
}),
102+
"language": (available_languages, {
103+
"default": "English",
104+
"tooltip": "Language model to use for text-to-speech generation. Local models are preferred over remote downloads."
105+
}),
95106
"device": (["auto", "cuda", "cpu"], {"default": "auto", "tooltip": "The device to run the TTS model on (auto, cuda, or cpu)."}),
96107
"exaggeration": ("FLOAT", {
97108
"default": 0.5,
@@ -214,8 +225,9 @@ def _safe_generate_tts_audio(self, text, audio_prompt, exaggeration, temperature
214225
raise
215226

216227
def _generate_tts_with_pause_tags(self, text: str, audio_prompt, exaggeration: float,
217-
temperature: float, cfg_weight: float, enable_pause_tags: bool = True,
218-
character: str = "narrator", seed: int = 0, enable_cache: bool = True,
228+
temperature: float, cfg_weight: float, language: str = "English",
229+
enable_pause_tags: bool = True, character: str = "narrator",
230+
seed: int = 0, enable_cache: bool = True,
219231
crash_protection_template: str = "hmm ,, {seg} hmm ,,",
220232
stable_audio_component: str = None) -> torch.Tensor:
221233
"""
@@ -247,7 +259,7 @@ def _generate_tts_with_pause_tags(self, text: str, audio_prompt, exaggeration: f
247259
audio_component = stable_audio_component if stable_audio_component else (getattr(audio_prompt, 'name', str(audio_prompt)) if audio_prompt else "")
248260
cache_key = self._generate_segment_cache_key(
249261
f"{character}:{processed_text}", exaggeration, temperature, cfg_weight, seed,
250-
audio_component, self.model_manager.get_model_source("tts"), self.device
262+
audio_component, self.model_manager.get_model_source("tts"), self.device, language
251263
)
252264

253265
# Try cache first
@@ -278,7 +290,7 @@ def tts_generate_func(text_content: str) -> torch.Tensor:
278290
# Use protected text for BOTH lookup and caching to ensure consistency
279291
cache_key = self._generate_segment_cache_key(
280292
f"{character}:{protected_text}", exaggeration, temperature, cfg_weight, seed,
281-
audio_component, self.model_manager.get_model_source("tts"), self.device
293+
audio_component, self.model_manager.get_model_source("tts"), self.device, language
282294
)
283295

284296
# Try cache first
@@ -306,7 +318,7 @@ def tts_generate_func(text_content: str) -> torch.Tensor:
306318

307319
def _generate_segment_cache_key(self, subtitle_text: str, exaggeration: float, temperature: float,
308320
cfg_weight: float, seed: int, audio_prompt_component: str,
309-
model_source: str, device: str) -> str:
321+
model_source: str, device: str, language: str = "English") -> str:
310322
"""Generate cache key for a single audio segment based on generation parameters."""
311323
cache_data = {
312324
'text': subtitle_text,
@@ -316,7 +328,9 @@ def _generate_segment_cache_key(self, subtitle_text: str, exaggeration: float, t
316328
'seed': seed,
317329
'audio_prompt_component': audio_prompt_component,
318330
'model_source': model_source,
319-
'device': device
331+
'device': device,
332+
'language': language,
333+
'engine': 'chatterbox_srt'
320334
}
321335
cache_string = str(sorted(cache_data.items()))
322336
cache_key = hashlib.md5(cache_string.encode()).hexdigest()
@@ -339,7 +353,7 @@ def _detect_overlaps(self, subtitles: List) -> bool:
339353
return True
340354
return False
341355

342-
def generate_srt_speech(self, srt_content, device, exaggeration, temperature, cfg_weight, seed,
356+
def generate_srt_speech(self, srt_content, language, device, exaggeration, temperature, cfg_weight, seed,
343357
timing_mode, reference_audio=None, audio_prompt_path="",
344358
enable_audio_cache=True, fade_for_StretchToFit=0.01,
345359
max_stretch_ratio=2.0, min_stretch_ratio=0.5, timing_tolerance=2.0,
@@ -351,7 +365,7 @@ def _process():
351365
raise ImportError("SRT support not available - missing required modules")
352366

353367
# Load TTS model
354-
self.load_tts_model(device)
368+
self.load_tts_model(device, language)
355369

356370
# Set seed for reproducibility
357371
self.set_seed(seed)
@@ -456,8 +470,8 @@ def _process():
456470

457471
# Generate new audio for this character segment with pause tag support (includes internal caching)
458472
char_wav = self._generate_tts_with_pause_tags(
459-
processed_segment_text, char_audio, exaggeration, temperature, cfg_weight, True,
460-
character=char, seed=seed, enable_cache=enable_audio_cache,
473+
processed_segment_text, char_audio, exaggeration, temperature, cfg_weight, language,
474+
True, character=char, seed=seed, enable_cache=enable_audio_cache,
461475
crash_protection_template=crash_protection_template,
462476
stable_audio_component=stable_audio_prompt_component
463477
)
@@ -482,8 +496,8 @@ def _process():
482496

483497
# Generate new audio with pause tag support (includes internal caching)
484498
wav = self._generate_tts_with_pause_tags(
485-
processed_subtitle_text, audio_prompt, exaggeration, temperature, cfg_weight, True,
486-
character="narrator", seed=seed, enable_cache=enable_audio_cache,
499+
processed_subtitle_text, audio_prompt, exaggeration, temperature, cfg_weight, language,
500+
True, character="narrator", seed=seed, enable_cache=enable_audio_cache,
487501
crash_protection_template=crash_protection_template,
488502
stable_audio_component=stable_audio_prompt_component
489503
)

0 commit comments

Comments
 (0)