Version 3.4.3: Fix language switching bug and add flexible language aliases

diodiogod · diodiogod · commit f9c0a6a9d365 · 2025-08-05T02:54:00.000-03:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.4.3] - 2025-08-05
+
+### Fixed
+
+- Fix language switching not working properly and add support for flexible language aliases like [German:], [Brazil:], [USA:]
 ## [3.4.2] - 2025-08-05
 
 ### Fixed
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
 [![Forks][forks-shield]][forks-url]
 [![Dynamic TOML Badge][version-shield]][version-url]
 
-# ComfyUI ChatterBox SRT Voice (diogod) v3.4.2
+# ComfyUI ChatterBox SRT Voice (diogod) v3.4.3
 
 *This is a refactored node, originally created by [ShmuelRonen](https://github.com/ShmuelRonen/ComfyUI_ChatterBox_Voice).*
 
diff --git a/nodes.py b/nodes.py
@@ -1,5 +1,5 @@
 # Version and constants
-VERSION = "3.4.2"
+VERSION = "3.4.3"
 IS_DEV = False  # Set to False for release builds
 VERSION_DISPLAY = f"v{VERSION}" + (" (dev)" if IS_DEV else "")
 SEPARATOR = "=" * 70
diff --git a/nodes/chatterbox/chatterbox_tts_node.py b/nodes/chatterbox/chatterbox_tts_node.py
@@ -617,17 +617,24 @@ def _process():
                 def get_chatterbox_model_for_language(lang_code: str) -> str:
                     """Map language codes to ChatterBox model names"""
                     lang_model_map = {
-                        'en': inputs["language"],  # Use selected model for English (default)
+                        'en': 'English',          # English (always use English model)
                         'de': 'German',           # German
                         'es': 'Spanish',          # Spanish
                         'fr': 'French',           # French
                         'it': 'Italian',          # Italian
                         'pt': 'Portuguese',       # Portuguese
+                        'pt-br': 'Portuguese',    # Brazilian Portuguese (use Portuguese model)
+                        'pt-pt': 'Portuguese',    # European Portuguese (use Portuguese model)
                         'no': 'Norwegian',        # Norwegian
                         'nb': 'Norwegian',        # Norwegian Bokmål
                         'nn': 'Norwegian',        # Norwegian Nynorsk
                     }
-                    return lang_model_map.get(lang_code.lower(), inputs["language"])
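+                    # Selected model wins when the tag equals its name; everything else goes through lang_model_map. NOTE(review): selected_lang is the lowercased model name (presumably 'english'/'german'), so codes like 'en'/'de' likely never hit this branch - confirm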
+                    selected_lang = inputs["language"].lower()
+                    if lang_code.lower() == selected_lang:
+                        return inputs["language"]  # Use selected model for main language
+                    else:
+                        return lang_model_map.get(lang_code.lower(), inputs["language"])
                 
                 # Group segments by language with original order tracking
                 language_groups = {}
diff --git a/nodes/f5tts/f5tts_node.py b/nodes/f5tts/f5tts_node.py
@@ -383,7 +383,7 @@ def _process():
                 def get_f5tts_model_for_language(lang_code: str) -> str:
                     """Map language codes to F5-TTS model names"""
                     lang_model_map = {
-                        'en': inputs["model"],  # Use selected model for English (default)
+                        'en': 'F5TTS_v1_Base',  # English (use v1 - better quality)
                         'de': 'F5-DE',         # German
                         'es': 'F5-ES',         # Spanish  
                         'fr': 'F5-FR',         # French
@@ -393,8 +393,14 @@ def get_f5tts_model_for_language(lang_code: str) -> str:
                         'th': 'F5-TH',         # Thai
                         'pt': 'F5-PT-BR',      # Portuguese (Brazilian)
                         'pt-br': 'F5-PT-BR',   # Portuguese (Brazilian)
+                        'pt-pt': 'F5-PT-BR',   # Portuguese (European - use Brazilian model for now)
                     }
-                    return lang_model_map.get(lang_code.lower(), inputs["model"])
+                    # For the main model language, use the selected model; for others, use language-specific models
+                    selected_lang = inputs.get("language", "English").lower()
+                    if lang_code.lower() == selected_lang or (selected_lang == "portuguese" and lang_code.lower() in ["pt", "pt-br"]):
+                        return inputs["model"]  # Use selected model for main language
+                    else:
+                        return lang_model_map.get(lang_code.lower(), inputs["model"])
                 
                 # Group segments by language with original order tracking
                 language_groups = {}
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "chatterbox_srt_voice"
 description = "ChatterBox SRT Voice TTS Node is a fork of 'ChatteBox Voice' with additional devolpments and full F5-TTS implementation as well. I introduced a SRT node designed to help you synchronize your generated TTS audio with `.srt` subtitle files. Audio wave analyzer will help you find speech segments for f5 speech edit and much more!"
-version = "3.4.2"
+version = "3.4.3"
 license = {file = "LICENSE"}
 dependencies = ["s3tokenizer>=0.1.7", "resemble-perth", "librosa", "scipy", "omegaconf", "accelerate", "transformers==4.46.3", "# Additional dependencies for SRT support and audio processing", "conformer>=0.3.2", "torch", "torchaudio", "numpy", "einops", "phonemizer", "g2p-en", "unidecode", "# Audio processing and timing dependencies", "soundfile", "resampy", "webrtcvad", "# Optional but recommended for better performance", "numba"]
 
diff --git a/utils/models/language_mapper.py b/utils/models/language_mapper.py
@@ -32,9 +32,14 @@ def get_model_for_language(self, lang_code: str, default_model: str) -> str:
         """
         engine_mappings = self.mappings.get(self.engine_type, {})
         
-        # For base language (usually English), use the provided default model
+        # English never falls back to default_model: each engine has a fixed
+        # English model, so a non-English default cannot be picked for 'en'
         if lang_code == 'en':
-            return default_model
+            # For English, always use English model regardless of default
+            if self.engine_type == 'f5tts':
+                return 'F5TTS_v1_Base'  # Use v1 for better quality
+            else:  # chatterbox
+                return 'English'
         
         # Check if language is supported
         if lang_code in engine_mappings:
diff --git a/utils/text/character_parser.py b/utils/text/character_parser.py
@@ -39,8 +39,8 @@ class CharacterParser:
     # Regex pattern for character tags: [CharacterName] or [language:CharacterName] (excludes pause tags)
     CHARACTER_TAG_PATTERN = re.compile(r'\[(?!pause:)([^\]]+)\]')
     
-    # Regex to parse language:character format (allows empty character names like [fr:])
-    LANGUAGE_CHARACTER_PATTERN = re.compile(r'^([a-zA-Z]{2,3}):(.*)$')
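+    # Regex to parse language:character format; the language part accepts Latin letters (incl. À-ÿ), digits, '-', '_' and whitespace. NOTE(review): non-Latin aliases (日本語, 中文, русский, 한국어) fall outside this class and will not match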
+    LANGUAGE_CHARACTER_PATTERN = re.compile(r'^([a-zA-Z0-9\-_À-ÿ\s]+):(.*)$')
     
     def __init__(self, default_character: str = "narrator", default_language: Optional[str] = None):
         """
@@ -55,6 +55,75 @@ def __init__(self, default_character: str = "narrator", default_language: Option
         self.available_characters = set()
         self.character_fallbacks = {}
         self.character_language_defaults = {}
+        
+        # Language alias system for flexible language switching
+        self.language_aliases = {
+            # German variations
+            'de': 'de', 'german': 'de', 'deutsch': 'de', 'germany': 'de', 'deutschland': 'de',
+            
+            # English variations
+            'en': 'en', 'english': 'en', 'eng': 'en', 'usa': 'en', 'uk': 'en', 'america': 'en', 'britain': 'en',
+            
+            # Brazilian Portuguese (separate from European Portuguese)
+            'pt-br': 'pt-br', 'ptbr': 'pt-br', 'brazilian': 'pt-br', 'brasilian': 'pt-br',
+            'brazil': 'pt-br', 'brasil': 'pt-br', 'br': 'pt-br', 'português brasileiro': 'pt-br',
+            
+            # European Portuguese (separate from Brazilian)
+            'pt-pt': 'pt-pt', 'portugal': 'pt-pt', 'european portuguese': 'pt-pt',
+            'portuguese': 'pt-pt', 'português': 'pt-pt', 'portugues': 'pt-pt',
+            
+            # French variations
+            'fr': 'fr', 'french': 'fr', 'français': 'fr', 'francais': 'fr', 
+            'france': 'fr', 'français de france': 'fr',
+            
+            # Spanish variations
+            'es': 'es', 'spanish': 'es', 'español': 'es', 'espanol': 'es',
+            'spain': 'es', 'españa': 'es', 'castilian': 'es',
+            
+            # Italian variations
+            'it': 'it', 'italian': 'it', 'italiano': 'it', 'italy': 'it', 'italia': 'it',
+            
+            # Norwegian variations
+            'no': 'no', 'norwegian': 'no', 'norsk': 'no', 'norway': 'no', 'norge': 'no',
+            
+            # Dutch variations
+            'nl': 'nl', 'dutch': 'nl', 'nederlands': 'nl', 'netherlands': 'nl', 'holland': 'nl',
+            
+            # Japanese variations
+            'ja': 'ja', 'japanese': 'ja', '日本語': 'ja', 'japan': 'ja', 'nihongo': 'ja',
+            
+            # Chinese variations
+            'zh': 'zh', 'chinese': 'zh', '中文': 'zh', 'china': 'zh',
+            'zh-cn': 'zh-cn', 'mandarin': 'zh-cn', 'simplified': 'zh-cn', 'mainland': 'zh-cn',
+            'zh-tw': 'zh-tw', 'traditional': 'zh-tw', 'taiwan': 'zh-tw', 'taiwanese': 'zh-tw',
+            
+            # Russian variations
+            'ru': 'ru', 'russian': 'ru', 'русский': 'ru', 'russia': 'ru', 'россия': 'ru',
+            
+            # Korean variations
+            'ko': 'ko', 'korean': 'ko', '한국어': 'ko', 'korea': 'ko', 'south korea': 'ko',
+        }
+    
+    def resolve_language_alias(self, language_input: str) -> str:
+        """
+        Resolve language alias to canonical language code.
+        
+        Args:
+            language_input: User input language (e.g., "German", "brasil", "pt-BR")
+            
+        Returns:
+            Canonical language code (e.g., "de", "pt-br"), or the stripped, lowercased input when no alias matches
+        """
+        # Normalize input: lowercase and strip whitespace
+        normalized = language_input.strip().lower()
+        
+        # Look up in aliases
+        canonical = self.language_aliases.get(normalized)
+        if canonical:
+            return canonical
+            
+        # If no alias found, fall back to the normalized (stripped, lowercased) input for backward compatibility
+        return normalized
     
     def set_available_characters(self, characters: List[str]):
         """
@@ -98,8 +167,10 @@ def parse_language_character_tag(self, tag_content: str) -> Tuple[Optional[str],
         # Check if it's in language:character format
         match = self.LANGUAGE_CHARACTER_PATTERN.match(tag_content.strip())
         if match:
-            language = match.group(1).lower()
+            raw_language = match.group(1)
             character = match.group(2).strip()
+            # Resolve language alias to canonical form
+            language = self.resolve_language_alias(raw_language)
             # If character is empty (e.g., [fr:]), default to narrator
             if not character:
                 character = self.default_character