Version 3.2.8

diodiogod · diodiogod · commit fb9319ec06a6 · 2025-07-27T12:48:11.000-03:00
Fix PortAudio dependency handling for voice recording
Add graceful fallback when PortAudio is missing
Update README with system dependency requirements
Add startup diagnostic for missing dependencies
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.2.8] - 2025-07-27
+
+### Added
+
+- Add graceful fallback when PortAudio is missing
+- Add startup diagnostic for missing dependencies
+
+### Fixed
+
+- Fix PortAudio dependency handling for voice recording
+
+### Changed
+
+- Update README with system dependency requirements
 ## [3.2.7] - 2025-07-23
 
 ### Fixed
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
 [![Forks][forks-shield]][forks-url]
 [![Dynamic TOML Badge][version-shield]][version-url]
 
-# ComfyUI ChatterBox SRT Voice (diogod) v3.2.7
+# ComfyUI ChatterBox SRT Voice (diogod) v3.2.8
 
 *This is a refactored node, originally created by [ShmuelRonen](https://github.com/ShmuelRonen/ComfyUI_ChatterBox_Voice).*
 
@@ -221,6 +221,10 @@ This section provides a detailed guide for installing ComfyUI ChatterBox SRT Voi
 
 *   ComfyUI installation (Portable, Direct with venv, or through Manager)
 *   Python 3.12 or higher
+*   **PortAudio library** (required for voice recording features):
+    *   Linux: `sudo apt-get install portaudio19-dev`
+    *   macOS: `brew install portaudio`
+    *   Windows: Usually bundled with the `sounddevice` pip package (no action needed)
 
 ### Installation Methods
 
diff --git a/chatterbox_srt/__init__.py b/chatterbox_srt/__init__.py
@@ -4,7 +4,7 @@
 """
 
 # Version info
-__version__ = "3.2.7"
+__version__ = "3.2.8"
 __author__ = "Diogod"
 
 # Import the new SRT modules
diff --git a/core/__init__.py b/core/__init__.py
@@ -4,7 +4,7 @@
 """
 
 # Version info
-__version__ = "3.2.7"
+__version__ = "3.2.8"
 __author__ = "Diogod"
 
 # Make imports available at package level
diff --git a/nodes.py b/nodes.py
@@ -1,5 +1,5 @@
 # Version and constants
-VERSION = "3.2.7"
+VERSION = "3.2.8"
 IS_DEV = False  # Set to False for release builds
 VERSION_DISPLAY = f"v{VERSION}" + (" (dev)" if IS_DEV else "")
 SEPARATOR = "=" * 70
@@ -371,6 +371,21 @@ def error(self, error):
     print("⚠️ No local models found - will download from Hugging Face")
     print("💡 Tip: First generation will download models (~1GB)")
     print("   Models will be saved locally for future use")
+
+# Check for system dependency issues (only show warnings if problems detected)
+dependency_warnings = []
+
+# Check PortAudio availability for voice recording
+if hasattr(audio_recorder_module, 'SOUNDDEVICE_AVAILABLE') and not audio_recorder_module.SOUNDDEVICE_AVAILABLE:
+    dependency_warnings.append("⚠️ PortAudio library not found - Voice recording disabled")
+    dependency_warnings.append("   Install with: sudo apt-get install portaudio19-dev (Linux) or brew install portaudio (macOS)")
+
+# Only show dependency section if there are warnings
+if dependency_warnings:
+    print("📋 System Dependencies:")
+    for warning in dependency_warnings:
+        print(f"   {warning}")
+
 print(SEPARATOR)
 
 # Print final initialization with nodes list
diff --git a/nodes/audio_recorder_node.py b/nodes/audio_recorder_node.py
@@ -1,20 +1,41 @@
 import torch
 import torchaudio
 import numpy as np
-import sounddevice as sd
 import tempfile
 import os
 import threading
 import time
 import queue
 
+# Graceful handling of the optional sounddevice/PortAudio dependency
+try:
+    import sounddevice as sd
+    SOUNDDEVICE_AVAILABLE = True
+except (ImportError, OSError) as e:  # sounddevice raises OSError (not ImportError) when the PortAudio library is missing
+    SOUNDDEVICE_AVAILABLE = False
+    SOUNDDEVICE_ERROR = str(e)
+    print(f"⚠️  ChatterBox Voice Capture: sounddevice not available - {e}")
+    print("📋 To enable voice recording, install PortAudio:")
+    print("   Linux: sudo apt-get install portaudio19-dev")
+    print("   macOS: brew install portaudio")
+    print("   Windows: Usually bundled with sounddevice")
+
 class ChatterBoxVoiceCapture:
     @classmethod
     def NAME(cls):
+        if not SOUNDDEVICE_AVAILABLE:
+            return "🎙️ ChatterBox Voice Capture (diogod) - PortAudio Required"
         return "🎙️ ChatterBox Voice Capture (diogod)"
     
     @classmethod
     def INPUT_TYPES(cls):
+        if not SOUNDDEVICE_AVAILABLE:
+            return {
+                "required": {
+                    "error_message": (["PortAudio library not found. Install with: sudo apt-get install portaudio19-dev (Linux) or brew install portaudio (macOS)"], {"default": "PortAudio library not found. Install with: sudo apt-get install portaudio19-dev (Linux) or brew install portaudio (macOS)"}),
+                }
+            }
+        
         # Get available audio devices
         devices = sd.query_devices()
         device_names = []
@@ -80,9 +101,25 @@ def INPUT_TYPES(cls):
     FUNCTION = "capture_voice_audio"
     CATEGORY = "ChatterBox Voice"
 
-    def capture_voice_audio(self, voice_device, voice_sample_rate, voice_max_recording_time, 
-                           voice_volume_gain, voice_silence_threshold, voice_silence_duration, 
-                           voice_auto_normalize, voice_trigger=0):
+    def capture_voice_audio(self, **kwargs):
+        if not SOUNDDEVICE_AVAILABLE:
+            print(f"❌ ChatterBox Voice Capture error: {SOUNDDEVICE_ERROR}")
+            print("📋 Install PortAudio to enable voice recording:")
+            print("   Linux: sudo apt-get install portaudio19-dev")
+            print("   macOS: brew install portaudio")
+            print("   Windows: Usually bundled with sounddevice")
+            # Return empty audio tensor
+            return (torch.zeros(1, 1, 22050),)
+        
+        # Extract parameters with defaults for graceful fallback
+        voice_device = kwargs.get('voice_device', '')
+        voice_sample_rate = kwargs.get('voice_sample_rate', 44100)
+        voice_max_recording_time = kwargs.get('voice_max_recording_time', 10.0)
+        voice_volume_gain = kwargs.get('voice_volume_gain', 1.0)
+        voice_silence_threshold = kwargs.get('voice_silence_threshold', 0.02)
+        voice_silence_duration = kwargs.get('voice_silence_duration', 2.0)
+        voice_auto_normalize = kwargs.get('voice_auto_normalize', True)
+        voice_trigger = kwargs.get('voice_trigger', 0)
         
         print(f"🎤 Starting ChatterBox Voice Capture...")
         print(f"Settings: max_time={voice_max_recording_time}s, volume_gain={voice_volume_gain}x, silence_threshold={voice_silence_threshold}, silence_duration={voice_silence_duration}s, rate={voice_sample_rate}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "chatterbox_srt_voice"
 description = "ChatterBox SRT Voice TTS Node is a fork of 'ChatteBox Voice' with additional devolpments and full F5-TTS implementation as well. I introduced a SRT node designed to help you synchronize your generated TTS audio with `.srt` subtitle files. Audio wave analyzer will help you find speech segments for f5 speech edit and much more!"
-version = "3.2.7"
+version = "3.2.8"
 license = {file = "LICENSE"}
 dependencies = ["s3tokenizer>=0.1.7", "resemble-perth", "librosa", "scipy", "omegaconf", "accelerate", "transformers==4.46.3", "# Additional dependencies for SRT support and audio processing", "conformer>=0.3.2", "torch", "torchaudio", "numpy", "einops", "phonemizer", "g2p-en", "unidecode", "# Audio processing and timing dependencies", "soundfile", "resampy", "webrtcvad", "# Optional but recommended for better performance", "numba"]