Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 66 additions & 16 deletions vllm_plugin/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,26 +73,76 @@ def _ffmpeg_load_file(filepath) -> tuple[np.ndarray, int]:

# Register FFmpeg-based audio loader
import vllm.multimodal.audio as _vllm_audio_module
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO
import warnings

class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
"""AudioMediaIO implementation using FFmpeg for audio decoding."""
# Handle both old and new vLLM versions
# In newer versions, AudioMediaIO may not exist or may have been moved
try:
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO

def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(data, media_type=None)
class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
"""AudioMediaIO implementation using FFmpeg for audio decoding."""

def __init__(self, **kwargs):
# Call parent constructor if it exists
if hasattr(super(), '__init__'):
super().__init__(**kwargs)

def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(data, media_type=None)

def load_base64(self, media_type: str, data: str) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(base64.b64decode(data), media_type=media_type)

def load_file(self, filepath) -> tuple[np.ndarray, int]:
return _ffmpeg_load_file(filepath)

def load_base64(self, media_type: str, data: str) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(base64.b64decode(data), media_type=media_type)
# Replace globally
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO

def load_file(self, filepath) -> tuple[np.ndarray, int]:
return _ffmpeg_load_file(filepath)

# Replace globally
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO

# Also patch in utils module where it's imported
import vllm.multimodal.utils as _vllm_utils_module
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
# Also patch in utils module where it's imported
try:
import vllm.multimodal.utils as _vllm_utils_module
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
except (ImportError, AttributeError) as e:
warnings.warn(f"Could not patch AudioMediaIO in vllm.multimodal.utils: {e}", UserWarning)

except (AttributeError, ImportError) as e:
# AudioMediaIO doesn't exist in this vLLM version
# Define our own standalone implementation
warnings.warn(
f"AudioMediaIO not found in vllm.multimodal.audio ({e}). "
"Using standalone FFmpeg-based implementation for compatibility.",
UserWarning
)

class _PatchedAudioMediaIO:
"""Standalone AudioMediaIO implementation using FFmpeg for audio decoding.

This is used when vLLM doesn't provide AudioMediaIO or it's been moved/removed.
"""

def __init__(self, **kwargs):
pass

def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(data, media_type=None)

def load_base64(self, media_type: str, data: str) -> tuple[np.ndarray, int]:
return _ffmpeg_load_bytes(base64.b64decode(data), media_type=media_type)

def load_file(self, filepath) -> tuple[np.ndarray, int]:
return _ffmpeg_load_file(filepath)

# Try to register it in the module if possible
try:
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
except (AttributeError, TypeError) as e:
warnings.warn(
f"Could not register AudioMediaIO in vllm.multimodal.audio: {e}. "
"Audio loading will use FFmpeg functions directly.",
UserWarning
)

# ============================================================================

Expand Down