Files
mlx-knife/tests_2.0/test_audio_cli.py
T
The BROKE Cluster Team bf7480d042 Release 2.0.4-beta.9: Audio transcription via mlx-audio
Major Features:
- Audio transcription via mlx-audio backend (Whisper, >10min duration)
- OpenAI /v1/audio/transcriptions endpoint
- Memory Gate System (Vision: 8GB, Audio: 4GB)
- Config-based backend routing (ADR-020)
- Benchmark toolchain (memmon/memplot, Schema v0.2.2)

Key Fixes:
- EuroLLM tokenizer decoding
- Vision-model text-only routing regression
- Multimodal model context length detection
- Memory cleanup bug (mx.metal.clear_cache)
- Orphan process bug

Test Results:
- Unit tests: 647 passed, 11 skipped (Python 3.10-3.12)
- wet-umbrella: 171 passed total

See CHANGELOG.md for complete details and known issues.
2026-02-04 03:10:30 +01:00

285 lines
11 KiB
Python

"""Tests for --audio CLI argument (ADR-019 Phase 2).
Tests audio file handling in CLI without requiring actual model inference.
"""
import argparse
import pytest
from pathlib import Path
from unittest.mock import patch, MagicMock
import tempfile
# Directory with the audio fixtures: <directory of this test module>/assets/audio
AUDIO_ASSETS = Path(__file__).parent.joinpath("assets", "audio")
class TestAudioCLIArgument:
    """Tests for the audio-related options listed by ``mlxk run --help``."""

    @staticmethod
    def _run_help(capsys):
        """Run ``mlxk run --help`` and return what was written to stdout.

        ``main()`` is expected to leave via ``SystemExit``; the exit code is
        asserted to be 0 here so that every help test checks it the same way.

        Args:
            capsys: pytest's output-capturing fixture, forwarded by the test.

        Returns:
            The captured stdout text of the help invocation.
        """
        from mlxk2.cli import main
        import sys

        with pytest.raises(SystemExit) as exc_info:
            with patch.object(sys, "argv", ["mlxk", "run", "--help"]):
                main()

        # Help exits with 0
        assert exc_info.value.code == 0
        return capsys.readouterr().out

    def test_audio_argument_in_help(self, capsys):
        """CLI help should show --audio argument."""
        assert "--audio" in self._run_help(capsys)

    def test_audio_help_mentions_wav(self, capsys):
        """CLI help should mention WAV format for audio."""
        help_text = self._run_help(capsys)
        # NOTE(review): the second clause is satisfied by any help text that
        # lists "--audio", so this check cannot fail on a missing WAV mention.
        # Tighten to a WAV-only check once the exact help wording is confirmed.
        assert "WAV" in help_text or "audio" in help_text.lower()

    def test_language_argument_in_help(self, capsys):
        """CLI help should show --language argument for audio."""
        assert "--language" in self._run_help(capsys)
class TestAudioFileValidation:
    """Tests for audio file validation in CLI."""

    def test_audio_file_not_found(self, capsys):
        """Should error if audio file doesn't exist."""
        from mlxk2.cli import main
        import sys

        argv = ['mlxk', 'run', 'test-model', '--audio', '/nonexistent/file.wav', 'prompt']
        with pytest.raises(SystemExit) as exc_info:
            with patch.object(sys, 'argv', argv):
                main()

        assert exc_info.value.code == 1
        captured = capsys.readouterr()
        # The message may be printed on either stream; accept both.
        assert "Audio file not found" in captured.out or "Audio file not found" in captured.err

    def test_audio_file_too_large(self, tmp_path, capsys):
        """Should error if audio file >50MB (ADR-020: limit raised for Whisper/Voxtral)."""
        from mlxk2.cli import main
        import sys

        # Create a file that's too large (just over 50MB to trigger check).
        # truncate() extends the empty file to 51 MiB without first building a
        # 51 MiB bytes object in memory; the extension reads back as zeros on
        # common platforms, matching the zero-filled file this test used before.
        large_file = tmp_path / "large.wav"
        with large_file.open("wb") as fh:
            fh.truncate(51 * 1024 * 1024)

        # Use --prompt flag to avoid argparse ambiguity with positional prompt
        argv = ['mlxk', 'run', 'test-model', '--audio', str(large_file), '--prompt', 'test']
        with pytest.raises(SystemExit) as exc_info:
            with patch.object(sys, 'argv', argv):
                main()

        assert exc_info.value.code == 1
        captured = capsys.readouterr()
        # The message may be printed on either stream; accept both.
        assert "Audio file too large" in captured.out or "Audio file too large" in captured.err
class TestAudioCapabilityCheck:
    """Tests for audio capability detection."""

    def test_audio_without_audio_model_fails(self):
        """run_model should return an error string for --audio on an unusable model."""
        from mlxk2.operations.run import run_model

        # The model spec is not expected to resolve to a real model, so the
        # call should come back with an error message rather than output.
        fake_audio = [("test.wav", b"fake audio data")]
        outcome = run_model(
            model_spec="nonexistent-model-for-audio-test",
            prompt="test",
            audio=fake_audio,
        )

        assert outcome is not None
        assert "Error:" in outcome

        # Either "audio" capability error or model not found - both are acceptable.
        # NOTE(review): the model spec itself contains "audio"; if the error
        # message echoes the spec, this check passes regardless of the cause.
        lowered = outcome.lower()
        assert any(marker in lowered for marker in ("audio", "not found"))
class TestAudioTestAssets:
    """Tests to verify audio test assets are available."""

    def test_audio_assets_directory_exists(self):
        """Audio test assets directory should exist."""
        # is_dir() instead of exists(): a regular file at this path must not
        # be accepted as the assets directory.
        assert AUDIO_ASSETS.is_dir(), f"Audio assets directory not found: {AUDIO_ASSETS}"

    def test_audio_wav_files_exist(self):
        """WAV test files should be available."""
        wav_files = list(AUDIO_ASSETS.glob("*.wav"))
        assert wav_files, "No WAV files found in audio assets"

    def test_sources_file_has_attribution(self):
        """sources.txt should contain license attribution."""
        sources_file = AUDIO_ASSETS / "sources.txt"
        assert sources_file.is_file(), "sources.txt not found"

        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding of the machine running the tests.
        content = sources_file.read_text(encoding="utf-8")
        assert "CC BY 4.0" in content, "License attribution missing"
        assert "LibriSpeech" in content, "Source attribution missing"
class TestAudioBackendDetection:
    """Tests for config-based audio backend detection (ADR-020).

    Expected routing, as asserted by the tests below:
    - STT models (Voxtral, Whisper) → Backend.MLX_AUDIO
    - Multimodal models (Gemma-3n) → Backend.MLX_VLM
    """

    def test_voxtral_routes_to_mlx_audio(self, tmp_path):
        """A 'voxtral' model_type is routed to the MLX_AUDIO backend."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        # STT-focused config: audio_config present, vision_config empty.
        voxtral_config = {
            "model_type": "voxtral",
            "audio_config": {"num_mel_bins": 128},
            "vision_config": {},
        }

        chosen = detect_audio_backend(tmp_path, voxtral_config)

        assert chosen == Backend.MLX_AUDIO, "Voxtral should route to MLX_AUDIO"

    def test_whisper_routes_to_mlx_audio(self, tmp_path):
        """A 'whisper' model_type is routed to the MLX_AUDIO backend."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        chosen = detect_audio_backend(tmp_path, {"model_type": "whisper"})

        assert chosen == Backend.MLX_AUDIO, "Whisper should route to MLX_AUDIO"

    def test_gemma3n_routes_to_mlx_vlm(self, tmp_path):
        """Gemma-3n (audio + vision config) is routed to the MLX_VLM backend."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        # Multimodal config: both audio_config and a populated vision_config.
        gemma_config = {
            "model_type": "gemma3n",
            "audio_config": {"num_mel_bins": 80},
            "vision_config": {"image_size": 896, "patch_size": 14},
        }

        chosen = detect_audio_backend(tmp_path, gemma_config)

        assert chosen == Backend.MLX_VLM, "Gemma-3n should route to MLX_VLM"

    def test_whisper_feature_extractor_routes_to_mlx_audio(self, tmp_path):
        """A WhisperFeatureExtractor preprocessor config routes to MLX_AUDIO."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend
        import json

        # Drop a preprocessor_config.json naming WhisperFeatureExtractor into
        # the model directory handed to the detector.
        preprocessor_file = tmp_path / "preprocessor_config.json"
        preprocessor_file.write_text(
            json.dumps({"feature_extractor_type": "WhisperFeatureExtractor"})
        )

        # The model config itself carries no model_type.
        bare_config = {"hidden_size": 768}

        chosen = detect_audio_backend(tmp_path, bare_config)

        assert chosen == Backend.MLX_AUDIO, "WhisperFeatureExtractor should route to MLX_AUDIO"

    def test_audio_config_only_routes_to_mlx_vlm(self, tmp_path):
        """audio_config without any STT signal falls back to MLX_VLM."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        # Unrecognised model type whose only audio hint is audio_config.
        generic_audio_config = {
            "model_type": "unknown_audio_model",
            "audio_config": {"sample_rate": 16000},
        }

        chosen = detect_audio_backend(tmp_path, generic_audio_config)

        assert chosen == Backend.MLX_VLM, "audio_config alone should fallback to MLX_VLM"

    def test_no_audio_config_returns_none(self, tmp_path):
        """A config without audio_config yields no audio backend (None)."""
        from mlxk2.operations.common import detect_audio_backend

        # Plain text-model config.
        text_only_config = {"model_type": "llama", "hidden_size": 4096}

        chosen = detect_audio_backend(tmp_path, text_only_config)

        assert chosen is None, "Non-audio model should return None"

    def test_name_heuristic_whisper(self, tmp_path):
        """A directory name containing 'whisper' routes to MLX_AUDIO."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        # Model directory whose name contains "whisper".
        model_dir = tmp_path / "whisper-large-v3-turbo-4bit"
        model_dir.mkdir()

        # Neither model_type nor audio_config is given, leaving only the name.
        nameless_config = {"hidden_size": 768}

        chosen = detect_audio_backend(model_dir, nameless_config)

        assert chosen == Backend.MLX_AUDIO, "Name heuristic should detect whisper"

    def test_original_voxtral_no_vision_config(self, tmp_path):
        """Voxtral config lacking the vision_config key still routes to MLX_AUDIO."""
        from mlxk2.operations.common import detect_audio_backend
        from mlxk2.core.capabilities import Backend

        # Original Mistral layout: the vision_config key is absent entirely.
        original_voxtral_config = {
            "model_type": "voxtral",
            "audio_config": {"encoder_config": {"num_mel_bins": 128}},
        }

        chosen = detect_audio_backend(tmp_path, original_voxtral_config)

        assert chosen == Backend.MLX_AUDIO, "Original Voxtral should route to MLX_AUDIO"
class TestAudioRuntimeCompatibility:
    """Tests for audio runtime compatibility check (ADR-020).

    Each test asserts that ``audio_runtime_compatibility`` returns
    ``(True, None)`` for a backend whose package is importable, and is skipped
    when that package is not installed.
    """

    def test_mlx_audio_backend_checks_mlx_audio(self):
        """MLX_AUDIO backend should check for mlx-audio package."""
        from mlxk2.operations.common import audio_runtime_compatibility
        from mlxk2.core.capabilities import Backend
        import importlib.util

        # Skip if mlx-audio not installed (PyPI #442: [audio] extra is empty)
        if importlib.util.find_spec("mlx_audio") is None:
            pytest.skip("mlx-audio not installed (requires manual editable install)")

        # MLX_AUDIO backend (Whisper, Voxtral)
        compatible, reason = audio_runtime_compatibility(Backend.MLX_AUDIO)

        # Should be compatible when mlx-audio is installed
        assert compatible is True, f"Expected mlx-audio to be available: {reason}"
        assert reason is None

    def test_mlx_vlm_backend_checks_mlx_vlm(self):
        """MLX_VLM backend should check for mlx-vlm package."""
        from mlxk2.operations.common import audio_runtime_compatibility
        from mlxk2.core.capabilities import Backend
        import importlib.util

        # Same guard as the mlx-audio test: the assertion below only holds
        # when the package is importable, so skip instead of failing without it.
        # NOTE(review): assumes mlx-vlm is an optional dependency - if it is a
        # hard requirement of the project, drop this guard again.
        if importlib.util.find_spec("mlx_vlm") is None:
            pytest.skip("mlx-vlm not installed")

        # MLX_VLM backend (Gemma-3n multimodal)
        compatible, reason = audio_runtime_compatibility(Backend.MLX_VLM)

        # Should be compatible if mlx-vlm is installed
        assert compatible is True, f"Expected mlx-vlm to be available: {reason}"
        assert reason is None