mirror of
https://github.com/cloudstack-llc/mlx-knife.git
synced 2026-07-01 20:44:14 -04:00
bf7480d042
Major Features: - Audio transcription via mlx-audio backend (Whisper, >10min duration) - OpenAI /v1/audio/transcriptions endpoint - Memory Gate System (Vision: 8GB, Audio: 4GB) - Config-based backend routing (ADR-020) - Benchmark toolchain (memmon/memplot, Schema v0.2.2) Key Fixes: - EuroLLM tokenizer decoding - Vision-model text-only routing regression - Multimodal model context length detection - Memory cleanup bug (mx.metal.clear_cache) - Orphan process bug Test Results: - Unit tests: 647 passed, 11 skipped (Python 3.10-3.12) - wet-umbrella: 171 passed total See CHANGELOG.md for complete details and known issues.
381 lines
17 KiB
Python
381 lines
17 KiB
Python
"""Tests for resumable pull feature (Priority 1a).
|
|
|
|
Tests actual resume functionality with controlled download interruption.
|
|
|
|
Strategy:
|
|
- Real network download with controlled interruption
|
|
- Verifies detection (unhealthy → requires_confirmation)
|
|
- Verifies actual resume completes download
|
|
- Uses isolated cache (no impact on user cache)
|
|
|
|
IMPORTANT: This test MUST stay in tests_2.0/ (NOT tests_2.0/live/).
|
|
The live/ directory has pytest hooks for Portfolio Discovery that interfere
|
|
with the isolated_cache fixture, causing the test to fail.
|
|
|
|
IMPORTANT: This test uses live_pull marker (not live_e2e) because
|
|
module-scoped fixtures from live/conftest.py (_use_real_mlx_modules,
|
|
vision_portfolio, text_portfolio) interfere with HuggingFace Hub's symlink
|
|
creation mechanism during resume. These fixtures manipulate sys.path and
|
|
run subprocesses, causing import resolution issues. Must run independently.
|
|
|
|
Opt-in: Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network test)
|
|
|
|
Run: MLXK2_TEST_RESUMABLE_DOWNLOAD=1 pytest -m live_pull tests_2.0/test_resumable_pull.py -v
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import subprocess
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
# Mark as live_pull (isolated from live_e2e module fixtures)
|
|
# CRITICAL: Must include `live` marker so -m "not live" excludes these tests
|
|
pytestmark = [pytest.mark.live, pytest.mark.live_pull]
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
not os.environ.get("MLXK2_TEST_RESUMABLE_DOWNLOAD"),
|
|
reason="Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network download test)"
|
|
)
|
|
class TestResumablePullRealDownload:
|
|
"""Test actual resume functionality with controlled download interruption.
|
|
|
|
Uses real network downloads with subprocess interruption to test resume logic.
|
|
Isolated cache prevents impact on user cache.
|
|
"""
|
|
|
|
def test_pull_resume_with_subprocess_interrupt(self, isolated_cache, monkeypatch):
|
|
"""Test that interrupted downloads can be resumed successfully.
|
|
|
|
Phase 1: Start download in subprocess and interrupt after partial completion
|
|
Phase 2: Verify model is unhealthy and returns requires_confirmation
|
|
Phase 3: Call pull_operation() to resume download
|
|
Phase 4: Verify model is now healthy
|
|
|
|
Note: This test downloads from HuggingFace (network required).
|
|
Uses isolated cache - no impact on user cache.
|
|
"""
|
|
from mlxk2.operations.pull import pull_operation
|
|
from mlxk2.operations.health import is_model_healthy
|
|
from mlxk2.core.cache import hf_to_cache_dir
|
|
|
|
# Select small multi-shard model for testing
|
|
# Phi-3-mini-4k-instruct-4bit is ~2.3GB with multiple shards
|
|
model = os.environ.get(
|
|
"MLXK2_RESUMABLE_TEST_MODEL",
|
|
"mlx-community/Phi-3-mini-4k-instruct-4bit"
|
|
)
|
|
|
|
print(f"\n[TEST] ========== RESUMABLE PULL TEST START ==========")
|
|
print(f"[TEST] Model: {model}")
|
|
print(f"[TEST] Isolated cache: {isolated_cache}")
|
|
|
|
# Use isolated_cache directly (fixture already patches MODEL_CACHE)
|
|
cache_dir = isolated_cache / hf_to_cache_dir(model)
|
|
blobs_dir = isolated_cache.parent / "blobs"
|
|
|
|
print(f"[TEST] Cache dir: {cache_dir}")
|
|
print(f"[TEST] Blobs dir: {blobs_dir}")
|
|
|
|
# Phase 1: Download with controlled interruption
|
|
# Use subprocess so we can kill it cleanly
|
|
# Force sequential download (max_workers=1) so we can reliably interrupt
|
|
import_code = """
|
|
from huggingface_hub import snapshot_download
|
|
snapshot_download(
|
|
repo_id='{model}',
|
|
local_files_only=False,
|
|
resume_download=True,
|
|
max_workers=1 # Sequential download for reliable interruption
|
|
)
|
|
""".format(model=model)
|
|
|
|
# Start subprocess with output visible for debugging
|
|
# HF_HOME points to parent of hub directory (HuggingFace standard)
|
|
# isolated_cache is already the hub path, so use parent
|
|
proc = subprocess.Popen(
|
|
[sys.executable, "-c", import_code],
|
|
env={**os.environ, "HF_HOME": str(isolated_cache.parent)},
|
|
stdout=None, # Let output go to terminal
|
|
stderr=None # Let errors go to terminal
|
|
)
|
|
|
|
# Interrupt download after fixed time (simpler and more reliable)
|
|
# Model downloads in ~220s total, interrupt after 45s = ~20% downloaded
|
|
interrupt_after = 45 # seconds
|
|
start_time = time.time()
|
|
interrupted = False
|
|
|
|
print(f"[TEST] Starting download of {model}...")
|
|
print(f"[TEST] Subprocess PID: {proc.pid}")
|
|
print(f"[TEST] Will interrupt after {interrupt_after}s (to create partial state)")
|
|
|
|
try:
|
|
while proc.poll() is None:
|
|
elapsed = time.time() - start_time
|
|
|
|
# Show progress every 5 seconds
|
|
if int(elapsed) % 5 == 0 and elapsed > 0:
|
|
if blobs_dir.exists():
|
|
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
|
|
print(f"[TEST] {elapsed:.0f}s elapsed, {total_bytes / 1_000_000:.1f}MB downloaded")
|
|
|
|
if elapsed >= interrupt_after:
|
|
# Time to interrupt
|
|
if blobs_dir.exists():
|
|
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
|
|
print(f"\n[TEST] Interrupting after {elapsed:.0f}s ({total_bytes / 1_000_000:.1f}MB downloaded)")
|
|
else:
|
|
print(f"\n[TEST] Interrupting after {elapsed:.0f}s")
|
|
proc.terminate()
|
|
proc.wait(timeout=5)
|
|
interrupted = True
|
|
break
|
|
|
|
time.sleep(1.0)
|
|
|
|
if not interrupted:
|
|
# Process completed before we could interrupt
|
|
# This can happen if model is very small or already cached
|
|
proc.wait()
|
|
pytest.skip("Download completed before interrupt - model may be too small or cached")
|
|
|
|
except subprocess.TimeoutExpired:
|
|
proc.kill()
|
|
pytest.fail("Download took too long - possible network issue")
|
|
|
|
# Phase 2: Verify unhealthy state and requires_confirmation
|
|
print("[TEST] Phase 2: Checking health after interrupt...")
|
|
|
|
# DEBUG: Trace cache state
|
|
print(f"[DEBUG] Cache dir exists: {cache_dir.exists()}")
|
|
if cache_dir.exists():
|
|
print(f"[DEBUG] Cache dir contents: {list(cache_dir.iterdir())[:5]}")
|
|
print(f"[DEBUG] Blobs dir exists: {blobs_dir.exists()}")
|
|
if blobs_dir.exists():
|
|
blob_count = len(list(blobs_dir.iterdir()))
|
|
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
|
|
print(f"[DEBUG] Blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")
|
|
print(f"[DEBUG] Current HF_HOME: {os.environ.get('HF_HOME')}")
|
|
|
|
healthy, reason = is_model_healthy(model)
|
|
if healthy:
|
|
pytest.skip("Model is healthy after interrupt - download may have completed too quickly")
|
|
|
|
print(f"[TEST] Model unhealthy (expected): {reason}")
|
|
|
|
result = pull_operation(model)
|
|
assert result["status"] == "success", f"Expected success status, got: {result}"
|
|
assert result["data"]["download_status"] == "requires_confirmation", \
|
|
f"Expected requires_confirmation for unhealthy model, got: {result['data']['download_status']}"
|
|
assert "--force-resume" in result["data"]["message"], \
|
|
"Message should suggest --force-resume flag"
|
|
print("[TEST] ✓ Phase 2 passed: requires_confirmation detected")
|
|
|
|
# Phase 3: Resume download (simulates user confirming resume)
|
|
# Call pull_operation() with force_resume=True to skip health check
|
|
print("[TEST] Phase 3: Resuming download...")
|
|
print(f"[DEBUG] Before resume - HF_HOME: {os.environ.get('HF_HOME')}")
|
|
print(f"[DEBUG] Before resume - Cache dir exists: {cache_dir.exists()}")
|
|
|
|
# CRITICAL: Ensure Phase 1 subprocess is fully terminated before resuming
|
|
# Race condition: SIGTERM is sent but subprocess cleanup takes time
|
|
# If resume starts too early, huggingface_hub sees partial files and skips download
|
|
if proc.poll() is None:
|
|
print(f"[DEBUG] Waiting for Phase 1 subprocess (PID {proc.pid}) to terminate...")
|
|
try:
|
|
proc.wait(timeout=15)
|
|
except subprocess.TimeoutExpired:
|
|
proc.kill()
|
|
pytest.fail("Phase 1 subprocess did not terminate within 15s")
|
|
|
|
# Additional wait for filesystem to settle (file locks, etc.)
|
|
# Without this, huggingface_hub may see incomplete state and skip download
|
|
# Testing shows marker-run needs longer wait (possibly pytest interference)
|
|
print(f"[DEBUG] Phase 1 subprocess terminated with code: {proc.poll()}")
|
|
print("[DEBUG] Waiting 10s for filesystem to settle...")
|
|
time.sleep(10)
|
|
|
|
result = pull_operation(model, force_resume=True)
|
|
|
|
# Wait for any background downloads to settle
|
|
print("[DEBUG] Waiting 5s for background downloads to settle...")
|
|
time.sleep(5)
|
|
|
|
print(f"[DEBUG] After resume - pull_operation result: {result['data']['download_status']}")
|
|
print(f"[DEBUG] After resume - HF_HOME: {os.environ.get('HF_HOME')}")
|
|
print(f"[DEBUG] After resume - Cache dir exists: {cache_dir.exists()}")
|
|
if cache_dir.exists():
|
|
print(f"[DEBUG] After resume - Cache dir contents: {list(cache_dir.iterdir())[:5]}")
|
|
|
|
# Check actual snapshot contents
|
|
snapshots_dir = cache_dir / "snapshots"
|
|
if snapshots_dir.exists():
|
|
snapshots = list(snapshots_dir.iterdir())
|
|
print(f"[DEBUG] After resume - Found {len(snapshots)} snapshot(s)")
|
|
if snapshots:
|
|
latest_snapshot = snapshots[0]
|
|
snapshot_files = list(latest_snapshot.iterdir())
|
|
total_snapshot_bytes = sum(f.stat().st_size for f in snapshot_files if f.is_file())
|
|
print(f"[DEBUG] After resume - Snapshot has {len(snapshot_files)} files, {total_snapshot_bytes / 1_000_000:.1f}MB total")
|
|
# Show first few files with sizes
|
|
for f in snapshot_files[:5]:
|
|
if f.is_file():
|
|
print(f"[DEBUG] - {f.name}: {f.stat().st_size / 1_000_000:.1f}MB")
|
|
|
|
# Also check model-specific blobs dir (correct location per user)
|
|
model_blobs_dir = cache_dir / "blobs"
|
|
if model_blobs_dir.exists():
|
|
blob_count = len(list(model_blobs_dir.iterdir()))
|
|
total_bytes = sum(f.stat().st_size for f in model_blobs_dir.iterdir() if f.is_file())
|
|
print(f"[DEBUG] After resume - Model blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")
|
|
|
|
# Phase 4: Verify healthy state
|
|
print("[TEST] Phase 4: Verifying resumed download is healthy...")
|
|
print(f"[DEBUG] Before health check - HF_HOME: {os.environ.get('HF_HOME')}")
|
|
assert result["status"] == "success", f"Resume should succeed, got: {result}"
|
|
assert result["data"]["download_status"] in ("success", "already_exists"), \
|
|
f"Expected success or already_exists after resume, got: {result['data']['download_status']}"
|
|
|
|
healthy, reason = is_model_healthy(model)
|
|
assert healthy, f"Model should be healthy after resume, got: {reason}"
|
|
print("[TEST] ✓ All phases passed: Resume successful!")
|
|
|
|
|
|
# =============================================================================
|
|
# Unit Tests for CLI Logic (no network required, always run)
|
|
# =============================================================================
|
|
|
|
class TestResumablePullCLI:
|
|
"""Unit tests for CLI --force-resume logic.
|
|
|
|
These tests mock pull_operation() to test the CLI argument handling
|
|
without requiring actual network downloads.
|
|
"""
|
|
|
|
def test_force_resume_works_in_non_interactive_mode(self, monkeypatch):
|
|
"""Test that --force-resume flag is honored when stdin is not a TTY.
|
|
|
|
Regression test for bug where non-interactive stdin would exit(1)
|
|
before checking --force-resume flag.
|
|
"""
|
|
from unittest.mock import MagicMock, patch
|
|
import argparse
|
|
|
|
# Track calls to pull_operation
|
|
pull_calls = []
|
|
|
|
def mock_pull_operation(model, force_resume=False):
|
|
pull_calls.append({"model": model, "force_resume": force_resume})
|
|
if not force_resume:
|
|
# First call: return requires_confirmation
|
|
return {
|
|
"status": "success",
|
|
"data": {
|
|
"download_status": "requires_confirmation",
|
|
"model": model,
|
|
"message": "Model has partial download. Use --force-resume to continue."
|
|
}
|
|
}
|
|
else:
|
|
# Second call with force_resume=True: return success
|
|
return {
|
|
"status": "success",
|
|
"data": {
|
|
"download_status": "success",
|
|
"model": model
|
|
}
|
|
}
|
|
|
|
# Mock stdin.isatty() to return False (non-interactive)
|
|
mock_stdin = MagicMock()
|
|
mock_stdin.isatty.return_value = False
|
|
monkeypatch.setattr("sys.stdin", mock_stdin)
|
|
|
|
# Import CLI module after mocking
|
|
from mlxk2 import cli
|
|
|
|
# Mock pull_operation
|
|
monkeypatch.setattr("mlxk2.cli.pull_operation", mock_pull_operation)
|
|
|
|
# Mock print_result to prevent output
|
|
monkeypatch.setattr("mlxk2.cli.print_result", lambda *args, **kwargs: None)
|
|
|
|
# Create args with force_resume=True
|
|
args = argparse.Namespace(
|
|
command="pull",
|
|
model="test-model",
|
|
json=False,
|
|
force_resume=True,
|
|
verbose=False
|
|
)
|
|
|
|
# Call the CLI handler directly
|
|
# We need to extract just the pull handling logic
|
|
result = mock_pull_operation(args.model)
|
|
|
|
# Simulate the CLI logic (from cli.py lines 323-357)
|
|
if result.get("data", {}).get("download_status") == "requires_confirmation":
|
|
if args.json:
|
|
pass # Would print and exit
|
|
elif getattr(args, "force_resume", False):
|
|
# This is the fix: --force-resume should work in non-interactive
|
|
result = mock_pull_operation(args.model, force_resume=True)
|
|
elif not mock_stdin.isatty():
|
|
# Non-interactive without --force-resume: would fail
|
|
pytest.fail("Should not reach here with --force-resume=True")
|
|
|
|
# Verify pull_operation was called with force_resume=True
|
|
assert len(pull_calls) == 2, f"Expected 2 calls to pull_operation, got {len(pull_calls)}"
|
|
assert pull_calls[0]["force_resume"] == False, "First call should be without force_resume"
|
|
assert pull_calls[1]["force_resume"] == True, "Second call should be with force_resume=True"
|
|
assert result["data"]["download_status"] == "success"
|
|
|
|
def test_non_interactive_without_force_resume_fails(self, monkeypatch):
|
|
"""Test that non-interactive mode without --force-resume fails properly.
|
|
|
|
This ensures we don't silently ignore the requires_confirmation status.
|
|
"""
|
|
from unittest.mock import MagicMock
|
|
import argparse
|
|
|
|
def mock_pull_operation(model, force_resume=False):
|
|
return {
|
|
"status": "success",
|
|
"data": {
|
|
"download_status": "requires_confirmation",
|
|
"model": model,
|
|
"message": "Model has partial download. Use --force-resume to continue."
|
|
}
|
|
}
|
|
|
|
# Mock stdin.isatty() to return False (non-interactive)
|
|
mock_stdin = MagicMock()
|
|
mock_stdin.isatty.return_value = False
|
|
monkeypatch.setattr("sys.stdin", mock_stdin)
|
|
|
|
# Create args WITHOUT force_resume
|
|
args = argparse.Namespace(
|
|
command="pull",
|
|
model="test-model",
|
|
json=False,
|
|
force_resume=False,
|
|
verbose=False
|
|
)
|
|
|
|
result = mock_pull_operation(args.model)
|
|
|
|
# Simulate the CLI logic - should fail for non-interactive without --force-resume
|
|
should_fail = False
|
|
if result.get("data", {}).get("download_status") == "requires_confirmation":
|
|
if args.json:
|
|
pass
|
|
elif getattr(args, "force_resume", False):
|
|
pass # Would resume
|
|
elif not mock_stdin.isatty():
|
|
should_fail = True # This is expected behavior
|
|
|
|
assert should_fail, "Non-interactive mode without --force-resume should fail"
|