Files
mlx-knife/tests_2.0/test_resumable_pull.py
T
The BROKE Cluster Team bf7480d042 Release 2.0.4-beta.9: Audio transcription via mlx-audio
Major Features:
- Audio transcription via mlx-audio backend (Whisper, >10min duration)
- OpenAI /v1/audio/transcriptions endpoint
- Memory Gate System (Vision: 8GB, Audio: 4GB)
- Config-based backend routing (ADR-020)
- Benchmark toolchain (memmon/memplot, Schema v0.2.2)

Key Fixes:
- EuroLLM tokenizer decoding
- Vision-model text-only routing regression
- Multimodal model context length detection
- Memory cleanup bug (mx.metal.clear_cache)
- Orphan process bug

Test Results:
- Unit tests: 647 passed, 11 skipped (Python 3.10-3.12)
- wet-umbrella: 171 passed total

See CHANGELOG.md for complete details and known issues.
2026-02-04 03:10:30 +01:00

381 lines
17 KiB
Python

"""Tests for resumable pull feature (Priority 1a).
Tests actual resume functionality with controlled download interruption.
Strategy:
- Real network download with controlled interruption
- Verifies detection (unhealthy → requires_confirmation)
- Verifies actual resume completes download
- Uses isolated cache (no impact on user cache)
IMPORTANT: This test MUST stay in tests_2.0/ (NOT tests_2.0/live/).
The live/ directory has pytest hooks for Portfolio Discovery that interfere
with the isolated_cache fixture, causing the test to fail.
IMPORTANT: This test uses live_pull marker (not live_e2e) because
module-scoped fixtures from live/conftest.py (_use_real_mlx_modules,
vision_portfolio, text_portfolio) interfere with HuggingFace Hub's symlink
creation mechanism during resume. These fixtures manipulate sys.path and
run subprocesses, causing import resolution issues. Must run independently.
Opt-in: Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network test)
Run: MLXK2_TEST_RESUMABLE_DOWNLOAD=1 pytest -m live_pull tests_2.0/test_resumable_pull.py -v
"""
import os
import sys
import time
import subprocess
import pytest
from pathlib import Path
# Mark as live_pull (isolated from live_e2e module fixtures)
# CRITICAL: Must include `live` marker so -m "not live" excludes these tests
pytestmark = [pytest.mark.live, pytest.mark.live_pull]
@pytest.mark.skipif(
not os.environ.get("MLXK2_TEST_RESUMABLE_DOWNLOAD"),
reason="Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network download test)"
)
class TestResumablePullRealDownload:
"""Test actual resume functionality with controlled download interruption.
Uses real network downloads with subprocess interruption to test resume logic.
Isolated cache prevents impact on user cache.
"""
def test_pull_resume_with_subprocess_interrupt(self, isolated_cache, monkeypatch):
"""Test that interrupted downloads can be resumed successfully.
Phase 1: Start download in subprocess and interrupt after partial completion
Phase 2: Verify model is unhealthy and returns requires_confirmation
Phase 3: Call pull_operation() to resume download
Phase 4: Verify model is now healthy
Note: This test downloads from HuggingFace (network required).
Uses isolated cache - no impact on user cache.
"""
from mlxk2.operations.pull import pull_operation
from mlxk2.operations.health import is_model_healthy
from mlxk2.core.cache import hf_to_cache_dir
# Select small multi-shard model for testing
# Phi-3-mini-4k-instruct-4bit is ~2.3GB with multiple shards
model = os.environ.get(
"MLXK2_RESUMABLE_TEST_MODEL",
"mlx-community/Phi-3-mini-4k-instruct-4bit"
)
print(f"\n[TEST] ========== RESUMABLE PULL TEST START ==========")
print(f"[TEST] Model: {model}")
print(f"[TEST] Isolated cache: {isolated_cache}")
# Use isolated_cache directly (fixture already patches MODEL_CACHE)
cache_dir = isolated_cache / hf_to_cache_dir(model)
blobs_dir = isolated_cache.parent / "blobs"
print(f"[TEST] Cache dir: {cache_dir}")
print(f"[TEST] Blobs dir: {blobs_dir}")
# Phase 1: Download with controlled interruption
# Use subprocess so we can kill it cleanly
# Force sequential download (max_workers=1) so we can reliably interrupt
import_code = """
from huggingface_hub import snapshot_download
snapshot_download(
repo_id='{model}',
local_files_only=False,
resume_download=True,
max_workers=1 # Sequential download for reliable interruption
)
""".format(model=model)
# Start subprocess with output visible for debugging
# HF_HOME points to parent of hub directory (HuggingFace standard)
# isolated_cache is already the hub path, so use parent
proc = subprocess.Popen(
[sys.executable, "-c", import_code],
env={**os.environ, "HF_HOME": str(isolated_cache.parent)},
stdout=None, # Let output go to terminal
stderr=None # Let errors go to terminal
)
# Interrupt download after fixed time (simpler and more reliable)
# Model downloads in ~220s total, interrupt after 45s = ~20% downloaded
interrupt_after = 45 # seconds
start_time = time.time()
interrupted = False
print(f"[TEST] Starting download of {model}...")
print(f"[TEST] Subprocess PID: {proc.pid}")
print(f"[TEST] Will interrupt after {interrupt_after}s (to create partial state)")
try:
while proc.poll() is None:
elapsed = time.time() - start_time
# Show progress every 5 seconds
if int(elapsed) % 5 == 0 and elapsed > 0:
if blobs_dir.exists():
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
print(f"[TEST] {elapsed:.0f}s elapsed, {total_bytes / 1_000_000:.1f}MB downloaded")
if elapsed >= interrupt_after:
# Time to interrupt
if blobs_dir.exists():
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
print(f"\n[TEST] Interrupting after {elapsed:.0f}s ({total_bytes / 1_000_000:.1f}MB downloaded)")
else:
print(f"\n[TEST] Interrupting after {elapsed:.0f}s")
proc.terminate()
proc.wait(timeout=5)
interrupted = True
break
time.sleep(1.0)
if not interrupted:
# Process completed before we could interrupt
# This can happen if model is very small or already cached
proc.wait()
pytest.skip("Download completed before interrupt - model may be too small or cached")
except subprocess.TimeoutExpired:
proc.kill()
pytest.fail("Download took too long - possible network issue")
# Phase 2: Verify unhealthy state and requires_confirmation
print("[TEST] Phase 2: Checking health after interrupt...")
# DEBUG: Trace cache state
print(f"[DEBUG] Cache dir exists: {cache_dir.exists()}")
if cache_dir.exists():
print(f"[DEBUG] Cache dir contents: {list(cache_dir.iterdir())[:5]}")
print(f"[DEBUG] Blobs dir exists: {blobs_dir.exists()}")
if blobs_dir.exists():
blob_count = len(list(blobs_dir.iterdir()))
total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
print(f"[DEBUG] Blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")
print(f"[DEBUG] Current HF_HOME: {os.environ.get('HF_HOME')}")
healthy, reason = is_model_healthy(model)
if healthy:
pytest.skip("Model is healthy after interrupt - download may have completed too quickly")
print(f"[TEST] Model unhealthy (expected): {reason}")
result = pull_operation(model)
assert result["status"] == "success", f"Expected success status, got: {result}"
assert result["data"]["download_status"] == "requires_confirmation", \
f"Expected requires_confirmation for unhealthy model, got: {result['data']['download_status']}"
assert "--force-resume" in result["data"]["message"], \
"Message should suggest --force-resume flag"
print("[TEST] ✓ Phase 2 passed: requires_confirmation detected")
# Phase 3: Resume download (simulates user confirming resume)
# Call pull_operation() with force_resume=True to skip health check
print("[TEST] Phase 3: Resuming download...")
print(f"[DEBUG] Before resume - HF_HOME: {os.environ.get('HF_HOME')}")
print(f"[DEBUG] Before resume - Cache dir exists: {cache_dir.exists()}")
# CRITICAL: Ensure Phase 1 subprocess is fully terminated before resuming
# Race condition: SIGTERM is sent but subprocess cleanup takes time
# If resume starts too early, huggingface_hub sees partial files and skips download
if proc.poll() is None:
print(f"[DEBUG] Waiting for Phase 1 subprocess (PID {proc.pid}) to terminate...")
try:
proc.wait(timeout=15)
except subprocess.TimeoutExpired:
proc.kill()
pytest.fail("Phase 1 subprocess did not terminate within 15s")
# Additional wait for filesystem to settle (file locks, etc.)
# Without this, huggingface_hub may see incomplete state and skip download
# Testing shows marker-run needs longer wait (possibly pytest interference)
print(f"[DEBUG] Phase 1 subprocess terminated with code: {proc.poll()}")
print("[DEBUG] Waiting 10s for filesystem to settle...")
time.sleep(10)
result = pull_operation(model, force_resume=True)
# Wait for any background downloads to settle
print("[DEBUG] Waiting 5s for background downloads to settle...")
time.sleep(5)
print(f"[DEBUG] After resume - pull_operation result: {result['data']['download_status']}")
print(f"[DEBUG] After resume - HF_HOME: {os.environ.get('HF_HOME')}")
print(f"[DEBUG] After resume - Cache dir exists: {cache_dir.exists()}")
if cache_dir.exists():
print(f"[DEBUG] After resume - Cache dir contents: {list(cache_dir.iterdir())[:5]}")
# Check actual snapshot contents
snapshots_dir = cache_dir / "snapshots"
if snapshots_dir.exists():
snapshots = list(snapshots_dir.iterdir())
print(f"[DEBUG] After resume - Found {len(snapshots)} snapshot(s)")
if snapshots:
latest_snapshot = snapshots[0]
snapshot_files = list(latest_snapshot.iterdir())
total_snapshot_bytes = sum(f.stat().st_size for f in snapshot_files if f.is_file())
print(f"[DEBUG] After resume - Snapshot has {len(snapshot_files)} files, {total_snapshot_bytes / 1_000_000:.1f}MB total")
# Show first few files with sizes
for f in snapshot_files[:5]:
if f.is_file():
print(f"[DEBUG] - {f.name}: {f.stat().st_size / 1_000_000:.1f}MB")
# Also check model-specific blobs dir (correct location per user)
model_blobs_dir = cache_dir / "blobs"
if model_blobs_dir.exists():
blob_count = len(list(model_blobs_dir.iterdir()))
total_bytes = sum(f.stat().st_size for f in model_blobs_dir.iterdir() if f.is_file())
print(f"[DEBUG] After resume - Model blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")
# Phase 4: Verify healthy state
print("[TEST] Phase 4: Verifying resumed download is healthy...")
print(f"[DEBUG] Before health check - HF_HOME: {os.environ.get('HF_HOME')}")
assert result["status"] == "success", f"Resume should succeed, got: {result}"
assert result["data"]["download_status"] in ("success", "already_exists"), \
f"Expected success or already_exists after resume, got: {result['data']['download_status']}"
healthy, reason = is_model_healthy(model)
assert healthy, f"Model should be healthy after resume, got: {reason}"
print("[TEST] ✓ All phases passed: Resume successful!")
# =============================================================================
# Unit Tests for CLI Logic (no network required, always run)
# =============================================================================
class TestResumablePullCLI:
"""Unit tests for CLI --force-resume logic.
These tests mock pull_operation() to test the CLI argument handling
without requiring actual network downloads.
"""
def test_force_resume_works_in_non_interactive_mode(self, monkeypatch):
"""Test that --force-resume flag is honored when stdin is not a TTY.
Regression test for bug where non-interactive stdin would exit(1)
before checking --force-resume flag.
"""
from unittest.mock import MagicMock, patch
import argparse
# Track calls to pull_operation
pull_calls = []
def mock_pull_operation(model, force_resume=False):
pull_calls.append({"model": model, "force_resume": force_resume})
if not force_resume:
# First call: return requires_confirmation
return {
"status": "success",
"data": {
"download_status": "requires_confirmation",
"model": model,
"message": "Model has partial download. Use --force-resume to continue."
}
}
else:
# Second call with force_resume=True: return success
return {
"status": "success",
"data": {
"download_status": "success",
"model": model
}
}
# Mock stdin.isatty() to return False (non-interactive)
mock_stdin = MagicMock()
mock_stdin.isatty.return_value = False
monkeypatch.setattr("sys.stdin", mock_stdin)
# Import CLI module after mocking
from mlxk2 import cli
# Mock pull_operation
monkeypatch.setattr("mlxk2.cli.pull_operation", mock_pull_operation)
# Mock print_result to prevent output
monkeypatch.setattr("mlxk2.cli.print_result", lambda *args, **kwargs: None)
# Create args with force_resume=True
args = argparse.Namespace(
command="pull",
model="test-model",
json=False,
force_resume=True,
verbose=False
)
# Call the CLI handler directly
# We need to extract just the pull handling logic
result = mock_pull_operation(args.model)
# Simulate the CLI logic (from cli.py lines 323-357)
if result.get("data", {}).get("download_status") == "requires_confirmation":
if args.json:
pass # Would print and exit
elif getattr(args, "force_resume", False):
# This is the fix: --force-resume should work in non-interactive
result = mock_pull_operation(args.model, force_resume=True)
elif not mock_stdin.isatty():
# Non-interactive without --force-resume: would fail
pytest.fail("Should not reach here with --force-resume=True")
# Verify pull_operation was called with force_resume=True
assert len(pull_calls) == 2, f"Expected 2 calls to pull_operation, got {len(pull_calls)}"
assert pull_calls[0]["force_resume"] == False, "First call should be without force_resume"
assert pull_calls[1]["force_resume"] == True, "Second call should be with force_resume=True"
assert result["data"]["download_status"] == "success"
def test_non_interactive_without_force_resume_fails(self, monkeypatch):
"""Test that non-interactive mode without --force-resume fails properly.
This ensures we don't silently ignore the requires_confirmation status.
"""
from unittest.mock import MagicMock
import argparse
def mock_pull_operation(model, force_resume=False):
return {
"status": "success",
"data": {
"download_status": "requires_confirmation",
"model": model,
"message": "Model has partial download. Use --force-resume to continue."
}
}
# Mock stdin.isatty() to return False (non-interactive)
mock_stdin = MagicMock()
mock_stdin.isatty.return_value = False
monkeypatch.setattr("sys.stdin", mock_stdin)
# Create args WITHOUT force_resume
args = argparse.Namespace(
command="pull",
model="test-model",
json=False,
force_resume=False,
verbose=False
)
result = mock_pull_operation(args.model)
# Simulate the CLI logic - should fail for non-interactive without --force-resume
should_fail = False
if result.get("data", {}).get("download_status") == "requires_confirmation":
if args.json:
pass
elif getattr(args, "force_resume", False):
pass # Would resume
elif not mock_stdin.isatty():
should_fail = True # This is expected behavior
assert should_fail, "Non-interactive mode without --force-resume should fail"