mlx-knife/tests_2.0/test_resumable_pull.py

"""Tests for resumable pull feature (Priority 1a).

Tests actual resume functionality with controlled download interruption.

Strategy:
- Real network download with controlled interruption
- Verifies detection (unhealthy → requires_confirmation)
- Verifies actual resume completes download
- Uses isolated cache (no impact on user cache)

IMPORTANT: This test MUST stay in tests_2.0/ (NOT tests_2.0/live/).
The live/ directory has pytest hooks for Portfolio Discovery that interfere
with the isolated_cache fixture, causing the test to fail.

IMPORTANT: This test uses live_pull marker (not live_e2e) because
module-scoped fixtures from live/conftest.py (_use_real_mlx_modules,
vision_portfolio, text_portfolio) interfere with HuggingFace Hub's symlink
creation mechanism during resume. These fixtures manipulate sys.path and
run subprocesses, causing import resolution issues. Must run independently.

Opt-in: Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network test)

Run: MLXK2_TEST_RESUMABLE_DOWNLOAD=1 pytest -m live_pull tests_2.0/test_resumable_pull.py -v
"""

import os
import sys
import time
import subprocess
import pytest
from pathlib import Path

# Mark as live_pull (isolated from live_e2e module fixtures)
# CRITICAL: Must include `live` marker so -m "not live" excludes these tests
pytestmark = [pytest.mark.live, pytest.mark.live_pull]


@pytest.mark.skipif(
    not os.environ.get("MLXK2_TEST_RESUMABLE_DOWNLOAD"),
    reason="Requires MLXK2_TEST_RESUMABLE_DOWNLOAD=1 (network download test)"
)
class TestResumablePullRealDownload:
    """Test actual resume functionality with controlled download interruption.

    Uses real network downloads with subprocess interruption to test resume logic.
    Isolated cache prevents impact on user cache.
    """

    def test_pull_resume_with_subprocess_interrupt(self, isolated_cache, monkeypatch):
        """Test that interrupted downloads can be resumed successfully.

        Phase 1: Start download in subprocess and interrupt after partial completion
        Phase 2: Verify model is unhealthy and returns requires_confirmation
        Phase 3: Call pull_operation() to resume download
        Phase 4: Verify model is now healthy

        Note: This test downloads from HuggingFace (network required).
        Uses isolated cache - no impact on user cache.
        """
        from mlxk2.operations.pull import pull_operation
        from mlxk2.operations.health import is_model_healthy
        from mlxk2.core.cache import hf_to_cache_dir

        # Select small multi-shard model for testing
        # Phi-3-mini-4k-instruct-4bit is ~2.3GB with multiple shards
        model = os.environ.get(
            "MLXK2_RESUMABLE_TEST_MODEL",
            "mlx-community/Phi-3-mini-4k-instruct-4bit"
        )

        print(f"\n[TEST] ========== RESUMABLE PULL TEST START ==========")
        print(f"[TEST] Model: {model}")
        print(f"[TEST] Isolated cache: {isolated_cache}")

        # Use isolated_cache directly (fixture already patches MODEL_CACHE)
        cache_dir = isolated_cache / hf_to_cache_dir(model)
        blobs_dir = isolated_cache.parent / "blobs"

        print(f"[TEST] Cache dir: {cache_dir}")
        print(f"[TEST] Blobs dir: {blobs_dir}")

        # Phase 1: Download with controlled interruption
        # Use subprocess so we can kill it cleanly
        # Force sequential download (max_workers=1) so we can reliably interrupt
        import_code = """
from huggingface_hub import snapshot_download
snapshot_download(
    repo_id='{model}',
    local_files_only=False,
    resume_download=True,
    max_workers=1  # Sequential download for reliable interruption
)
""".format(model=model)

        # Start subprocess with output visible for debugging
        # HF_HOME points to parent of hub directory (HuggingFace standard)
        # isolated_cache is already the hub path, so use parent
        proc = subprocess.Popen(
            [sys.executable, "-c", import_code],
            env={**os.environ, "HF_HOME": str(isolated_cache.parent)},
            stdout=None,  # Let output go to terminal
            stderr=None   # Let errors go to terminal
        )

        # Interrupt download after fixed time (simpler and more reliable)
        # Model downloads in ~220s total, interrupt after 45s = ~20% downloaded
        interrupt_after = 45  # seconds
        start_time = time.time()
        interrupted = False

        print(f"[TEST] Starting download of {model}...")
        print(f"[TEST] Subprocess PID: {proc.pid}")
        print(f"[TEST] Will interrupt after {interrupt_after}s (to create partial state)")

        try:
            while proc.poll() is None:
                elapsed = time.time() - start_time

                # Show progress every 5 seconds
                if int(elapsed) % 5 == 0 and elapsed > 0:
                    if blobs_dir.exists():
                        total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
                        print(f"[TEST] {elapsed:.0f}s elapsed, {total_bytes / 1_000_000:.1f}MB downloaded")

                if elapsed >= interrupt_after:
                    # Time to interrupt
                    if blobs_dir.exists():
                        total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
                        print(f"\n[TEST] Interrupting after {elapsed:.0f}s ({total_bytes / 1_000_000:.1f}MB downloaded)")
                    else:
                        print(f"\n[TEST] Interrupting after {elapsed:.0f}s")
                    proc.terminate()
                    proc.wait(timeout=5)
                    interrupted = True
                    break

                time.sleep(1.0)

            if not interrupted:
                # Process completed before we could interrupt
                # This can happen if model is very small or already cached
                proc.wait()
                pytest.skip("Download completed before interrupt - model may be too small or cached")

        except subprocess.TimeoutExpired:
            proc.kill()
            pytest.fail("Download took too long - possible network issue")

        # Phase 2: Verify unhealthy state and requires_confirmation
        print("[TEST] Phase 2: Checking health after interrupt...")

        # DEBUG: Trace cache state
        print(f"[DEBUG] Cache dir exists: {cache_dir.exists()}")
        if cache_dir.exists():
            print(f"[DEBUG] Cache dir contents: {list(cache_dir.iterdir())[:5]}")
        print(f"[DEBUG] Blobs dir exists: {blobs_dir.exists()}")
        if blobs_dir.exists():
            blob_count = len(list(blobs_dir.iterdir()))
            total_bytes = sum(f.stat().st_size for f in blobs_dir.iterdir() if f.is_file())
            print(f"[DEBUG] Blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")
        print(f"[DEBUG] Current HF_HOME: {os.environ.get('HF_HOME')}")

        healthy, reason = is_model_healthy(model)
        if healthy:
            pytest.skip("Model is healthy after interrupt - download may have completed too quickly")

        print(f"[TEST] Model unhealthy (expected): {reason}")

        result = pull_operation(model)
        assert result["status"] == "success", f"Expected success status, got: {result}"
        assert result["data"]["download_status"] == "requires_confirmation", \
            f"Expected requires_confirmation for unhealthy model, got: {result['data']['download_status']}"
        assert "--force-resume" in result["data"]["message"], \
            "Message should suggest --force-resume flag"
        print("[TEST] ✓ Phase 2 passed: requires_confirmation detected")

        # Phase 3: Resume download (simulates user confirming resume)
        # Call pull_operation() with force_resume=True to skip health check
        print("[TEST] Phase 3: Resuming download...")
        print(f"[DEBUG] Before resume - HF_HOME: {os.environ.get('HF_HOME')}")
        print(f"[DEBUG] Before resume - Cache dir exists: {cache_dir.exists()}")

        # CRITICAL: Ensure Phase 1 subprocess is fully terminated before resuming
        # Race condition: SIGTERM is sent but subprocess cleanup takes time
        # If resume starts too early, huggingface_hub sees partial files and skips download
        if proc.poll() is None:
            print(f"[DEBUG] Waiting for Phase 1 subprocess (PID {proc.pid}) to terminate...")
            try:
                proc.wait(timeout=15)
            except subprocess.TimeoutExpired:
                proc.kill()
                pytest.fail("Phase 1 subprocess did not terminate within 15s")

        # Additional wait for filesystem to settle (file locks, etc.)
        # Without this, huggingface_hub may see incomplete state and skip download
        # Testing shows marker-run needs longer wait (possibly pytest interference)
        print(f"[DEBUG] Phase 1 subprocess terminated with code: {proc.poll()}")
        print("[DEBUG] Waiting 10s for filesystem to settle...")
        time.sleep(10)

        result = pull_operation(model, force_resume=True)

        # Wait for any background downloads to settle
        print("[DEBUG] Waiting 5s for background downloads to settle...")
        time.sleep(5)

        print(f"[DEBUG] After resume - pull_operation result: {result['data']['download_status']}")
        print(f"[DEBUG] After resume - HF_HOME: {os.environ.get('HF_HOME')}")
        print(f"[DEBUG] After resume - Cache dir exists: {cache_dir.exists()}")
        if cache_dir.exists():
            print(f"[DEBUG] After resume - Cache dir contents: {list(cache_dir.iterdir())[:5]}")

            # Check actual snapshot contents
            snapshots_dir = cache_dir / "snapshots"
            if snapshots_dir.exists():
                snapshots = list(snapshots_dir.iterdir())
                print(f"[DEBUG] After resume - Found {len(snapshots)} snapshot(s)")
                if snapshots:
                    latest_snapshot = snapshots[0]
                    snapshot_files = list(latest_snapshot.iterdir())
                    total_snapshot_bytes = sum(f.stat().st_size for f in snapshot_files if f.is_file())
                    print(f"[DEBUG] After resume - Snapshot has {len(snapshot_files)} files, {total_snapshot_bytes / 1_000_000:.1f}MB total")
                    # Show first few files with sizes
                    for f in snapshot_files[:5]:
                        if f.is_file():
                            print(f"[DEBUG]   - {f.name}: {f.stat().st_size / 1_000_000:.1f}MB")

        # Also check model-specific blobs dir (correct location per user)
        model_blobs_dir = cache_dir / "blobs"
        if model_blobs_dir.exists():
            blob_count = len(list(model_blobs_dir.iterdir()))
            total_bytes = sum(f.stat().st_size for f in model_blobs_dir.iterdir() if f.is_file())
            print(f"[DEBUG] After resume - Model blobs: {blob_count} files, {total_bytes / 1_000_000:.1f}MB total")

        # Phase 4: Verify healthy state
        print("[TEST] Phase 4: Verifying resumed download is healthy...")
        print(f"[DEBUG] Before health check - HF_HOME: {os.environ.get('HF_HOME')}")
        assert result["status"] == "success", f"Resume should succeed, got: {result}"
        assert result["data"]["download_status"] in ("success", "already_exists"), \
            f"Expected success or already_exists after resume, got: {result['data']['download_status']}"

        healthy, reason = is_model_healthy(model)
        assert healthy, f"Model should be healthy after resume, got: {reason}"
        print("[TEST] ✓ All phases passed: Resume successful!")


# =============================================================================
# Unit Tests for CLI Logic (no network required, always run)
# =============================================================================

class TestResumablePullCLI:
    """Unit tests for CLI --force-resume logic.

    These tests mock pull_operation() to test the CLI argument handling
    without requiring actual network downloads.
    """

    def test_force_resume_works_in_non_interactive_mode(self, monkeypatch):
        """Test that --force-resume flag is honored when stdin is not a TTY.

        Regression test for bug where non-interactive stdin would exit(1)
        before checking --force-resume flag.
        """
        from unittest.mock import MagicMock, patch
        import argparse

        # Track calls to pull_operation
        pull_calls = []

        def mock_pull_operation(model, force_resume=False):
            pull_calls.append({"model": model, "force_resume": force_resume})
            if not force_resume:
                # First call: return requires_confirmation
                return {
                    "status": "success",
                    "data": {
                        "download_status": "requires_confirmation",
                        "model": model,
                        "message": "Model has partial download. Use --force-resume to continue."
                    }
                }
            else:
                # Second call with force_resume=True: return success
                return {
                    "status": "success",
                    "data": {
                        "download_status": "success",
                        "model": model
                    }
                }

        # Mock stdin.isatty() to return False (non-interactive)
        mock_stdin = MagicMock()
        mock_stdin.isatty.return_value = False
        monkeypatch.setattr("sys.stdin", mock_stdin)

        # Import CLI module after mocking
        from mlxk2 import cli

        # Mock pull_operation
        monkeypatch.setattr("mlxk2.cli.pull_operation", mock_pull_operation)

        # Mock print_result to prevent output
        monkeypatch.setattr("mlxk2.cli.print_result", lambda *args, **kwargs: None)

        # Create args with force_resume=True
        args = argparse.Namespace(
            command="pull",
            model="test-model",
            json=False,
            force_resume=True,
            verbose=False
        )

        # Call the CLI handler directly
        # We need to extract just the pull handling logic
        result = mock_pull_operation(args.model)

        # Simulate the CLI logic (from cli.py lines 323-357)
        if result.get("data", {}).get("download_status") == "requires_confirmation":
            if args.json:
                pass  # Would print and exit
            elif getattr(args, "force_resume", False):
                # This is the fix: --force-resume should work in non-interactive
                result = mock_pull_operation(args.model, force_resume=True)
            elif not mock_stdin.isatty():
                # Non-interactive without --force-resume: would fail
                pytest.fail("Should not reach here with --force-resume=True")

        # Verify pull_operation was called with force_resume=True
        assert len(pull_calls) == 2, f"Expected 2 calls to pull_operation, got {len(pull_calls)}"
        assert pull_calls[0]["force_resume"] == False, "First call should be without force_resume"
        assert pull_calls[1]["force_resume"] == True, "Second call should be with force_resume=True"
        assert result["data"]["download_status"] == "success"

    def test_non_interactive_without_force_resume_fails(self, monkeypatch):
        """Test that non-interactive mode without --force-resume fails properly.

        This ensures we don't silently ignore the requires_confirmation status.
        """
        from unittest.mock import MagicMock
        import argparse

        def mock_pull_operation(model, force_resume=False):
            return {
                "status": "success",
                "data": {
                    "download_status": "requires_confirmation",
                    "model": model,
                    "message": "Model has partial download. Use --force-resume to continue."
                }
            }

        # Mock stdin.isatty() to return False (non-interactive)
        mock_stdin = MagicMock()
        mock_stdin.isatty.return_value = False
        monkeypatch.setattr("sys.stdin", mock_stdin)

        # Create args WITHOUT force_resume
        args = argparse.Namespace(
            command="pull",
            model="test-model",
            json=False,
            force_resume=False,
            verbose=False
        )

        result = mock_pull_operation(args.model)

        # Simulate the CLI logic - should fail for non-interactive without --force-resume
        should_fail = False
        if result.get("data", {}).get("download_status") == "requires_confirmation":
            if args.json:
                pass
            elif getattr(args, "force_resume", False):
                pass  # Would resume
            elif not mock_stdin.isatty():
                should_fail = True  # This is expected behavior

        assert should_fail, "Non-interactive mode without --force-resume should fail"