diff --git a/.gitignore b/.gitignore index 345a55f..bf4c177 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ venv/ venv39/ +venv_*/ test_env*/ test_results*.log mypy_*.log diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f4db3f..ee431c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## [1.0.2] - 2025-08-18 + +### Fixed +- **Issue #11**: Fixed HF_HOME environment variable handling - MLX Knife now correctly uses `$HF_HOME/hub` for model storage, consistent with HuggingFace standard +- **Issue #9**: Fixed silent failure when removing corrupted models with empty snapshots directories +- **Cache Consistency**: Unified cache path logic - both default (`~/.cache/huggingface/hub`) and custom (`$HF_HOME/hub`) paths now consistently use `/hub` subdirectory + +### Enhanced +- **Download Throttling**: Improved adaptive throttling for household-friendly downloads (512KB chunks, 2-3s delays for large files) +- **Migration Warning**: Added helpful warning when models are found in legacy cache locations with clear migration instructions +- **Memory Management**: Enhanced exception-safe resource cleanup and baseline tracking + +### Technical +- **Dependencies**: Updated to latest tested versions (huggingface-hub 0.34.0+, mlx 0.28.0+, fastapi 0.116.0+) +- **Python Support**: Full compatibility verified on Python 3.9-3.13 +- **Test Suite**: All 105 tests passing with real MLX models on Apple Silicon + ## [1.0.1] - 2025-08-15 ### Changed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b5ab425..47c32d1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,6 +60,29 @@ mypy mlx_knife/ mlxk run Phi-3-mini "Hello world" ``` +## Repository Structure + +Understanding what goes where: + +``` +Repository structure: +├── mlx_knife/ # Python package (→ PyPI) +├── tests/ # Test suite +├── simple_chat.html # Web interface (GitHub only) +├── README.md # User documentation +├── CONTRIBUTING.md # This file +├── TESTING.md # Testing guide +├── CLAUDE.md # 
Development notes +├── pyproject.toml # Build configuration +└── requirements.txt # Dependencies +``` + +**What goes where:** +- **PyPI Package**: Only `mlx_knife/` + build files (`pyproject.toml`, `requirements.txt`) +- **GitHub Repository**: Everything else (documentation, tests, web interface) + +This helps ensure contributors commit files to the right place and understand the package vs. repository distinction. + ## Testing Requirements **Important**: MLX Knife requires Apple Silicon hardware for testing. Tests must be run locally on M1/M2/M3 Macs. @@ -85,18 +108,10 @@ mlxk run Phi-3-mini "Hello world" ```bash # Run all tests pytest - -# Run specific test categories -pytest tests/unit/ # Fast unit tests -pytest tests/integration/ # Integration tests - -# Run with coverage -pytest --cov=mlx_knife --cov-report=html - -# Skip tests requiring models -pytest -k "not requires_model" ``` +For detailed testing options, troubleshooting, and advanced workflows, see **[TESTING.md](TESTING.md)**. + ### Before Submitting PRs Please ensure all tests pass locally: @@ -148,16 +163,9 @@ Mention your Python version in the PR description. ## Testing -- **Unit tests**: Fast, isolated tests in `tests/unit/` -- **Integration tests**: System-level tests in `tests/integration/` -- **Real model tests**: Use Phi-3-mini for testing (it's small and fast) +MLX Knife has comprehensive test coverage. For detailed testing documentation including advanced options, test structure, and troubleshooting, see **[TESTING.md](TESTING.md)**. -Run specific test categories: -```bash -pytest tests/unit/ # Fast unit tests -pytest tests/integration/ # Integration tests -pytest -k "not requires_model" # Skip tests requiring models -``` +**When adding new tests**: Please update the test structure documentation in **[TESTING.md](TESTING.md)** if you add new test files or categories. 
## Code Style diff --git a/README.md b/README.md index 37ece50..eae2a8c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A lightweight, ollama-like CLI for managing and running MLX models on Apple Sili > **Note**: MLX Knife is designed as a command-line interface tool only. While some internal functions are accessible via Python imports, only CLI usage is officially supported. -**Current Version**: 1.0.1 (August 2025) +**Current Version**: 1.0.2 (August 2025) [![GitHub Release](https://img.shields.io/github/v/release/mzau/mlx-knife)](https://github.com/mzau/mlx-knife/releases) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) @@ -55,11 +55,6 @@ A lightweight, ollama-like CLI for managing and running MLX models on Apple Sili pip install mlx-knife ``` -### Via GitHub (Development) -```bash -pip install git+https://github.com/mzau/mlx-knife.git -``` - ### Requirements - macOS with Apple Silicon (M1/M2/M3) - Python 3.9+ (native macOS version or newer) @@ -127,6 +122,9 @@ MLX Knife includes a built-in web interface for easy model interaction: # Start the OpenAI-compatible API server mlxk server --port 8000 --max-tokens 4000 +# Get web chat interface from GitHub +curl -O https://raw.githubusercontent.com/mzau/mlx-knife/main/simple_chat.html + # Open web chat interface in your browser open simple_chat.html ``` @@ -227,33 +225,6 @@ After installation, these commands are equivalent: - `mlx-knife` - `mlx_knife` -## Project Structure - -``` -mlx_knife/ -├── __init__.py # Package metadata and version -├── cli.py # Command-line interface and argument parsing -├── cache_utils.py # Core model management functionality -├── mlx_runner.py # Native MLX model execution -├── server.py # OpenAI-compatible API server with FastAPI -├── hf_download.py # HuggingFace download integration -├── throttled_download_worker.py # Background download worker -├── requirements.txt # Python dependencies -├── pyproject.toml # Package 
configuration -├── simple_chat.html # Built-in web chat interface -└── README.md # This file -``` - -### Module Overview - -- **`cli.py`**: Entry point handling command parsing and dispatch -- **`cache_utils.py`**: Model discovery, metadata extraction, and cache operations -- **`mlx_runner.py`**: MLX model loading, token generation, and streaming -- **`server.py`**: FastAPI-based REST API server with OpenAI compatibility -- **`simple_chat.html`**: Standalone web chat interface for immediate use -- **`hf_download.py`**: Robust downloading with progress tracking -- **`throttled_download_worker.py`**: Prevents network overload during downloads - ## Configuration ### Cache Location @@ -308,70 +279,6 @@ mlxk run bert-base-uncased # Use MLX-Community models: https://huggingface.co/mlx-community ``` -## Testing - -MLX Knife includes comprehensive test coverage across all supported Python versions. - -### Quick Start - -**Prerequisites:** -- Apple Silicon Mac (M1/M2/M3) -- Python 3.9+ -- At least one MLX model: `mlxk pull mlx-community/Phi-3-mini-4k-instruct-4bit` - -**Run Tests:** -```bash -pip install -e ".[test]" -pytest -``` - -### Why Local Testing? - -MLX requires Apple Silicon hardware and real models (4GB+) for testing. This is standard for MLX projects and ensures tests reflect real-world usage. - -For detailed testing documentation, development workflows, and multi-Python verification, see **[TESTING.md](TESTING.md)**. - -## Part of the BROKE Ecosystem 🦫 - -MLX Knife is the first component of [BROKE Cluster](https://github.com/mzau/broke-cluster), -our research project for intelligent LLM routing across heterogeneous Apple Silicon networks. 
- -- **Use MLX Knife**: For single Mac setups (available now) -- **Use BROKE Cluster**: For multi-Mac environments (in development) - -## Technical Details - -### Token Decoding -MLX Knife uses context-aware decoding to handle tokenizers that encode spaces as separate tokens: - -```python -# Sliding window approach maintains context for proper spacing -window_tokens = generated_tokens[-10:] # Last 10 tokens -window_text = tokenizer.decode(window_tokens) -``` - -### Stop Token Detection -Stop tokens are dynamically extracted from each model's tokenizer: -- Primary: `tokenizer.eos_token` -- Secondary: `tokenizer.pad_token` (if different) -- Additional: Special tokens containing 'end', 'stop', or 'eot' -- Common tokens verified as single-token entities - -### Memory Management -- **Context Managers**: Automatic resource cleanup with Python context managers -- **Exception-Safe**: Model cleanup guaranteed even on errors -- **Baseline Tracking**: Memory captured before model loading -- **Real-time Monitoring**: GPU memory tracking via `mlx.core.get_active_memory()` -- **Memory Statistics**: Detailed usage displayed after generation -- **Leak Prevention**: Automatic `mx.clear_cache()` and garbage collection - -```python -# Context manager pattern (automatic cleanup) -with MLXRunner(model_path) as runner: - response = runner.generate_batch(prompt) -# Model automatically cleaned up here -``` - ## Troubleshooting ### Model Not Found @@ -394,17 +301,7 @@ mlxk list --all ## Contributing -Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. - -**Quick Start:** -1. Fork and clone the repository -2. Install with development tools: `pip install -e ".[dev,test]"` -3. Make your changes and add tests -4. Run tests locally on Apple Silicon: `pytest` -5. Check code style: `ruff check mlx_knife/ --fix` -6. Submit a pull request - -We prioritize compatibility with Python 3.9 (native macOS) but welcome contributions tested on any version 3.9+. 
+Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines. ## Security @@ -428,6 +325,6 @@ Copyright (c) 2025 The BROKE team 🦫

Made with ❤️ by The BROKE team BROKE Logo
- Version 1.0-rc3 | August 2025
+ Version 1.0.2 | August 2025
🔮 Next: BROKE Cluster for multi-node deployments

diff --git a/SECURITY.md b/SECURITY.md index 7bbae0d..7cdb5aa 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -98,6 +98,7 @@ mlxk server --host 0.0.0.0 --port 8000 | Version | Supported | | ------- | ------------------ | +| 1.0.2 | :white_check_mark: | | 1.0.1 | :white_check_mark: | | < 1.0 | :x: | diff --git a/TESTING.md b/TESTING.md index acb4a49..aa03f6b 100644 --- a/TESTING.md +++ b/TESTING.md @@ -29,8 +29,6 @@ This approach ensures our tests reflect real-world usage, not mocked behavior. ``` tests/ -├── TESTING.md # This file -├── mlx_knife_test_requirements.md # Original test requirements ├── conftest.py # Shared fixtures and utilities ├── integration/ # System-level integration tests │ ├── test_core_functionality.py # Basic CLI operations @@ -40,7 +38,8 @@ tests/ │ └── test_server_functionality.py # OpenAI API server tests └── unit/ # Module-level unit tests ├── test_cache_utils.py # Cache management functions - └── test_cli.py # CLI argument parsing + ├── test_cli.py # CLI argument parsing + └── test_mlx_runner_memory.py # Memory management tests ``` ## Test Prerequisites @@ -146,8 +145,8 @@ pytest -n auto ### ✅ Current Test Status (August 2025) ``` -Total Tests: 104/104 passing (100% ✅) -├── ✅ Integration Tests: 61 passing +Total Tests: 105/105 passing (100% ✅) +├── ✅ Integration Tests: 62 passing ├── ✅ Unit Tests: 25 passing └── ✅ Real MLX Model Tests: All passing with Phi-3-mini ``` @@ -164,7 +163,7 @@ Total Tests: 104/104 passing (100% ✅) | Category | Count | Description | |----------|-------|-------------| | **Unit Tests** | 25 | Fast, isolated function tests | -| **Integration Tests** | 61 | Full system behavior tests | +| **Integration Tests** | 62 | Full system behavior tests | | **Model Execution** | 7 | Real MLX model running | | **Process Lifecycle** | 8 | Signal handling and cleanup | | **Health Checks** | 12 | Corruption detection | @@ -173,7 +172,7 @@ Total Tests: 104/104 passing (100% ✅) ## Python Version Compatibility ### Compatibility Status 
-MLX Knife 1.0.1 is fully compatible with Python 3.9-3.13. Comprehensive verification completed with 104/104 tests passing on all supported versions. +MLX Knife 1.0.2 is fully compatible with Python 3.9-3.13. Comprehensive verification completed with 105/105 tests passing on all supported versions. ### Manual Multi-Python Testing @@ -195,11 +194,11 @@ deactivate && rm -rf test_39 | Python Version | Status | Tests Passing | |----------------|--------|---------------| -| 3.9.6 (macOS) | ✅ Verified | 104/104 | -| 3.10.x | ✅ Verified | 104/104 | -| 3.11.x | ✅ Verified | 104/104 | -| 3.12.x | ✅ Verified | 104/104 | -| 3.13.x | ✅ Verified | 104/104 | +| 3.9.6 (macOS) | ✅ Verified | 105/105 | +| 3.10.x | ✅ Verified | 105/105 | +| 3.11.x | ✅ Verified | 105/105 | +| 3.12.x | ✅ Verified | 105/105 | +| 3.13.x | ✅ Verified | 105/105 | All versions tested with real MLX model execution (Phi-3-mini-4k-instruct-4bit). @@ -339,7 +338,7 @@ When submitting PRs, please include: Platform: macOS 14.5, M2 Pro Python: 3.11.6 Model: Phi-3-mini-4k-instruct-4bit - Results: 104/104 tests passed + Results: 105/105 tests passed ``` 3. **Any issues encountered** and how you resolved them @@ -348,7 +347,7 @@ When submitting PRs, please include: **MLX Knife 1.0.1 Testing Status:** -✅ **Production Ready** - 104/104 tests passing +✅ **Production Ready** - 105/105 tests passing ✅ **Multi-Python Support** - Python 3.9-3.13 verified ✅ **Code Quality** - ruff/mypy integration working ✅ **Real Model Testing** - Phi-3-mini execution confirmed diff --git a/mlx_knife/__init__.py b/mlx_knife/__init__.py index c6aee7a..ddb0af5 100644 --- a/mlx_knife/__init__.py +++ b/mlx_knife/__init__.py @@ -4,7 +4,7 @@ A lightweight, ollama-like CLI for managing and running MLX models on Apple Sili Provides native MLX execution with streaming output and interactive chat capabilities. 
""" -__version__ = "1.0.1" +__version__ = "1.0.2" __author__ = "The BROKE team" __email__ = "broke@gmx.eu" __license__ = "MIT" diff --git a/mlx_knife/cache_utils.py b/mlx_knife/cache_utils.py index 32ee594..a12b889 100644 --- a/mlx_knife/cache_utils.py +++ b/mlx_knife/cache_utils.py @@ -7,10 +7,22 @@ import shutil import sys from pathlib import Path -__version__ = "1.0-beta-1" +DEFAULT_CACHE_ROOT = Path.home() / ".cache/huggingface" +CACHE_ROOT = Path(os.environ.get("HF_HOME", DEFAULT_CACHE_ROOT)) +MODEL_CACHE = CACHE_ROOT / "hub" -DEFAULT_CACHE = Path.home() / ".cache/huggingface/hub" -MODEL_CACHE = Path(os.environ.get("HF_HOME", DEFAULT_CACHE)) +# Global variable to track if warning was shown +_legacy_warning_shown = False + +# Check for models in legacy location and warn user +_legacy_models = list(CACHE_ROOT.glob("models--*")) +_is_test_env = "test_cache" in str(CACHE_ROOT) or "PYTEST_CURRENT_TEST" in os.environ +if _legacy_models and not _legacy_warning_shown and not _is_test_env: + print(f"\n⚠️ Found {len(_legacy_models)} models in legacy location: {CACHE_ROOT}") + print(f" Please move them to: {MODEL_CACHE}") + print(f" Command: mv {CACHE_ROOT}/models--* {MODEL_CACHE}/") + print(" This warning will appear until models are moved.\n") + _legacy_warning_shown = True def hf_to_cache_dir(hf_name: str) -> str: @@ -115,7 +127,8 @@ def get_model_path(model_spec): if snapshots: latest = max(snapshots, key=lambda x: x.stat().st_mtime) return latest, model_name, latest.name - return None, model_name, commit_hash + # Return base_cache_dir for corrupted models so rm_model can handle them + return base_cache_dir, model_name, commit_hash def parse_model_spec(model_spec): if "@" in model_spec: @@ -189,7 +202,7 @@ def get_model_hash(model_path): return latest.name[:8] def is_model_healthy(model_spec): - model_path, _, _ = get_model_path(model_spec) + model_path, _, _ = resolve_single_model(model_spec) if not model_path: return False config_path = model_path / "config.json" 
@@ -246,118 +259,113 @@ def check_model_health(model_spec): model_path, model_name, commit_hash = resolve_single_model(model_spec) if not model_path: # resolve_single_model already printed the appropriate error message - # Try one more fallback: check if this is an exact model name that exists but is corrupted - try: - fallback_model_name, fallback_commit_hash = parse_model_spec(model_spec) - base_cache_dir = MODEL_CACHE / hf_to_cache_dir(fallback_model_name) - if base_cache_dir.exists(): - print(f"[ERROR] Model '{model_spec}' directory exists but no snapshots found!") - confirm = input("Model appears corrupted. Delete? [y/N] ") - if confirm.lower() == "y": - import errno - import shutil - try: - shutil.rmtree(base_cache_dir) - print(f"Model {fallback_model_name} deleted.") - except PermissionError as e: - print(f"[ERROR] Permission denied: Cannot delete {e.filename}") - print(" Try running with appropriate permissions or manually delete the directory.") - except OSError as e: - if e.errno == errno.ENOTEMPTY: - print(f"[ERROR] Directory not empty: {e.filename}") - print(" Another process may be using this model.") - elif e.errno == errno.EACCES: - print(f"[ERROR] Access denied: {e.filename}") - else: - print(f"[ERROR] OS Error while deleting: {e}") - except Exception as e: - print(f"[ERROR] Unexpected error while deleting: {type(e).__name__}: {e}") - except: - # If even fallback parsing fails, just return - pass return False + print(f"Checking model: {model_name}") if commit_hash: print(f"Hash: {commit_hash}") - issues = [] - if not (model_path / "config.json").exists(): - issues.append("config.json missing") + + # Use the robust health check + if is_model_healthy(model_spec): + print("\n[OK] Model is healthy and usable!") + return True else: - print("config.json found") - weight_files = list(model_path.glob("*.safetensors")) + list(model_path.glob("*.bin")) - if not weight_files: - weight_files = list(model_path.glob("**/*.safetensors")) + 
list(model_path.glob("**/*.bin")) - if not weight_files: - index_file = model_path / "model.safetensors.index.json" - if index_file.exists(): + # Detailed diagnosis for WHY it's unhealthy + print("\n[ERROR] Model is corrupted. Detailed diagnosis:") + + # Check config.json + config_path = model_path / "config.json" + if not config_path.exists(): + print(" - config.json missing") + else: try: - with open(index_file) as f: + with open(config_path) as f: + config_data = json.load(f) + if not isinstance(config_data, dict) or len(config_data) == 0: + print(" - config.json is empty or invalid") + else: + print(" - config.json found and valid") + except (OSError, json.JSONDecodeError): + print(" - config.json exists but contains invalid JSON") + + # Check weight files (including gguf support like is_model_healthy) + weight_files = list(model_path.glob("*.safetensors")) + list(model_path.glob("*.bin")) + list(model_path.glob("*.gguf")) + if not weight_files: + weight_files = list(model_path.glob("**/*.safetensors")) + list(model_path.glob("**/*.bin")) + list(model_path.glob("**/*.gguf")) + + if weight_files: + total_size = sum(f.stat().st_size for f in weight_files) + size_mb = total_size / (1024 * 1024) + print(f" - Model weights found ({len(weight_files)} files, {size_mb:.1f}MB)") + elif (model_path / "model.safetensors.index.json").exists(): + # Check multi-file model + try: + with open(model_path / "model.safetensors.index.json") as f: index = json.load(f) if 'weight_map' in index: referenced_files = set(index['weight_map'].values()) existing_files = [f for f in referenced_files if (model_path / f).exists()] - if len(existing_files) > 0: + if existing_files: total_size = sum((model_path / f).stat().st_size for f in existing_files) size_mb = total_size / (1024 * 1024) - print(f"Model weights present ({len(existing_files)}/{len(referenced_files)} files, {size_mb:.1f}MB)") + print(f" - Multi-file weights ({len(existing_files)}/{len(referenced_files)} files, 
{size_mb:.1f}MB)") if len(existing_files) < len(referenced_files): - issues.append(f"Incomplete weights: {len(existing_files)}/{len(referenced_files)} files") + print(" - Incomplete multi-file model") else: - issues.append("Multi-file model: No weight files found") + print(" - Multi-file model index found but no weight files exist") else: - issues.append("Multi-file model: Invalid index") + print(" - Multi-file model index is invalid") except Exception as e: - issues.append(f"Multi-file model: Index error - {e}") + print(f" - Multi-file model index error: {e}") else: - issues.append("No model weights found") - else: - total_size = sum(f.stat().st_size for f in weight_files) - size_mb = total_size / (1024 * 1024) - print(f"Model weights present ({len(weight_files)} files, {size_mb:.1f}MB)") - lfs_ok, lfs_msg = check_lfs_corruption(model_path) - if lfs_ok: - print(f"[OK] {lfs_msg}") - else: - issues.append(lfs_msg) - framework = detect_framework(model_path.parent.parent, model_name) - print(f"Framework: {framework}") - if issues: - print("\n[ERROR] Issues found:") - for issue in issues: - print(f" - {issue}") - - if len(issues) >= 2: # Multiple issues = critical - confirm = input("Model appears corrupted. Delete? [y/N] ") - if confirm.lower() == "y": - import errno - import shutil - try: - if commit_hash: - # Delete specific hash/snapshot + print(" - No model weights found (.safetensors, .bin, .gguf)") + + # Check LFS corruption + lfs_ok, lfs_msg = check_lfs_corruption(model_path) + if not lfs_ok: + print(f" - {lfs_msg}") + else: + print(f" - {lfs_msg}") + + # Show framework + framework = detect_framework(model_path.parent.parent, model_name) + print(f" - Framework: {framework}") + + # Offer deletion for corrupted models + confirm = input("\nModel appears corrupted. Delete? 
[y/N] ") + if confirm.lower() == "y": + import errno + import shutil + try: + if commit_hash: + # Delete specific hash/snapshot + shutil.rmtree(model_path) + print(f"Hash {commit_hash} deleted.") + else: + # Delete entire model directory (go up from snapshots or use base_cache_dir) + if model_path.name.startswith("models--"): + # model_path is base_cache_dir (corrupted model case) shutil.rmtree(model_path) - print(f"Hash {commit_hash} deleted.") else: - # Delete entire model directory (go up from snapshots) + # model_path is snapshot dir model_base_dir = model_path.parent.parent shutil.rmtree(model_base_dir) - print(f"Model {model_name} deleted.") - except PermissionError as e: - print(f"[ERROR] Permission denied: Cannot delete {e.filename}") - print(" Try running with appropriate permissions or manually delete the directory.") - except OSError as e: - if e.errno == errno.ENOTEMPTY: - print(f"[ERROR] Directory not empty: {e.filename}") - print(" Another process may be using this model.") - elif e.errno == errno.EACCES: - print(f"[ERROR] Access denied: {e.filename}") - else: - print(f"[ERROR] OS Error while deleting: {e}") - except Exception as e: - print(f"[ERROR] Unexpected error while deleting: {type(e).__name__}: {e}") + print(f"Model {model_name} deleted.") + except PermissionError as e: + print(f"[ERROR] Permission denied: Cannot delete {e.filename}") + print(" Try running with appropriate permissions or manually delete the directory.") + except OSError as e: + if e.errno == errno.ENOTEMPTY: + print(f"[ERROR] Directory not empty: {e.filename}") + print(" Another process may be using this model.") + elif e.errno == errno.EACCES: + print(f"[ERROR] Access denied: {e.filename}") + else: + print(f"[ERROR] OS Error while deleting: {e}") + except Exception as e: + print(f"[ERROR] Unexpected error while deleting: {type(e).__name__}: {e}") + return False - else: - print("\n[OK] Model is healthy and usable!") - return True def check_all_models_health(): models = [d for 
d in MODEL_CACHE.iterdir() if d.name.startswith("models--")] diff --git a/mlx_knife/hf_download.py b/mlx_knife/hf_download.py index b5c69a9..4eb6869 100644 --- a/mlx_knife/hf_download.py +++ b/mlx_knife/hf_download.py @@ -15,7 +15,10 @@ except ImportError: from pathlib import Path def parse_model_spec(x): return (x, None) def hf_to_cache_dir(x): return x - MODEL_CACHE = Path(os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface/hub"))) + if "HF_HOME" in os.environ: + MODEL_CACHE = Path(os.environ["HF_HOME"]) / "hub" + else: + MODEL_CACHE = Path(os.path.expanduser("~/.cache/huggingface/hub")) def is_model_healthy(x): return False def describe_http_exception(exc): @@ -36,7 +39,7 @@ def describe_http_exception(exc): def configure_download_environment(): os.environ['HF_HUB_DOWNLOAD_THREADS'] = '1' - os.environ['HF_HUB_DOWNLOAD_CHUNK_SIZE'] = '1048576' + os.environ['HF_HUB_DOWNLOAD_CHUNK_SIZE'] = '524288' # 512KB chunks for household-friendly downloads os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = 'false' def pull_model(model_spec): diff --git a/mlx_knife/throttled_download_worker.py b/mlx_knife/throttled_download_worker.py index e1dadb0..50b5b6e 100644 --- a/mlx_knife/throttled_download_worker.py +++ b/mlx_knife/throttled_download_worker.py @@ -3,9 +3,18 @@ import os import signal import sys import time +from typing import Any + +# Global tracking for accurate download rate +_download_stats = { + 'bytes_downloaded': 0, + 'start_time': None, + 'last_update': None, + 'actual_download_time': 0.0 # Time spent actually downloading (without delays) +} -def signal_handler(signum, frame): +def signal_handler(signum: int, frame: Any) -> None: print("\n[WARNING] Download cancelled by user.") sys.exit(0) @@ -13,7 +22,7 @@ signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) os.environ["HF_HUB_DOWNLOAD_THREADS"] = "1" -os.environ["HF_HUB_DOWNLOAD_CHUNK_SIZE"] = "1048576" +os.environ["HF_HUB_DOWNLOAD_CHUNK_SIZE"] = "524288" # 512KB 
chunks (half size) os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "false" try: @@ -23,16 +32,66 @@ except ImportError: print("[ERROR] huggingface_hub or requests not installed in worker environment!") sys.exit(2) -# Throttle all HTTP(S) requests +# Throttle all HTTP(S) requests with adaptive delays original_get = requests.get original_post = requests.post -def throttled_get(*args, **kwargs): +def get_adaptive_delay(url: str, response: Any) -> float: + """Calculate delay based on file type and size""" + if not url: + return 1.0 + + # Check if this is a large model file download + if any(ext in url.lower() for ext in ['.safetensors', '.bin', '.pth']): + # For large model files, use more aggressive throttling + content_length = response.headers.get('content-length') + if content_length: + size_mb = int(content_length) / (1024 * 1024) + if size_mb > 100: # Files larger than 100MB + return 3.0 # 3 second delay between chunks + elif size_mb > 10: # Files larger than 10MB + return 2.0 # 2 second delay + return 2.0 # Default for model files + + # Regular files (config.json, tokenizer files, etc.) 
+ return 0.5 + +def throttled_get(*args: Any, **kwargs: Any) -> Any: + download_start = time.time() response = original_get(*args, **kwargs) - time.sleep(1.0) + download_end = time.time() + + # Track actual download time (without delays) + actual_download_time = download_end - download_start + _download_stats['actual_download_time'] += actual_download_time + + # Track bytes if we can determine them + url = args[0] if args else kwargs.get('url', '') + if hasattr(response, 'headers') and 'content-length' in response.headers: + content_length = int(response.headers['content-length']) + _download_stats['bytes_downloaded'] += content_length + + # Initialize timing if first download + if _download_stats['start_time'] is None: + _download_stats['start_time'] = download_start + + # Print accurate rate every ~5MB or every 10 seconds + now = time.time() + if (_download_stats['last_update'] is None or + now - _download_stats['last_update'] > 10 or + _download_stats['bytes_downloaded'] % (5 * 1024 * 1024) < content_length): + + if _download_stats['actual_download_time'] > 0: + real_rate_mbps = (_download_stats['bytes_downloaded'] / _download_stats['actual_download_time']) / (1024 * 1024) + total_mb = _download_stats['bytes_downloaded'] / (1024 * 1024) + print(f"[THROTTLE] Downloaded {total_mb:.1f}MB at real rate: {real_rate_mbps:.1f}MB/s (excluding delays)") + _download_stats['last_update'] = now + + delay = get_adaptive_delay(url, response) + time.sleep(delay) return response -def throttled_post(*args, **kwargs): +def throttled_post(*args: Any, **kwargs: Any) -> Any: response = original_post(*args, **kwargs) time.sleep(0.5) return response @@ -40,7 +99,7 @@ def throttled_post(*args, **kwargs): requests.get = throttled_get requests.post = throttled_post -def main(): +def main() -> None: if len(sys.argv) != 2: print("Usage: python throttled_download_worker.py ") sys.exit(1) diff --git a/pyproject.toml b/pyproject.toml index 3207f9b..55d6bc5 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -13,7 +13,7 @@ authors = [ {name = "The BROKE team", email = "broke@gmx.eu"}, ] classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Programming Language :: Python :: 3", @@ -26,13 +26,13 @@ classifiers = [ "Environment :: Console", ] dependencies = [ - "huggingface-hub>=0.19.0", - "requests>=2.31.0", - "mlx>=0.26.0", - "mlx-lm>=0.25.0", - "fastapi>=0.104.0", - "uvicorn>=0.24.0", - "pydantic>=2.4.0", + "huggingface-hub>=0.34.0", + "requests>=2.32.0", + "mlx>=0.28.0", + "mlx-lm>=0.26.0", + "fastapi>=0.116.0", + "uvicorn>=0.35.0", + "pydantic>=2.11.0", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 61fa4b5..5e3f008 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ # mlx_knife requirements # Core dependencies for HuggingFace model management -huggingface-hub>=0.19.0 -requests>=2.31.0 -mlx-lm>=0.25.0 # For running MLX models with streaming support -mlx>=0.26.0 # Core MLX library +huggingface-hub>=0.34.0 +requests>=2.32.0 +mlx-lm>=0.26.0 # For running MLX models with streaming support +mlx>=0.28.0 # Core MLX library # API Server dependencies (for 'mlxk server' command) -fastapi>=0.104.0 -uvicorn>=0.24.0 -pydantic>=2.4.0 +fastapi>=0.116.0 +uvicorn>=0.35.0 +pydantic>=2.11.0 -# Note: Python 3.10+ recommended for full MLX features \ No newline at end of file +# Note: Python 3.9+ supported, tested on Apple Silicon M1/M2/M3 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 4e645e8..d5fdfa8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,10 @@ def temp_cache_dir() -> Generator[Path, None, None]: cache_path = Path(temp_dir) / "test_cache" cache_path.mkdir() + # Create hub subdirectory (required by HF_HOME/hub fix) + hub_path = cache_path / "hub" + hub_path.mkdir() + # Set HF_HOME to our temp 
directory old_hf_home = os.environ.get("HF_HOME") os.environ["HF_HOME"] = str(cache_path) @@ -103,7 +107,9 @@ def mock_model_cache(temp_cache_dir): """Create a mock model in the cache directory.""" # Convert model name to cache directory format cache_name = model_name.replace("/", "--") - model_dir = temp_cache_dir / f"models--{cache_name}" / "snapshots" / "main" + # Create models in hub subdirectory (HF_HOME/hub fix) + hub_dir = temp_cache_dir / "hub" + model_dir = hub_dir / f"models--{cache_name}" / "snapshots" / "main" model_dir.mkdir(parents=True, exist_ok=True) if healthy and not corruption_type: diff --git a/tests/integration/test_core_functionality.py b/tests/integration/test_core_functionality.py index ea358da..6994084 100644 --- a/tests/integration/test_core_functionality.py +++ b/tests/integration/test_core_functionality.py @@ -94,6 +94,45 @@ class TestBasicOperations: # Should not crash # Exact behavior depends on implementation + def test_rm_command_corrupted_empty_snapshots(self, mlx_knife_process, temp_cache_dir): + """Remove command should handle corrupted models with empty snapshots directory.""" + from mlx_knife.cache_utils import hf_to_cache_dir + + # Create a corrupted model structure (directory exists but snapshots is empty) + test_model = "test-org/corrupted-empty-model" + # Create in hub subdirectory (new cache structure) + hub_dir = temp_cache_dir / "hub" + cache_dir = hub_dir / hf_to_cache_dir(test_model) + cache_dir.mkdir(parents=True, exist_ok=True) + (cache_dir / "snapshots").mkdir(exist_ok=True) + (cache_dir / "blobs").mkdir(exist_ok=True) + (cache_dir / "refs").mkdir(exist_ok=True) + + try: + # This should NOT fail silently - should either provide error message or handle deletion + proc = mlx_knife_process(["rm", test_model]) + stdout, stderr = proc.communicate(timeout=10) + + # Should complete (not hang) + assert proc.returncode is not None, "Remove command hung on corrupted model" + + # Should produce SOME output (not silent failure) 
+ output = (stdout + stderr).strip() + assert len(output) > 0, "Remove command failed silently on corrupted model - no output produced" + + # The behavior should be explicit: either error message or deletion prompt/confirmation + output_lower = output.lower() + has_error = "error" in output_lower or "not found" in output_lower + has_prompt = "delete" in output_lower or "remove" in output_lower + + assert has_error or has_prompt, f"Remove command should provide clear feedback, got: {output}" + + finally: + # Cleanup - remove the test corrupted model structure + import shutil + if cache_dir.exists(): + shutil.rmtree(cache_dir) + @pytest.mark.timeout(60) class TestModelExecution: