mirror of
https://github.com/cloudstack-llc/mlx-knife.git
synced 2026-06-30 20:48:03 -04:00
fix: serve --model pre-validation + ADR-022 name resolution semantics
- serve.py: Pre-validate model before server start (ambiguity + not-found)
- ADR-022: Document name resolution semantics, command scope table
- clone.py: Remove unused import (ruff fix)
This commit is contained in:
@@ -285,6 +285,59 @@ mlxk run whisper-large-v3
|
||||
|
||||
---
|
||||
|
||||
## Name Resolution Semantics
|
||||
|
||||
### Fuzzy Matching by Context
|
||||
|
||||
| Resolution Type | Example | Fuzzy? | Rationale |
|-----------------|---------|--------|-----------|
| **Name** in namespace | `mlxk run whisper` | ✅ Yes | Namespace search (MLXK_WORKSPACE_HOME, then HF cache) |
| **Explicit path** | `mlxk run /path/whisper` | ❌ No | User points to concrete location |
| **Query** (list) | `mlxk list /path/pix` | ✅ Yes | Search/discovery, not execution |
|
||||
|
||||
**Security rationale:** Explicit paths (those starting with `/`, `./`, or `../`) are resolved exactly and never fuzzy-matched, analogous to `exec()` taking a literal path versus the shell expanding a glob. When the user's intent is explicit, resolution is exact.
|
||||
|
||||
```bash
MLXK_WORKSPACE_HOME=~/mlx-models

# Name → Fuzzy in MLXK_WORKSPACE_HOME (then HF cache)
mlxk run whisper                            # → ~/mlx-models/whisper-large-v3-mlx ✅
mlxk run pixtral                            # → Error: Ambiguous (pixtral-12b-8bit, pixtral-12b-4bit)

# Explicit path → Exact match required
mlxk run ~/mlx-models/whisper               # → Error: not found
mlxk run ~/mlx-models/whisper-large-v3-mlx  # → OK ✅

# Query → Fuzzy (discovery)
mlxk list ~/mlx-models/whisper              # → shows whisper-large-v3-mlx
```
|
||||
|
||||
---
|
||||
|
||||
## Command Scope
|
||||
|
||||
### Which commands work with workspaces?
|
||||
|
||||
| Command | Cache | Workspace | Notes |
|---------|-------|-----------|-------|
| `list` | ✅ | ✅ | Shows both with `source` column |
| `show` | ✅ | ✅ | Includes workspace metadata |
| `health` | ✅ | ✅ | Workspace-specific checks |
| `run` | ✅ | ✅ | Primary use case |
| `serve` | ✅ | ✅ | Via `--model ./path` |
| `pull` | ✅ | ❌ | Cache only (by design) |
| `clone` | ❌ | ✅ | HF Hub → workspace (direct download) |
| `push` | ❌ | ✅ | Workspace → HF Hub |
| `rm` | ✅ | ❌ | **Cache only** — use `rm -rf ./workspace` |
|
||||
|
||||
**Why no `mlxk rm` for workspaces?**
|
||||
- Workspaces are user-managed directories (like any project folder)
- User has full filesystem control — standard `rm -rf` is appropriate
- Avoids accidental deletion of user data; unlike the cache, which is regenerable, workspace contents may not be recoverable
- Principle: mlx-knife manages cache, user manages workspaces
|
||||
|
||||
---
|
||||
|
||||
## UX Details
|
||||
|
||||
### list: Source Column
|
||||
|
||||
@@ -23,7 +23,7 @@ from typing import Optional, Dict, Any, Tuple
|
||||
|
||||
from .pull import pull_to_cache
|
||||
from .workspace import write_workspace_sentinel
|
||||
from ..core.cache import hf_to_cache_dir, get_current_cache_root
|
||||
from ..core.cache import hf_to_cache_dir
|
||||
from mlxk2 import __version__
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -134,6 +134,20 @@ def start_server(
|
||||
# Suppress tqdm progress bars in server mode (must be set before tqdm import)
|
||||
os.environ["TQDM_DISABLE"] = "1"
|
||||
if model:
|
||||
# Pre-validate model specification before starting server (consistency with run.py)
|
||||
from ..core.model_resolution import resolve_model_for_operation
|
||||
from .workspace import is_explicit_path
|
||||
resolved_name, _, ambiguous = resolve_model_for_operation(model)
|
||||
if ambiguous:
|
||||
raise ValueError(
|
||||
f"Ambiguous model specification '{model}'. Could be: {ambiguous}"
|
||||
)
|
||||
if not resolved_name:
|
||||
# Model not found - give appropriate error message
|
||||
if is_explicit_path(model):
|
||||
raise ValueError(f"Workspace not found: {model}")
|
||||
else:
|
||||
raise ValueError(f"Model not found in cache: {model}")
|
||||
os.environ["MLXK2_PRELOAD_MODEL"] = model
|
||||
if max_tokens is not None:
|
||||
os.environ["MLXK2_MAX_TOKENS"] = str(max_tokens)
|
||||
|
||||
Reference in New Issue
Block a user