Files
mlx-knife/mlxk2/cli.py
T
The BROKE Cluster Team bf7480d042 Release 2.0.4-beta.9: Audio transcription via mlx-audio
Major Features:
- Audio transcription via mlx-audio backend (Whisper, >10min duration)
- OpenAI /v1/audio/transcriptions endpoint
- Memory Gate System (Vision: 8GB, Audio: 4GB)
- Config-based backend routing (ADR-020)
- Benchmark toolchain (memmon/memplot, Schema v0.2.2)

Key Fixes:
- EuroLLM tokenizer decoding
- Vision-model text-only routing regression
- Multimodal model context length detection
- Memory cleanup bug (mx.metal.clear_cache)
- Orphan process bug

Test Results:
- Unit tests: 647 passed, 11 skipped (Python 3.10-3.12)
- wet-umbrella: 171 passed total

See CHANGELOG.md for complete details and known issues.
2026-02-04 03:10:30 +01:00

678 lines
32 KiB
Python

#!/usr/bin/env python3
"""MLX-Knife CLI - HuggingFace model management for MLX."""
import argparse
import json
import os
import signal
import subprocess
import sys
from pathlib import Path
from typing import Dict, Any, Optional
# Optional: suppress huggingface_hub progress bars (used by mlx-vlm during model loading).
# These progress bars are informational only and can confuse users, since
# mlx-knife manages downloads via `pull`, not during `run`.
# NOTE: the suppression is currently disabled (the statement below is commented out).
# os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
from . import __version__
from .operations.list import list_models
from .operations.health import health_check_operation
from .operations.pull import pull_operation
from .operations.rm import rm_operation
from .operations.push import push_operation
from .operations.show import show_model_operation
from .operations.run import run_model_enhanced
from .spec import JSON_API_SPEC_VERSION
from .output.human import (
render_list,
render_health,
render_show,
render_pull,
render_clone,
render_rm,
)
def format_json_output(data: Dict[str, Any]) -> str:
    """Serialize a result payload as pretty-printed JSON.

    Args:
        data: JSON-serializable mapping (typically a command result).

    Returns:
        The JSON text, indented with two spaces per nesting level.
    """
    rendered = json.dumps(data, indent=2)
    return rendered
def _get_system_memory_bytes() -> Optional[int]:
    """Get total system memory in bytes via sysctl (macOS only).

    Runs ``sysctl -n hw.memsize`` with a 5 second timeout and parses its
    standard output as an integer.

    Returns:
        Total memory in bytes, or None if unavailable: the command is missing
        or cannot be executed, it times out, it exits non-zero, or its output
        is not an integer.
    """
    try:
        result = subprocess.run(
            ["sysctl", "-n", "hw.memsize"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if result.returncode == 0:
            return int(result.stdout.strip())
    except (subprocess.SubprocessError, ValueError, OSError):
        # OSError (not just FileNotFoundError) so that launch failures such as
        # PermissionError also degrade to "unavailable" instead of propagating.
        pass
    return None
def print_result(result: Dict[str, Any], render_func=None, json_mode=False, **render_kwargs):
    """Print command result to stdout (JSON, success) or stderr (human errors).

    Args:
        result: Command result dict with 'status' field
        render_func: Human-mode rendering function (if json_mode=False)
        json_mode: If True, output JSON format (always to stdout)
        **render_kwargs: Additional arguments for render_func
    """
    if json_mode:
        # JSON mode: Always stdout (for scripting/jq)
        print(format_json_output(result), file=sys.stdout)
        return

    if result.get("status") == "error":
        # Human-mode error: stderr (for pipes).
        # Use `or` fallbacks rather than dict.get defaults: error payloads built by
        # handle_error() carry explicit None values ("command": None), and a
        # get() default is only applied when the key is missing entirely.
        error_info = result.get("error") or {}
        message = error_info.get("message") or "Unknown error"
        command = result.get("command") or "command"
        print(f"{command}: Error: {message}", file=sys.stderr)
        return

    if render_func:
        # Human-mode success: stdout
        print(render_func(result, **render_kwargs), file=sys.stdout)
        return

    # Fallback: print JSON to stdout
    print(format_json_output(result), file=sys.stdout)
def handle_error(error_type: str, message: str) -> Dict[str, Any]:
    """Build the standard error result payload.

    Args:
        error_type: Short error category placed in ``error.type``.
        message: Human-readable description placed in ``error.message``.

    Returns:
        A result dict with ``status`` set to "error", ``command`` and ``data``
        set to None, and the nested ``error`` details.
    """
    error_details = {"type": error_type, "message": message}
    # Keyword order is preserved, so serialized output keeps the same key order.
    return dict(status="error", command=None, data=None, error=error_details)
class MLXKArgumentParser(argparse.ArgumentParser):
    """ArgumentParser whose usage errors become JSON when --json was requested.

    With this parser an invocation such as `mlxk2 push --json --private`
    (missing positional arguments) produces a JSON error document on stdout
    rather than argparse's usage text.
    """

    def error(self, message):  # type: ignore[override]
        """Report a parse error and exit with status 2.

        The raw process arguments are inspected (parsing has failed, so no
        namespace exists yet) to decide between JSON and argparse's own output.
        """
        if "--json" not in sys.argv:
            # Human mode: standard argparse behavior (usage + message, exit 2).
            return super().error(message)
        payload = handle_error("CommandError", message)
        print(format_json_output(payload), file=sys.stdout)
        self.exit(2)
def _build_parser():
    """Construct the ``mlxk2`` argument parser with every subcommand registered.

    Returns:
        The top-level MLXKArgumentParser. The ``convert`` subcommand is only
        registered when the MLXK2_ENABLE_ALPHA_FEATURES environment variable is set.
    """
    parser = MLXKArgumentParser(
        prog="mlxk2",
        description="MLX-Knife - HuggingFace model management for MLX",
        epilog=(
            "Note: mlx-knife can download and run third-party models (e.g. from Hugging Face).\n"
            "Each model has its own license. You are responsible for reviewing and complying\n"
            "with those license terms."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Add version argument (supports --json)
    parser.add_argument("--version", action="store_true", help="Show version information and exit")
    parser.add_argument("--json", action="store_true", help="Output in JSON format (with --version or per command)")
    subparsers = parser.add_subparsers(dest="command", help="Available commands", parser_class=MLXKArgumentParser)

    # List command
    list_parser = subparsers.add_parser("list", help="List all cached models")
    list_parser.add_argument("pattern", nargs="?", help="Filter models by pattern (optional)")
    # Human-output modifiers (JSON output remains unchanged)
    list_parser.add_argument("--all", action="store_true", dest="show_all", help="Show all details (human output)")
    list_parser.add_argument("--health", action="store_true", dest="show_health", help="Include health column (human output)")
    list_parser.add_argument("--verbose", action="store_true", help="Verbose details (human output)")
    list_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Health command
    health_parser = subparsers.add_parser("health", help="Check model health")
    health_parser.add_argument("model", nargs="?", help="Model pattern to check (optional)")
    health_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Show command
    show_parser = subparsers.add_parser("show", help="Show detailed model information")
    show_parser.add_argument("model", help="Model name to show")
    show_parser.add_argument("--files", action="store_true", help="Include file listing")
    show_parser.add_argument("--config", action="store_true", help="Include config.json content")
    show_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Pull command
    pull_parser = subparsers.add_parser("pull", help="Download a model")
    pull_parser.add_argument("model", help="Model name to download")
    pull_parser.add_argument("--json", action="store_true", help="Output in JSON format")
    pull_parser.add_argument("--force-resume", action="store_true", help="Force resume of partial downloads without prompting")

    # Clone command - create local workspace from cached model
    clone_parser = subparsers.add_parser("clone", help="Clone a model to a local workspace")
    clone_parser.add_argument("model", help="Model name to clone (org/repo[@revision])")
    clone_parser.add_argument("target_dir", help="Target directory for workspace")
    clone_parser.add_argument("--branch", help="Specific branch/revision to clone")
    clone_parser.add_argument("--no-health-check", action="store_true", help="Skip health validation before copy")
    clone_parser.add_argument("--quiet", action="store_true", help="Suppress progress output")
    clone_parser.add_argument("--json", action="store_true", help="Output in JSON format")
    clone_parser.add_argument("--force-resume", action="store_true", help="Force resume of partial downloads without prompting")

    # Convert command (alpha) - only show if alpha features enabled
    if os.getenv("MLXK2_ENABLE_ALPHA_FEATURES"):
        convert_parser = subparsers.add_parser(
            "convert",
            help="ALPHA: Convert workspace to workspace with transformations",
            description="Transform model workspaces (repair-index, quantize, etc.)"
        )
        convert_parser.add_argument("source", help="Source workspace path")
        convert_parser.add_argument("target", help="Target workspace path")
        convert_parser.add_argument(
            "--repair-index",
            action="store_true",
            help="Rebuild model.safetensors.index.json from shards (fixes mlx-vlm #624)"
        )
        convert_parser.add_argument(
            "--skip-health",
            action="store_true",
            help="Skip health check on output (debug only)"
        )
        convert_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Remove command
    rm_parser = subparsers.add_parser("rm", help="Delete a model")
    rm_parser.add_argument("model", help="Model name to delete")
    rm_parser.add_argument("-f", "--force", action="store_true", help="Delete without confirmation")
    rm_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Run command
    run_parser = subparsers.add_parser("run", help="Run model with prompt")
    run_parser.add_argument("model", help="Model name to run")
    run_parser.add_argument(
        "prompt",
        nargs="*",
        help="Input prompt (optional - interactive if omitted). Use '-' for stdin (requires MLXK2_ENABLE_PIPES=1).",
    )
    run_parser.add_argument(
        "--prompt",
        dest="prompt_flag",
        help="Input prompt (alternative to positional argument). Useful when prompt comes after --image flag.",
    )
    run_parser.add_argument(
        "--image",
        nargs='+',
        action="append",
        metavar="FILE",
        help="Attach image file(s) for vision models. Accepts multiple files per flag or use multiple flags.",
    )
    run_parser.add_argument(
        "--audio",
        nargs='+',
        action="append",
        metavar="FILE",
        help="Attach audio file(s) for audio-capable models (e.g., Whisper, Voxtral). Accepts WAV format.",
    )
    run_parser.add_argument(
        "--language",
        type=str,
        help="Audio language code (e.g., 'en', 'de'). Auto-detect if omitted.",
    )
    run_parser.add_argument(
        "--chunk",
        type=int,
        default=1,
        metavar="N",
        help="Process images in batches of N (default: 1 for maximum safety)",
    )
    run_parser.add_argument("--max-tokens", type=int, help="Maximum tokens to generate")
    run_parser.add_argument("--temperature", type=float, default=None, help="Sampling temperature (default: 0.7, audio: 0.0)")
    run_parser.add_argument("--top-p", type=float, default=0.9, help="Top-p sampling parameter (default: 0.9)")
    run_parser.add_argument("--repetition-penalty", type=float, default=1.1, help="Repetition penalty (default: 1.1)")
    run_parser.add_argument("--no-stream", action="store_true", help="Disable streaming output")
    run_parser.add_argument("--no-chat-template", action="store_true", help="Disable chat template")
    run_parser.add_argument("--no-reasoning", action="store_true", help="Hide reasoning output for reasoning models (show only final answer)")
    run_parser.add_argument("--verbose", action="store_true", help="Show detailed output")
    run_parser.add_argument("--json", action="store_true", help="Output in JSON format")

    # Serve command (primary, ollama-compatible)
    serve_parser = subparsers.add_parser("serve", help="Start OpenAI-compatible API server")
    serve_parser.add_argument("--model", help="Specific model to pre-load (optional)")
    serve_parser.add_argument("--port", type=int, default=8000, help="Port to bind server to (default: 8000)")
    serve_parser.add_argument("--host", default="127.0.0.1", help="Host address to bind to (default: 127.0.0.1)")
    serve_parser.add_argument("--max-tokens", type=int, help="Default maximum tokens for generation")
    serve_parser.add_argument("--reload", action="store_true", help="Enable auto-reload for development")
    serve_parser.add_argument("--log-level", default="info", help="Logging level (debug/info/warning/error, default: info)")
    serve_parser.add_argument("--log-json", action="store_true", help="Output logs in JSON format (for log aggregation)")
    serve_parser.add_argument("--chunk", type=int, default=1, metavar="N", help="Default batch size for vision requests (default: 1 for maximum safety, max: 5)")
    serve_parser.add_argument("--verbose", action="store_true", help="Show detailed output")
    serve_parser.add_argument("--json", action="store_true", help="Output startup info in JSON format")

    # Server command (alias for backward compatibility with 1.x)
    _ = subparsers.add_parser(
        "server",
        help="Start OpenAI-compatible API server (alias for serve)",
        parents=[serve_parser],
        add_help=False,
    )

    # Push command - upload local folder to Hugging Face
    push_parser = subparsers.add_parser("push", help="Upload a local folder to Hugging Face")
    push_parser.add_argument("local_dir", help="Local folder to upload")
    push_parser.add_argument("repo_id", help="Target repo as org/model")
    push_parser.add_argument("--create", action="store_true", help="Create repository/branch if missing")
    # Safety: require --private to avoid accidental public uploads
    push_parser.add_argument(
        "--private",
        action="store_true",
        required=True,
        help="REQUIRED: Proceed only when targeting a private repo",
    )
    push_parser.add_argument("--branch", default="main", help="Target branch (default: main)")
    push_parser.add_argument("--commit", dest="commit_message", default="mlx-knife push", help="Commit message")
    push_parser.add_argument("--verbose", action="store_true", help="Verbose details (human output)")
    push_parser.add_argument("--check-only", action="store_true", help="Analyze workspace content; do not upload")
    push_parser.add_argument("--dry-run", action="store_true", help="Compute changes against remote; do not upload")
    push_parser.add_argument("--json", action="store_true", help="Output in JSON format")
    return parser


def _exit_with_command_error(message, json_mode):
    """Report a CommandError through print_result and exit with status 1 (never returns)."""
    print_result(handle_error("CommandError", message), None, bool(json_mode))
    sys.exit(1)


def _print_version(json_mode):
    """Print version information, as a JSON document or as ``<program> <version>``."""
    if json_mode:
        # Build system info object
        system_info = {}
        memory_bytes = _get_system_memory_bytes()
        if memory_bytes is not None:
            system_info["memory_total_bytes"] = memory_bytes
        result = {
            "status": "success",
            "command": "version",
            "data": {
                "cli_version": __version__,
                "json_api_spec_version": JSON_API_SPEC_VERSION,
                "system": system_info or None,
            },
            "error": None,
        }
        print(format_json_output(result))
    else:
        # Use the actual command name invoked by the user
        cmd_name = os.path.basename(sys.argv[0])
        print(f"{cmd_name} {__version__}")


def _handle_pull(args):
    """Run the ``pull`` command, including the resume-confirmation flow.

    Returns:
        The final result dict from pull_operation (already printed).
    """
    result = pull_operation(args.model)
    # `data` may be present but None, so a get() default would not protect us here.
    data = result.get("data") or {}

    # Handle resume confirmation
    if data.get("download_status") == "requires_confirmation":
        if getattr(args, "force_resume", False):
            # --force-resume: the caller has already decided, so resume immediately.
            # Checked before --json so the flag is honored in JSON mode as well
            # (interactive and non-interactive alike).
            result = pull_operation(args.model, force_resume=True)
        elif args.json:
            # JSON mode: Return as-is, let caller decide
            print_result(result, render_pull, True)
            sys.exit(0)
        elif not sys.stdin.isatty():
            # Non-interactive without --force-resume: Fail with clear error
            result["status"] = "error"
            result["error"] = {
                "type": "requires_confirmation",
                "message": data["message"]
            }
            print_result(result, None, False)
            sys.exit(1)
        else:
            # Interactive: Prompt user
            model_name = data["model"]
            message = data["message"]
            print(f"Model '{model_name}' has partial download:", file=sys.stderr)
            print(f" {message}", file=sys.stderr)
            response = input("Resume download? [Y/n]: ").strip().lower()
            if response not in ("", "y", "yes"):
                print("Download cancelled. Partial download kept.", file=sys.stderr)
                print(f"Use 'mlxk rm {model_name}' to delete if needed.", file=sys.stderr)
                sys.exit(0)
            # User confirmed - retry pull with force_resume
            result = pull_operation(args.model, force_resume=True)

    print_result(result, render_pull, args.json)
    return result


def _resolve_prompt(args):
    """Determine the prompt for ``run`` from stdin, ``--prompt`` and positional words.

    Returns:
        The prompt string, or None when no prompt was supplied.
    """
    # Support both positional prompt and --prompt flag (UX improvement)
    # IMPORTANT: Check for stdin ("-") FIRST before applying prompt_flag precedence
    pipes_enabled = bool(os.getenv("MLXK2_ENABLE_PIPES"))
    prompt_flag = getattr(args, "prompt_flag", None)

    # Normalize positional args
    if isinstance(args.prompt, list):
        positional_prompt = args.prompt
    elif args.prompt is not None:
        positional_prompt = [args.prompt]
    else:
        positional_prompt = []

    if "-" in positional_prompt:
        # Stdin mode: Read from pipe
        if not pipes_enabled:
            _exit_with_command_error("Pipe mode requires MLXK2_ENABLE_PIPES=1", args.json)
        stdin_content = sys.stdin.read()
        # Combine stdin with --prompt flag if both present
        if prompt_flag:
            # "- --prompt text" → combine stdin + flag
            return f"{stdin_content}\n\n{prompt_flag}"
        # "- additional text" → combine stdin + positional
        additional_parts = [p for p in positional_prompt if p != "-"]
        if additional_parts:
            return f"{stdin_content}\n\n{' '.join(additional_parts)}"
        return stdin_content

    if prompt_flag:
        # --prompt flag (no stdin)
        return prompt_flag
    if positional_prompt:
        # Positional prompt (no stdin, no flag)
        return " ".join(positional_prompt)
    return None


def _load_attachments(groups, label, max_mb, json_mode):
    """Validate and read attachment files given via a repeatable ``nargs='+'`` flag.

    Args:
        groups: Parsed flag value: None, or a list of lists of paths.
        label: Noun used in error messages (e.g. "Image", "Audio file").
        max_mb: Size limit in MiB; larger files are rejected.
        json_mode: Whether errors are reported as JSON.

    Returns:
        List of ``(file_name, bytes)`` tuples in command-line order. Exits with
        status 1 on the first missing or oversized file.
    """
    paths = groups or []
    # Flatten nested list from nargs='+' + action='append'
    # [[a.jpg, b.jpg], [c.jpg]] → [a.jpg, b.jpg, c.jpg]
    if paths and isinstance(paths[0], list):
        paths = [item for sublist in paths for item in sublist]

    loaded = []
    for raw_path in paths:
        path = Path(raw_path)
        if not path.is_file():
            _exit_with_command_error(f"{label} not found: {raw_path}", json_mode)
        # Check the size on disk first so an oversized file is rejected without
        # being read into memory.
        if path.stat().st_size > max_mb * 1024 * 1024:
            _exit_with_command_error(f"{label} too large (>{max_mb}MB): {raw_path}", json_mode)
        loaded.append((path.name, path.read_bytes()))
    return loaded


def _handle_run(args):
    """Run the ``run`` command: gather prompt and attachments, generate, report.

    Returns:
        A result dict whose "status" is "success" or "error".
    """
    prompt_value = _resolve_prompt(args)

    # Increased from 2MB to 10MB after Session 9 validation (mlx-vlm handles larger images fine)
    image_inputs = _load_attachments(getattr(args, "image", None), "Image", 10, args.json)
    if image_inputs and prompt_value is None:
        prompt_value = "Describe the image."

    # Audio file processing (ADR-019 Phase 2)
    # 50MB limit for audio (~15 min at 16kHz mono)
    # Note: Gemma-3n ~30s (token limit), Voxtral >10min (larger token capacity)
    # Token count is the real constraint, file size is just a sanity check
    audio_inputs = _load_attachments(getattr(args, "audio", None), "Audio file", 50, args.json)
    # Multi-audio not supported by mlx-vlm (token mismatch bug)
    if len(audio_inputs) > 1:
        _exit_with_command_error("Multiple audio files not supported. Process one file at a time.", args.json)
    if audio_inputs and prompt_value is None:
        # Simple prompt - complex prompts cause multilingual drift in Gemma-3n with MP3
        prompt_value = "Transcribe this audio."

    # Streaming is disabled for multimodal input and for piped (non-TTY) text output.
    stream_mode = not args.no_stream
    if image_inputs or audio_inputs:
        stream_mode = False
    elif not sys.stdout.isatty() and not args.json:
        stream_mode = False

    # Context-aware temperature default (audio: 0.0 greedy for STT, else: 0.7)
    if args.temperature is None:
        temperature = 0.0 if audio_inputs else 0.7
    else:
        temperature = args.temperature

    # Handle run command with proper parameter mapping
    result_text = run_model_enhanced(
        model_spec=args.model,
        prompt=prompt_value,  # Can be None for interactive mode
        images=image_inputs or None,
        audio=audio_inputs or None,
        chunk=args.chunk,
        stream=stream_mode,
        max_tokens=getattr(args, "max_tokens", None),
        temperature=temperature,
        top_p=getattr(args, "top_p", 0.9),
        repetition_penalty=getattr(args, "repetition_penalty", 1.1),
        use_chat_template=not getattr(args, "no_chat_template", False),
        json_output=args.json,
        verbose=getattr(args, "verbose", False),
        system_prompt=None,  # Not yet implemented
        hide_reasoning=getattr(args, "no_reasoning", False),
        language=getattr(args, "language", None),
    )

    # Detect errors from run_model_enhanced (returns "Error: ..." string on failure)
    # This check must happen BEFORE the JSON/text mode split
    error_prefix = "Error: "
    if result_text and isinstance(result_text, str) and result_text.startswith(error_prefix):
        result = {
            "status": "error",
            "command": "run",
            "data": None,
            "error": {
                "type": "execution_error",
                "message": result_text[len(error_prefix):],
            }
        }
        # Note: run_model() already printed error to stderr in text mode
        if args.json:
            print_result(result, None, True)
        # Exit code 1 follows from the final status check in main()
        return result

    if args.json and result_text is not None and prompt_value is not None:
        # Success case: wrap result in standard format (only for single-shot mode)
        result = {
            "status": "success",
            "command": "run",
            "data": {
                "model": args.model,
                "prompt": prompt_value,
                "response": result_text
            },
            "error": None
        }
        print(format_json_output(result))
        return result

    # For non-JSON or interactive mode, set success result
    return {"status": "success"}


def _handle_serve(args):
    """Run the ``serve``/``server`` command; blocks while the server is running.

    Returns:
        ``{"status": "success"}`` once start_server returns.
    """
    if args.json:
        # JSON startup info
        server_info = {
            "status": "starting",
            "command": "serve",
            "data": {
                "host": args.host,
                "port": args.port,
                "model": getattr(args, "model", None),
                "max_tokens": getattr(args, "max_tokens", None),
            },
            "error": None
        }
        print(format_json_output(server_info))

    # Set MLXK2_LOG_JSON if --log-json flag is present
    if getattr(args, "log_json", False):
        os.environ["MLXK2_LOG_JSON"] = "1"

    # Start server (this will run indefinitely)
    # Lazy import to avoid hard dependency on FastAPI/uvicorn at import time
    from .operations.serve import start_server
    start_server(
        model=getattr(args, "model", None),
        port=args.port,
        host=args.host,
        max_tokens=getattr(args, "max_tokens", None),
        reload=getattr(args, "reload", False),
        log_level=getattr(args, "log_level", "info"),
        chunk=getattr(args, "chunk", 1),
        verbose=getattr(args, "verbose", False),
        supervise=True
    )
    # Should never reach here (server runs indefinitely)
    return {"status": "success"}


def main():
    """Main CLI entry point.

    Parses the command line, dispatches to the selected command and exits the
    process: status 0 when the command result has status "success", 1 for
    command or internal errors, and 2 for usage problems (no/unknown command
    in human mode, argparse errors).
    """
    # Handle SIGPIPE gracefully for Unix pipe workflows (e.g., `mlxk run model | head -1`)
    # Without this, Python raises BrokenPipeError when downstream closes the pipe early.
    # SIG_DFL restores the default behavior (silent termination) expected by Unix tools.
    # SIGPIPE is not defined on every platform (e.g. Windows), hence the hasattr guard.
    if hasattr(signal, 'SIGPIPE'):
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    parser = _build_parser()
    args = parser.parse_args()

    try:
        # Handle top-level version first
        if args.version:
            _print_version(args.json)
            sys.exit(0)

        # Initialize result for all paths
        result = None

        # Execute command and render per mode
        if args.command == "list":
            result = list_models(pattern=args.pattern)
            print_result(
                result,
                render_list,
                args.json,
                show_health=getattr(args, "show_health", False),
                show_all=getattr(args, "show_all", False),
                verbose=getattr(args, "verbose", False),
            )
        elif args.command == "health":
            result = health_check_operation(args.model)
            print_result(result, render_health, args.json)
        elif args.command == "show":
            result = show_model_operation(args.model, args.files, args.config)
            print_result(result, render_show, args.json)
        elif args.command == "pull":
            result = _handle_pull(args)
        elif args.command == "clone":
            # Handle branch parameter by modifying model spec
            model_spec = args.model
            if getattr(args, "branch", None):
                # If --branch is provided, append it to model spec
                model_spec = f"{args.model}@{args.branch}"
            from .operations.clone import clone_operation
            result = clone_operation(
                model_spec=model_spec,
                target_dir=args.target_dir,
                health_check=not getattr(args, "no_health_check", False),
                force_resume=getattr(args, "force_resume", False)
            )
            print_result(result, render_clone, args.json,
                         quiet=getattr(args, "quiet", False))
        elif args.command == "convert":
            # Check if alpha features are enabled (should not reach here if not, but double-check)
            if not os.getenv("MLXK2_ENABLE_ALPHA_FEATURES"):
                result = handle_error("CommandError", "Convert command requires MLXK2_ENABLE_ALPHA_FEATURES=1")
                print_result(result, None, True)  # Always JSON for this error
                sys.exit(1)
            from .operations.convert import convert_operation
            # Validate mode flags
            if args.repair_index:
                mode = "repair-index"
            else:
                print("Error: Must specify conversion mode (--repair-index)", file=sys.stderr)
                sys.exit(1)
            result = convert_operation(
                args.source,
                args.target,
                mode=mode,
                skip_health=args.skip_health
            )
            # Import render_convert from output.human
            from .output.human import render_convert
            print_result(result, render_convert, args.json)
        elif args.command == "rm":
            result = rm_operation(args.model, args.force)
            print_result(result, render_rm, args.json)
        elif args.command == "run":
            result = _handle_run(args)
        elif args.command in ["serve", "server"]:  # Handle both serve and server aliases
            result = _handle_serve(args)
        elif args.command == "push":
            result = push_operation(
                local_dir=args.local_dir,
                repo_id=args.repo_id,
                create=getattr(args, "create", False),
                private=getattr(args, "private", False),
                branch=getattr(args, "branch", None),
                commit_message=getattr(args, "commit_message", None),
                check_only=getattr(args, "check_only", False),
                dry_run=getattr(args, "dry_run", False),
                # Quiet mode: when emitting JSON without --verbose, suppress hub progress/log noise
                quiet=(getattr(args, "json", False) and not getattr(args, "verbose", False)),
            )
            from .output.human import render_push
            print_result(result, render_push, args.json,
                         verbose=getattr(args, "verbose", False))
        else:
            # No command or unknown command - show help or JSON error depending on --json flag
            if args.command is None:
                problem = "No command specified"
            else:
                problem = f"Unknown command: {args.command}"
            if args.json:
                result = handle_error("CommandError", problem)
                print(format_json_output(result), file=sys.stdout)
                sys.exit(1)
            parser.print_help()
            sys.exit(2)

        # Exit with appropriate code (only reached for commands that ran to completion)
        sys.exit(0 if result.get("status") == "success" else 1)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so the
        # deliberate exits above pass through; only unexpected failures land here.
        # Check if --json flag was requested
        want_json = "--json" in sys.argv
        if want_json:
            error_result = handle_error("InternalError", str(e))
            print(format_json_output(error_result), file=sys.stdout)
        else:
            # Human-mode error
            print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the CLI only when this module is executed as a script, not when imported.
if __name__ == "__main__":
    main()