mirror of
https://github.com/cloudstack-llc/mlx-knife.git
synced 2026-07-01 20:44:14 -04:00
d32d3185dd
Stable release completing Issue #32 recovery plan - all tests passing. Bug Fixes: - Test collection regression (E2E suite parametrization) - Stop token ordering (batch + streaming modes) - E2E test temperature flakiness (deterministic sampling) - Web API framework detection (PR #42 by @limey, fixes #41) - E2E test marker fix (show_model_portfolio diagnostics) Architecture: - mlx-lm API evaluation: Keep manual text-based implementation - Stop token workarounds: All 3 validated (Phi-3, DeepSeek-R1, GPT-oss) Testing: - Portfolio Discovery: 73/81 tests, 17 models, 0 failures - E2E infrastructure hardened (TOKENIZERS, polling, gc.collect()) - Multi-Python validation: 3.9-3.13 passing Documentation: - ADR-009 Outstanding Work completed + Implementation Plan removed - TESTING-DETAILS.md: Portfolio Discovery + E2E Architecture updated - CHANGELOG.md: Complete 2.0.2 stable release notes
48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
"""Shared utilities for live E2E tests (ADR-011).

Provides:
- Portfolio discovery functions (reused from test_stop_tokens_live.py)
- RAM gating utilities
- Common test constants
"""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any, Dict, Final, Tuple

# Import portfolio discovery infrastructure from test_stop_tokens_live.py.
# That module is expected to be importable from the parent of this file's
# directory, so that directory is placed at the front of sys.path for the
# duration of the import only.
_parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(_parent_dir))

try:
    from test_stop_tokens_live import (
        discover_mlx_models_in_user_cache,
        get_safe_ram_budget_gb,
        get_system_ram_gb,
        should_skip_model,
        TEST_MODELS,
    )
finally:
    # Undo the sys.path change whether or not the import succeeded, so the
    # temporary entry does not leak into later imports.
    sys.path.remove(str(_parent_dir))


# Re-export for convenience: names listed here form this module's public API
# and are what `from <module> import *` exposes.
__all__ = [
    # Imported from test_stop_tokens_live
    "discover_mlx_models_in_user_cache",
    "get_safe_ram_budget_gb",
    "get_system_ram_gb",
    "should_skip_model",
    "TEST_MODELS",
    # Constants defined in this module
    "TEST_PROMPT",
    "MAX_TOKENS",
    "TEST_TEMPERATURE",
]


# Standard test constants (shared across all E2E tests).
# Declared Final so type checkers flag accidental reassignment.
TEST_PROMPT: Final[str] = "Write one sentence about cats."
MAX_TOKENS: Final[int] = 50
TEST_TEMPERATURE: Final[float] = 0.0  # Deterministic sampling for reproducible tests