Compare commits

...

6 Commits

Author SHA1 Message Date
Cursor Agent 952dfcb5ed feat: Add FakeLlamaCloudServer for testing
Co-authored-by: adrian <adrian@runllama.ai>
2025-11-24 05:34:40 +00:00
Adrian Lyjak 8aa5ab0756 plan 2025-11-24 00:05:01 -05:00
Adrian Lyjak 30e36f3cc3 docs_improved2 2025-11-23 21:10:58 -05:00
Adrian Lyjak 549efbd8b9 docs_improved 2025-11-23 20:50:56 -05:00
Adrian Lyjak d312dc9890 add docs.md 2025-11-23 19:51:37 -05:00
Adrian Lyjak 10ab5fd2b9 planish 2025-11-23 15:50:36 -05:00
15 changed files with 2659 additions and 8 deletions
+2
View File
@@ -1,5 +1,6 @@
from llama_cloud_services.parse import LlamaParse
from llama_cloud_services.extract import LlamaExtract, ExtractionAgent
from llama_cloud_services.testing_utils import FakeLlamaCloudServer
from llama_cloud_services.utils import SourceText, FileInput
from llama_cloud_services.constants import EU_BASE_URL
from llama_cloud_services.index import (
@@ -18,4 +19,5 @@ __all__ = [
"LlamaCloudIndex",
"LlamaCloudRetriever",
"LlamaCloudCompositeRetriever",
"FakeLlamaCloudServer",
]
@@ -0,0 +1,9 @@
from .matchers import FileMatcher, RequestMatcher, SchemaMatcher
from .server import FakeLlamaCloudServer
# Public names re-exported by this package: the fake server plus the three
# matcher classes imported above.
__all__ = [
"FakeLlamaCloudServer",
"FileMatcher",
"SchemaMatcher",
"RequestMatcher",
]
@@ -0,0 +1,186 @@
from __future__ import annotations
import hashlib
import json
import random
from datetime import datetime, timezone
from typing import Any, Iterable, Mapping, MutableMapping
def hash_chunks(chunks: Iterable[bytes]) -> str:
    """Return the SHA-256 hex digest of *chunks* fed in iteration order.

    The pieces are hashed back-to-back, so the result equals the digest of
    their concatenation; an empty iterable yields the digest of ``b""``.
    """
    hasher = hashlib.sha256()
    feed = hasher.update
    for piece in chunks:
        feed(piece)
    return hasher.hexdigest()
def fingerprint_file(content: bytes, filename: str | None = None) -> str:
    """Fingerprint a file from its bytes followed by its UTF-8 encoded name.

    A missing or empty *filename* contributes nothing, so the result is then
    the plain SHA-256 of *content*.
    """
    if filename:
        suffix = filename.encode("utf-8")
    else:
        suffix = b""
    return hash_chunks([content, suffix])
def hash_schema(schema: Any) -> str:
    """Return a SHA-256 hex digest of *schema* in canonical JSON form.

    The schema is first reduced to plain data, then dumped with sorted keys
    and compact separators so key order and whitespace cannot affect the hash.
    """
    plain = _to_serializable(schema)
    canonical = json.dumps(plain, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
def combined_seed(*parts: str) -> int:
    """Fold any number of strings into one 64-bit integer seed.

    The parts are UTF-8 encoded and hashed back-to-back with no separator
    (so ``("ab", "c")`` and ``("a", "bc")`` give the same seed); the first
    16 hex digits of the digest become the integer.
    """
    encoded = [part.encode("utf-8") for part in parts]
    hex_digest = hash_chunks(encoded)
    return int(hex_digest[:16], base=16)
def generate_data_from_schema(schema: Any, seed: int) -> Any:
    """Produce fake data shaped like *schema* using a generator seeded by *seed*."""
    return _generate_value(schema, random.Random(seed), depth=0)
def generate_text_blob(seed: int, sentences: int = 3) -> str:
    """Return *sentences* pseudo-random sentences derived from *seed*.

    Each sentence is 6-12 distinct words drawn from a fixed 25-word
    vocabulary, capitalised and terminated with a full stop; sentences are
    joined by single spaces. ``sentences <= 0`` yields an empty string.
    """
    rng = random.Random(seed)
    vocabulary = (
        "aurora copper delta ember fable glyph harbor iris juniper kepler "
        "lumen monarch nylon onyx paragon quartz raptor solstice topaz umbra "
        "verdant willow xenon yonder zephyr"
    ).split()

    def one_sentence() -> str:
        # The length is drawn first, then the words, for every sentence.
        word_count = rng.randint(6, 12)
        picked = rng.sample(vocabulary, k=word_count)
        return " ".join(picked).capitalize() + "."

    return " ".join(one_sentence() for _ in range(sentences))
def utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    current = datetime.now(tz=timezone.utc)
    return current
def _to_serializable(value: Any) -> Any:
    """Reduce *value* to plain JSON-compatible data suitable for stable hashing.

    Handles primitives, bytes (decoded as UTF-8, undecodable bytes dropped),
    mappings, sequences, sets, and pydantic-style objects. Anything else
    falls back to ``str(value)``.

    Fixes relative to the previous version:
    * the ``MutableMapping`` branch was unreachable (every mutable mapping is
      a ``Mapping``) and has been removed;
    * sets are emitted in a sorted order, because raw set iteration order
      varies between interpreter runs and would make the hash unstable;
    * model *classes* are detected before the instance-method probes, which
      used to call ``model_dump_json`` unbound and raise ``TypeError``;
    * ``model_dump()`` / ``dict()`` output is converted recursively so nested
      non-JSON values cannot leak through.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")
    if isinstance(value, Mapping):
        return {key: _to_serializable(val) for key, val in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_serializable(item) for item in value]
    if isinstance(value, (set, frozenset)):
        converted = [_to_serializable(item) for item in value]
        # Sort by canonical JSON text so mixed element types still order.
        return sorted(converted, key=lambda item: json.dumps(item, sort_keys=True))
    if isinstance(value, type):
        # A model class describes a schema; it has no instance data to dump.
        for factory_name in ("model_json_schema", "schema"):
            factory = getattr(value, factory_name, None)
            if callable(factory):
                return _to_serializable(factory())
        return str(value)
    if hasattr(value, "model_dump_json"):
        return json.loads(value.model_dump_json())
    if hasattr(value, "model_dump"):
        return _to_serializable(value.model_dump())
    if hasattr(value, "dict"):
        return _to_serializable(value.dict())  # type: ignore[call-arg]
    if hasattr(value, "model_json_schema"):
        return _to_serializable(value.model_json_schema())
    return str(value)
def _generate_value(schema: Any, rng: random.Random, depth: int) -> Any:
    """Recursively build a fake value for a JSON-Schema-like *schema*.

    Args:
        schema: ``None``, a list of sub-schemas, a plain string label, or a
            mapping using JSON Schema keywords (``enum``, ``type``,
            ``properties``, ``items``, ``oneOf``, ``anyOf`` and bounds).
        rng: Seeded generator; every random choice is drawn from it so equal
            seeds give equal output.
        depth: Current nesting level; beyond 8 a random scalar is returned to
            stop runaway recursion.

    Fixes relative to the previous version:
    * ``date-time`` strings come from *rng* instead of the wall clock, which
      made seeded output differ between runs;
    * bounds no longer crash ``randint`` when only the upper bound is given
      below the default lower bound (``maxItems: 0``, ``maxLength: 3``,
      ``maximum: -5``);
    * an explicit ``maxLength`` is honoured by truncating the text;
    * ``type: "null"`` yields ``None``;
    * empty ``oneOf`` / ``anyOf`` lists fall through instead of raising.
    """
    if depth > 8:
        return rng.choice(
            (
                rng.randint(1, 999),
                rng.random(),
                generate_text_blob(rng.randint(0, 1_000_000), sentences=1),
            )
        )
    if schema is None:
        return generate_text_blob(rng.randint(0, 1_000_000), sentences=1)
    if isinstance(schema, list):
        return [_generate_value(item, rng, depth + 1) for item in schema]
    if isinstance(schema, str):
        return f"{schema}-{rng.randint(100, 999)}"
    if not isinstance(schema, Mapping):
        return generate_text_blob(rng.randint(0, 1_000_000), sentences=1)

    options = schema.get("enum")
    if options:
        return options[rng.randint(0, len(options) - 1)]

    schema_type = schema.get("type")
    if schema_type == "null":
        return None
    if schema_type == "object":
        properties = schema.get("properties", {})
        return {
            key: _generate_value(subschema, rng, depth + 1)
            for key, subschema in properties.items()
        }
    if schema_type == "array":
        items_schema = schema.get("items", {})
        min_items = schema.get("minItems", 1)
        max_items = schema.get("maxItems", max(3, min_items))
        upper = min(min_items + 2, max_items)
        # Clamp so an explicit maxItems below the lower bound cannot produce
        # an empty randint range.
        lower = min(min_items, upper)
        length = rng.randint(lower, upper)
        return [_generate_value(items_schema, rng, depth + 1) for _ in range(length)]
    if schema_type == "integer":
        minimum = schema.get("minimum", 0)
        maximum = schema.get("maximum", minimum + 500)
        low, high = int(minimum), int(maximum)
        if low > high:
            # Only reachable when "maximum" sits below the (default) minimum.
            low = high if "minimum" in schema else high - 500
        return rng.randint(low, high)
    if schema_type == "number":
        minimum = schema.get("minimum", 0.0)
        maximum = schema.get("maximum", minimum + 500.0)
        low, high = float(minimum), float(maximum)
        if low > high:
            low = high if "minimum" in schema else high - 500.0
        return round(rng.uniform(low, high), 2)
    if schema_type == "boolean":
        return rng.choice((True, False))
    if schema_type == "string":
        fmt = schema.get("format")
        if fmt == "date-time":
            # Seed-derived instant (1970..2033) keeps the output reproducible.
            moment = datetime.fromtimestamp(rng.randint(0, 2_000_000_000), tz=timezone.utc)
            return moment.isoformat()
        if fmt == "email":
            return f"user{rng.randint(1000, 9999)}@example.com"
        if fmt == "uri":
            return f"https://example.com/{rng.randint(1000, 9999)}"
        min_length = schema.get("minLength", 5)
        max_length = schema.get("maxLength", max(10, min_length))
        upper = min(min_length + 5, max_length)
        lower = min(min_length, upper)
        length = rng.randint(lower, upper)
        text = generate_text_blob(rng.randint(0, 1_000_000), sentences=max(1, length // 5))
        if "maxLength" in schema:
            text = text[: max(0, max_length)]
        return text

    for keyword in ("oneOf", "anyOf"):
        alternatives = schema.get(keyword)
        if alternatives:
            return _generate_value(rng.choice(alternatives), rng, depth + 1)
    return generate_text_blob(rng.randint(0, 1_000_000), sentences=1)
@@ -0,0 +1,137 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, List
import httpx
from llama_cloud.types import (
ClassifierRule,
ClassifyJob,
ClassifyJobResults,
ClassificationResult,
FileClassification,
StatusEnum,
)
from ._deterministic import combined_seed, utcnow
from .files import FakeFilesNamespace, StoredFile
if TYPE_CHECKING:
from .server import FakeLlamaCloudServer
# Everything the fake keeps for one classify job; created by
# FakeClassifyNamespace._handle_create_job and stored under the job id.
@dataclass(slots=True)
class ClassificationJobRecord:
# Job metadata returned by the create/get/list job routes.
job: ClassifyJob
# Results computed when the job was created; served by the results route.
results: ClassifyJobResults
# Stored files that were resolved from the request's file_ids.
files: List[StoredFile]
class FakeClassifyNamespace:
    """In-memory fake of the ``/api/v1/classifier/jobs`` endpoints.

    Jobs finish synchronously: creating a job immediately computes its
    results and stores both, and the GET routes serve them from memory.
    """

    def __init__(self, *, server: "FakeLlamaCloudServer", files: FakeFilesNamespace) -> None:
        self._server = server
        self._files = files
        # Builtin generic on purpose: ``typing.Dict`` is not imported by this
        # module, so the previous ``Dict[...]`` annotation was an undefined name.
        self._jobs: dict[str, ClassificationJobRecord] = {}

    def register(self) -> None:
        """Attach the four classifier routes to the owning server."""
        routes = (
            ("POST", "/api/v1/classifier/jobs", self._handle_create_job),
            ("GET", "/api/v1/classifier/jobs", self._handle_list_jobs),
            ("GET", "/api/v1/classifier/jobs/{job_id}", self._handle_get_job),
            ("GET", "/api/v1/classifier/jobs/{job_id}/results", self._handle_get_results),
        )
        for method, path, handler in routes:
            self._server.add_route(method, path, handler, namespace="classify")

    def _handle_create_job(self, request: httpx.Request) -> httpx.Response:
        """Create a finished job for the requested files, or 404 on an unknown file."""
        payload = self._server.json(request)
        file_ids = payload.get("file_ids", [])
        rules = [ClassifierRule.parse_obj(rule) for rule in payload.get("rules", [])]
        stored_files = []
        for file_id in file_ids:
            stored = self._files.get(file_id)
            if not stored:
                return self._server.json_response(
                    {"detail": f"File {file_id} not found"}, status_code=404
                )
            stored_files.append(stored)
        job_id = self._server.new_id("classify-job")
        # One clock read so the three timestamps of a new job are identical.
        now = utcnow()
        job = ClassifyJob(
            id=job_id,
            project_id=request.url.params.get("project_id", self._server.default_project_id),
            user_id="fake-user",
            rules=rules,
            parsing_configuration=None,
            status=StatusEnum.SUCCESS,
            created_at=now,
            updated_at=now,
            effective_at=now,
            error_message=None,
            job_record_id=None,
        )
        results = self._build_results(job_id, stored_files, rules)
        self._jobs[job_id] = ClassificationJobRecord(job=job, results=results, files=stored_files)
        return self._server.json_response(job.dict())

    def _handle_list_jobs(self, request: httpx.Request) -> httpx.Response:
        """Return every stored job in creation order."""
        return self._server.json_response([record.job.dict() for record in self._jobs.values()])

    def _handle_get_job(self, request: httpx.Request) -> httpx.Response:
        """Return the job named by the last path segment, or 404."""
        job_id = request.url.path.split("/")[-1]
        record = self._jobs.get(job_id)
        if not record:
            return self._server.json_response({"detail": "Job not found"}, status_code=404)
        return self._server.json_response(record.job.dict())

    def _handle_get_results(self, request: httpx.Request) -> httpx.Response:
        """Return the results of the job named by ``.../{job_id}/results``, or 404."""
        # The id is the second-to-last segment because the path ends in "/results".
        job_id = request.url.path.split("/")[-2]
        record = self._jobs.get(job_id)
        if not record:
            return self._server.json_response({"detail": "Results not found"}, status_code=404)
        return self._server.json_response(record.results.dict())

    def _build_results(
        self,
        job_id: str,
        stored_files: List[StoredFile],
        rules: List[ClassifierRule],
    ) -> ClassifyJobResults:
        """Pick a rule and confidence for each file from a hash of file and job id.

        With no rules the predicted type is ``"unlabeled"``.
        """
        items: List[FileClassification] = []
        for stored in stored_files:
            seed = combined_seed(stored.sha256, job_id)
            rule_index = seed % len(rules) if rules else 0
            predicted_type = rules[rule_index].type if rules else "unlabeled"
            # 0.55 .. 0.94 before the cap below.
            confidence = 0.55 + (seed % 40) / 100
            reasoning = f"Selected rule '{predicted_type}' using deterministic seed {seed}."
            items.append(
                FileClassification(
                    id=self._server.new_id("classification"),
                    file_id=stored.file.id,
                    classify_job_id=job_id,
                    created_at=utcnow(),
                    updated_at=utcnow(),
                    result=ClassificationResult(
                        type=predicted_type,
                        confidence=min(confidence, 0.95),
                        reasoning=reasoning,
                    ),
                )
            )
        return ClassifyJobResults(items=items, next_page_token=None, total_size=len(items))
@@ -0,0 +1,633 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import httpx
from llama_cloud.types import (
ExtractAgent,
ExtractConfig,
ExtractJob,
ExtractRun,
ExtractState,
File as CloudFile,
PaginatedExtractRunsResponse,
StatusEnum,
)
from ._deterministic import combined_seed, generate_data_from_schema, hash_schema, utcnow
from ._deterministic import fingerprint_file
from .files import FakeFilesNamespace, StoredFile
from .matchers import RequestContext, RequestMatcher
if TYPE_CHECKING:
from .server import FakeLlamaCloudServer
# Canned outcome for the stateless extraction route, queued by
# FakeExtractNamespace.stub_run and consumed in _handle_stateless_run.
@dataclass(slots=True)
class ExtractRunStub:
# Passed to RequestContext.matches to decide whether the stub applies.
matcher: Optional[RequestMatcher]
# Replacement run data; a callable is invoked with the request payload.
data: Optional[Any]
# Run status override, converted with ExtractState(...).
status: Optional[str]
# Replacement extraction metadata when truthy.
metadata: Optional[Dict[str, Any]]
# Error text copied onto the job and the run when truthy.
error: Optional[str]
# Job status override, converted with StatusEnum(...).
job_status: Optional[str]
# When true the stub is removed after its first match.
once: bool
# Canned outcome for agent-based extraction jobs, queued by
# FakeExtractNamespace.stub_agent_run and consumed in _handle_agent_job.
@dataclass(slots=True)
class AgentRunStub:
# Only jobs submitted for this agent id are considered.
agent_id: str
# Passed to RequestContext.matches to decide whether the stub applies.
matcher: Optional[RequestMatcher]
# Job status override, converted with StatusEnum(...).
job_status: Optional[str]
# Run status override, converted with ExtractState(...).
run_status: Optional[str]
# Error text copied onto the job and the run when truthy.
error: Optional[str]
# When true the stub is removed after its first match.
once: bool
# A job together with the run created alongside it by _create_job_and_run.
@dataclass(slots=True)
class StoredRun:
job: ExtractJob
run: ExtractRun
class FakeExtractNamespace:
# Set up empty in-memory state; routes are attached later by register().
def __init__(
self,
*,
server: "FakeLlamaCloudServer",
files: FakeFilesNamespace,
) -> None:
self._server = server
self._files = files
# job id -> job and its run
self._jobs: Dict[str, StoredRun] = {}
# run id -> run
self._runs: Dict[str, ExtractRun] = {}
# agent id -> agent
self._agents: Dict[str, ExtractAgent] = {}
# agent name -> agent id
self._agents_by_name: Dict[str, str] = {}
# Stubs queued by stub_run / stub_agent_run, checked in insertion order.
self._run_stubs: List[ExtractRunStub] = []
self._agent_run_stubs: List[AgentRunStub] = []
# Route objects that register() stores under "stateless_run",
# "agent_job" and "agent_run".
self.routes: Dict[str, Any] = {}
# Public APIs ----------------------------------------------------
def stub_run(
    self,
    matcher: Optional[RequestMatcher],
    *,
    data: Optional[Any] = None,
    status: Optional[str] = None,
    job_status: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
    once: bool = True,
) -> None:
    """Queue a canned outcome for stateless extraction requests.

    Stubs are checked in the order they were added; with ``once`` true a
    stub is discarded after the first request it matches.
    """
    stub = ExtractRunStub(
        matcher=matcher,
        data=data,
        status=status,
        metadata=metadata,
        error=error,
        job_status=job_status,
        once=once,
    )
    self._run_stubs.append(stub)
def stub_agent_run(
    self,
    *,
    agent_id: str,
    matcher: Optional[RequestMatcher],
    job_status: Optional[str] = None,
    run_status: Optional[str] = None,
    error: Optional[str] = None,
    once: bool = True,
) -> None:
    """Queue a canned outcome for jobs submitted to the agent *agent_id*."""
    stub = AgentRunStub(
        agent_id=agent_id,
        matcher=matcher,
        job_status=job_status,
        run_status=run_status,
        error=error,
        once=once,
    )
    self._agent_run_stubs.append(stub)
# Route registration ---------------------------------------------
def register(self) -> None:
    """Attach every extraction route to the owning server.

    Three routes are also exposed for test assertions, both in
    ``self.routes`` and as attributes: ``stateless_run``, ``agent_job`` and
    ``agent_run``.

    The literal ``.../extraction-agents/default`` route is now registered
    before the ``{agent_id}`` GET route. NOTE(review): this assumes the
    server resolves overlapping patterns in registration order (as respx
    does); under that rule the old order sent "default" to the by-id
    handler, which answered 404.
    """
    add_route = self._server.add_route

    def plain(method: str, path: str, handler: Any) -> None:
        add_route(method, path, handler, namespace="extract")

    def exposed(method: str, path: str, handler: Any, *, alias: str, key: str) -> None:
        route = add_route(method, path, handler, namespace="extract", alias=alias)
        self.routes[key] = route
        setattr(self, key, route)

    exposed(
        "POST",
        "/api/v1/extraction/run",
        self._handle_stateless_run,
        alias="extract_run",
        key="stateless_run",
    )

    # Agents: literal paths first so "{agent_id}" cannot shadow them.
    plain("POST", "/api/v1/extraction/extraction-agents", self._handle_create_agent)
    plain("PATCH", "/api/v1/extraction/extraction-agents/{agent_id}", self._handle_update_agent)
    plain("GET", "/api/v1/extraction/extraction-agents/default", self._handle_get_default_agent)
    plain("GET", "/api/v1/extraction/extraction-agents/{agent_id}", self._handle_get_agent)
    plain(
        "GET",
        "/api/v1/extraction/extraction-agents/by-name/{name}",
        self._handle_get_agent_by_name,
    )
    plain("GET", "/api/v1/extraction/extraction-agents", self._handle_list_agents)
    plain("DELETE", "/api/v1/extraction/extraction-agents/{agent_id}", self._handle_delete_agent)
    plain(
        "POST",
        "/api/v1/extraction/extraction-agents/schema/validation",
        self._handle_validate_schema,
    )

    # Jobs.
    exposed(
        "POST",
        "/api/v1/extraction/jobs",
        self._handle_agent_job,
        alias="agent_job",
        key="agent_job",
    )
    plain("POST", "/api/v1/extraction/jobs/batch", self._handle_agent_job_batch)
    plain("GET", "/api/v1/extraction/jobs", self._handle_list_jobs)
    plain("GET", "/api/v1/extraction/jobs/{job_id}", self._handle_get_job)

    # Runs.
    exposed(
        "GET",
        "/api/v1/extraction/runs/by-job/{job_id}",
        self._handle_get_run_by_job,
        alias="agent_run",
        key="agent_run",
    )
    plain("GET", "/api/v1/extraction/runs/{run_id}", self._handle_get_run)
    plain("DELETE", "/api/v1/extraction/runs/{run_id}", self._handle_delete_run)
    plain("GET", "/api/v1/extraction/runs", self._handle_list_runs)
# Handlers -------------------------------------------------------
def _handle_stateless_run(self, request: httpx.Request) -> httpx.Response:
    """Run a one-off extraction and return the resulting job.

    The run data is generated from the schema and the file fingerprint
    unless a stub queued via ``stub_run`` matches, in which case the stub's
    non-empty fields override statuses, metadata, error and data.
    """
    body = self._server.json(request)
    config = ExtractConfig.parse_obj(body["config"])
    data_schema = body["data_schema"]
    schema_hash = hash_schema(data_schema)
    file_info = self._extract_file_info(body, request)
    project_id = file_info.file.project_id
    agent = self._build_ephemeral_agent(config, data_schema, project_id)

    stub = self._pop_stub(
        self._run_stubs,
        RequestContext(
            request=request,
            json=body,
            file_id=file_info.file.id,
            filename=file_info.file.name,
            file_sha256=file_info.sha256,
            schema_hash=schema_hash,
            project_id=project_id,
            organization_id=self._server.default_organization_id,
        ),
    )

    # Defaults describe a successful run with generated data.
    job_status, run_status = StatusEnum.SUCCESS, ExtractState.SUCCESS
    metadata = {"deterministic": {"value": True}}
    error = None
    run_data = self._generate_run_data(data_schema, file_info.sha256)

    if stub:
        job_status = StatusEnum(stub.job_status) if stub.job_status else job_status
        run_status = ExtractState(stub.status) if stub.status else run_status
        metadata = stub.metadata or metadata
        error = stub.error or error
        if stub.data is not None:
            # A callable stub computes its data from the request payload.
            run_data = stub.data(body) if callable(stub.data) else stub.data  # type: ignore[assignment]

    stored = self._create_job_and_run(
        agent=agent,
        config=config,
        data_schema=data_schema,
        file_info=file_info,
        job_status=job_status,
        run_status=run_status,
        metadata=metadata,
        data=run_data,
        error=error,
        project_id=project_id,
    )
    return self._server.json_response(stored.job.dict())
def _handle_create_agent(self, request: httpx.Request) -> httpx.Response:
    """Create an agent from the request body and index it by id and by name."""
    body = self._server.json(request)
    agent_name = body["name"]
    parsed_config = ExtractConfig.parse_obj(body["config"])
    schema = body["data_schema"]
    new_agent_id = self._server.new_id("agent")
    project_id = request.url.params.get("project_id", self._server.default_project_id)
    created = ExtractAgent(
        id=new_agent_id,
        name=agent_name,
        config=parsed_config,
        data_schema=schema,
        project_id=project_id,
        created_at=utcnow(),
        updated_at=utcnow(),
        custom_configuration=None,
    )
    self._agents[new_agent_id] = created
    # A later agent with the same name takes over the name lookup.
    self._agents_by_name[agent_name] = new_agent_id
    return self._server.json_response(created.dict())
def _handle_update_agent(self, request: httpx.Request) -> httpx.Response:
    """Replace an agent's config and/or data schema; 404 for an unknown id.

    Fields missing from the body keep their current value.
    """
    _, _, agent_id = request.url.path.rpartition("/")
    if agent_id not in self._agents:
        return self._server.json_response({"detail": "Agent not found"}, status_code=404)
    body = self._server.json(request)
    current = self._agents[agent_id]
    new_config = body.get("config", current.config)
    if isinstance(new_config, dict):
        # Raw JSON from the request; an untouched config is already a model.
        new_config = ExtractConfig.parse_obj(new_config)
    changes = {
        "config": new_config,
        "data_schema": body.get("data_schema", current.data_schema),
        "updated_at": utcnow(),
    }
    replacement = current.copy(update=changes)
    self._agents[agent_id] = replacement
    return self._server.json_response(replacement.dict())
def _handle_get_agent(self, request: httpx.Request) -> httpx.Response:
    """Return the agent whose id is the last URL path segment, or a 404."""
    _, _, agent_id = request.url.path.rpartition("/")
    found = self._agents.get(agent_id)
    if found:
        return self._server.json_response(found.dict())
    return self._server.json_response({"detail": "Agent not found"}, status_code=404)
def _handle_get_agent_by_name(self, request: httpx.Request) -> httpx.Response:
    """Return the agent registered under the name in the last path segment, or a 404."""
    _, _, agent_name = request.url.path.rpartition("/")
    agent_id = self._agents_by_name.get(agent_name)
    if agent_id:
        return self._server.json_response(self._agents[agent_id].dict())
    return self._server.json_response({"detail": "Agent not found"}, status_code=404)
def _handle_list_agents(self, request: httpx.Request) -> httpx.Response:
    """List stored agents.

    When ``include_default=true`` is passed and no agent exists, a freshly
    built (unstored) default agent is returned as the only entry.
    """
    flag = request.url.params.get("include_default", "false")
    wants_default = flag.lower() == "true"
    agents = list(self._agents.values())
    if wants_default and not agents:
        agents.append(
            self._build_ephemeral_agent(
                ExtractConfig(),
                {"type": "object", "properties": {}},
                self._server.default_project_id,
            )
        )
    serialised = [agent.dict() for agent in agents]
    return self._server.json_response(serialised)
def _handle_get_default_agent(self, request: httpx.Request) -> httpx.Response:
    """Return the first stored agent, or a freshly built one when none exist."""
    first = next(iter(self._agents.values()), None) if self._agents else None
    if self._agents:
        agent = first
    else:
        agent = self._build_ephemeral_agent(
            ExtractConfig(),
            {"type": "object", "properties": {}},
            self._server.default_project_id,
        )
    return self._server.json_response(agent.dict())
def _handle_delete_agent(self, request: httpx.Request) -> httpx.Response:
    """Delete the agent named by the last path segment; always answers 200.

    The name index is only cleared when it still points at the deleted
    agent. Creating a second agent with the same name re-points the index,
    and deleting the older agent used to wipe the newer agent's name entry.
    """
    agent_id = request.url.path.split("/")[-1]
    agent = self._agents.pop(agent_id, None)
    if agent is not None and self._agents_by_name.get(agent.name) == agent_id:
        del self._agents_by_name[agent.name]
    return self._server.json_response({}, status_code=200)
def _handle_validate_schema(self, request: httpx.Request) -> httpx.Response:
    """Echo the submitted ``data_schema`` back unchanged; no validation is performed."""
    submitted = self._server.json(request)["data_schema"]
    return self._server.json_response({"data_schema": submitted})
def _handle_agent_job(self, request: httpx.Request) -> httpx.Response:
    """Create a job (and its run) for one file using a stored agent."""
    return self._run_agent_job(request, self._server.json(request))


def _handle_agent_job_batch(self, request: httpx.Request) -> httpx.Response:
    """Create one job per entry in ``file_ids`` and return them as a list.

    Stops at the first failing file and returns that error response; jobs
    created for earlier files stay stored.

    The previous version called ``request.copy()`` and overwrote
    ``_content``; ``httpx.Request`` has no ``copy`` method, so every batch
    call failed. Each per-file payload is now passed straight to the shared
    job logic.
    """
    payload = self._server.json(request)
    jobs = []
    for file_id in payload.get("file_ids", []):
        response = self._run_agent_job(request, {**payload, "file_id": file_id})
        if response.status_code != 200:
            return response
        jobs.append(response.json())
    return self._server.json_response(jobs)


def _run_agent_job(self, request: httpx.Request, payload: Dict[str, Any]) -> httpx.Response:
    """Shared job creation for the single and batch agent routes.

    Returns a 404 response for an unknown agent or file. A stub queued via
    ``stub_agent_run`` may override the job status, run status and error.
    """
    agent_id = payload["extraction_agent_id"]
    agent = self._agents.get(agent_id)
    if not agent:
        return self._server.json_response({"detail": "Agent not found"}, status_code=404)
    # Use the namespace's public accessor, like the other handlers do.
    stored_file = self._files.get(payload["file_id"])
    if not stored_file:
        return self._server.json_response({"detail": "File not found"}, status_code=404)

    # "or" so an explicit null override falls back to the agent's own value.
    schema = payload.get("data_schema_override") or agent.data_schema
    config_payload = payload.get("config_override") or agent.config
    if isinstance(config_payload, dict):
        config = ExtractConfig.parse_obj(config_payload)
    else:
        config = config_payload

    # Give matchers the same file/schema details the stateless route provides;
    # the schema hash used to be computed and then thrown away.
    context = RequestContext(
        request=request,
        json=payload,
        file_id=stored_file.file.id,
        filename=stored_file.file.name,
        file_sha256=stored_file.sha256,
        schema_hash=hash_schema(schema),
        project_id=agent.project_id,
        organization_id=self._server.default_organization_id,
    )
    stub = self._pop_agent_stub(agent_id, context)

    job_status = StatusEnum.SUCCESS
    run_status = ExtractState.SUCCESS
    error = None
    if stub:
        if stub.job_status:
            job_status = StatusEnum(stub.job_status)
        if stub.run_status:
            run_status = ExtractState(stub.run_status)
        if stub.error:
            error = stub.error

    stored = self._create_job_and_run(
        agent=agent,
        config=config,
        data_schema=schema,
        file_info=stored_file,
        job_status=job_status,
        run_status=run_status,
        metadata={"agent": {"value": agent.id}},
        data=self._generate_run_data(schema, stored_file.sha256),
        error=error,
        project_id=agent.project_id,
    )
    return self._server.json_response(stored.job.dict())
def _handle_list_jobs(self, request: httpx.Request) -> httpx.Response:
    """List stored jobs, optionally restricted to one ``extraction_agent_id``."""
    wanted_agent = request.url.params.get("extraction_agent_id")
    listing = [
        stored.job.dict()
        for stored in self._jobs.values()
        if not (wanted_agent and stored.job.extraction_agent.id != wanted_agent)
    ]
    return self._server.json_response(listing)
def _handle_get_job(self, request: httpx.Request) -> httpx.Response:
    """Return the job whose id is the last URL path segment, or a 404."""
    _, _, job_id = request.url.path.rpartition("/")
    stored = self._jobs.get(job_id)
    if stored:
        return self._server.json_response(stored.job.dict())
    return self._server.json_response({"detail": "Job not found"}, status_code=404)
def _handle_get_run_by_job(self, request: httpx.Request) -> httpx.Response:
    """Return the run that belongs to the job id in the last path segment, or a 404."""
    _, _, job_id = request.url.path.rpartition("/")
    stored = self._jobs.get(job_id)
    if stored:
        return self._server.json_response(stored.run.dict())
    return self._server.json_response({"detail": "Run not found"}, status_code=404)
def _handle_get_run(self, request: httpx.Request) -> httpx.Response:
    """Return the run whose id is the last URL path segment, or a 404."""
    _, _, run_id = request.url.path.rpartition("/")
    found = self._runs.get(run_id)
    if found:
        return self._server.json_response(found.dict())
    return self._server.json_response({"detail": "Run not found"}, status_code=404)
def _handle_delete_run(self, request: httpx.Request) -> httpx.Response:
    """Forget a run and every job that points at it; always answers 200."""
    _, _, run_id = request.url.path.rpartition("/")
    self._runs.pop(run_id, None)
    # Collect the ids first: the dict must not change size while iterated.
    orphaned = [job_id for job_id, stored in self._jobs.items() if stored.run.id == run_id]
    for job_id in orphaned:
        del self._jobs[job_id]
    return self._server.json_response({}, status_code=200)
def _handle_list_runs(self, request: httpx.Request) -> httpx.Response:
    """Return one page of runs, optionally filtered by ``extraction_agent_id``.

    ``skip`` defaults to 0 and ``limit`` to 50; ``total`` counts the runs
    after filtering, before paging.
    """
    params = request.url.params
    agent_id = params.get("extraction_agent_id")
    skip = int(params.get("skip", "0"))
    limit = int(params.get("limit", "50"))
    matching = []
    for stored in self._jobs.values():
        if agent_id and stored.job.extraction_agent.id != agent_id:
            continue
        matching.append(stored.run)
    response = PaginatedExtractRunsResponse(
        items=matching[skip : skip + limit],
        skip=skip,
        limit=limit,
        total=len(matching),
    )
    return self._server.json_response(response.dict())
# Internal helpers -----------------------------------------------
def _extract_file_info(self, payload: Dict[str, Any], request: httpx.Request) -> StoredFile:
    """Resolve the file a stateless extraction request refers to.

    Accepts, in priority order, a ``file_id`` of an already stored file,
    inline ``file`` data, or raw ``text``. Inline files and text are wrapped
    in a new ``StoredFile`` that is returned but not added to the file store.

    Raises:
        ValueError: if ``file_id`` is unknown or no file source is present.

    Inline text now honours the request's ``project_id`` query parameter,
    as inline files already did; it previously always used the default
    project.
    """
    if "file_id" in payload:
        stored = self._files.get(payload["file_id"])
        if not stored:
            raise ValueError("file_id not found in fake store")
        return stored
    if "file" in payload:
        content, filename = self._files.decode_file_data(payload)
        return self._inline_stored_file(
            request,
            content=content,
            filename=filename,
            fallback_name="inline-{file_id}",
            file_type=None,
        )
    if "text" in payload:
        return self._inline_stored_file(
            request,
            content=payload["text"].encode("utf-8"),
            filename=None,
            fallback_name="text-{file_id}.txt",
            file_type="text/plain",
        )
    raise ValueError("file payload missing")


def _inline_stored_file(
    self,
    request: httpx.Request,
    *,
    content: bytes,
    filename: Optional[str],
    fallback_name: str,
    file_type: Optional[str],
) -> StoredFile:
    """Wrap request-supplied bytes in a ``StoredFile`` with a fresh file id."""
    file_id = self._server.new_id("file")
    now = utcnow()
    cloud_file = CloudFile(
        id=file_id,
        # ``fallback_name`` is a template with a ``{file_id}`` placeholder.
        name=filename or fallback_name.format(file_id=file_id),
        project_id=request.url.params.get("project_id", self._server.default_project_id),
        external_file_id=None,
        file_size=len(content),
        file_type=file_type,
        created_at=now,
        updated_at=now,
        data_source_id=None,
        permission_info=None,
        resource_info=None,
        last_modified_at=now,
    )
    return StoredFile(
        file=cloud_file,
        content=content,
        sha256=fingerprint_file(content, filename),
    )
def _build_ephemeral_agent(
    self,
    config: ExtractConfig,
    data_schema: Dict[str, Any],
    project_id: str,
) -> ExtractAgent:
    """Build an agent named ``"stateless-agent"`` with a fresh id.

    The agent is returned to the caller only; it is not added to the agent
    store.
    """
    fields = {
        "id": self._server.new_id("agent"),
        "name": "stateless-agent",
        "config": config,
        "data_schema": data_schema,
        "project_id": project_id,
        "created_at": utcnow(),
        "updated_at": utcnow(),
        "custom_configuration": None,
    }
    return ExtractAgent(**fields)
def _generate_run_data(self, schema: Dict[str, Any], file_hash: str) -> Any:
    """Generate fake extraction data seeded by the file fingerprint and the schema hash."""
    schema_digest = hash_schema(schema)
    return generate_data_from_schema(schema, combined_seed(file_hash, schema_digest))
def _create_job_and_run(
    self,
    *,
    agent: ExtractAgent,
    config: ExtractConfig,
    data_schema: Dict[str, Any],
    file_info: StoredFile,
    job_status: StatusEnum,
    run_status: ExtractState,
    metadata: Dict[str, Any],
    data: Any,
    error: Optional[str],
    project_id: str,
) -> StoredRun:
    """Create a job and its run, store both, and return them as a pair.

    The job id is allocated before the run id; the run is indexed by its
    own id and, together with the job, by the job id.
    """
    job_id = self._server.new_id("job")
    run_id = self._server.new_id("run")
    timestamp = utcnow()
    cloud_file = file_info.file
    pair = StoredRun(
        job=ExtractJob(
            id=job_id,
            file=cloud_file,
            extraction_agent=agent,
            status=job_status,
            error=error,
        ),
        run=ExtractRun(
            id=run_id,
            job_id=job_id,
            file=cloud_file,
            extraction_agent_id=agent.id,
            status=run_status,
            config=config,
            data_schema=data_schema,
            data=data,
            extraction_metadata=metadata,
            created_at=timestamp,
            updated_at=timestamp,
            from_ui=False,
            error=error,
            project_id=project_id,
        ),
    )
    self._jobs[job_id] = pair
    self._runs[run_id] = pair.run
    return pair
def _pop_stub(
    self,
    stubs: List[ExtractRunStub],
    context: RequestContext,
) -> Optional[ExtractRunStub]:
    """Return the first stub whose matcher accepts *context*, or ``None``.

    A matching stub flagged ``once`` is removed from *stubs* in place.
    """
    snapshot = list(stubs)
    position = next(
        (index for index, stub in enumerate(snapshot) if context.matches(stub.matcher)),
        None,
    )
    if position is None:
        return None
    chosen = snapshot[position]
    if chosen.once:
        stubs.pop(position)
    return chosen
def _pop_agent_stub(
    self,
    agent_id: str,
    context: RequestContext,
) -> Optional[AgentRunStub]:
    """Return the first stub for *agent_id* whose matcher accepts *context*.

    Stubs registered for other agents are skipped without consulting their
    matcher. A matching stub flagged ``once`` is removed from the queue.
    """
    snapshot = list(self._agent_run_stubs)
    position = next(
        (
            index
            for index, stub in enumerate(snapshot)
            if stub.agent_id == agent_id and context.matches(stub.matcher)
        ),
        None,
    )
    if position is None:
        return None
    chosen = snapshot[position]
    if chosen.once:
        self._agent_run_stubs.pop(position)
    return chosen
@@ -0,0 +1,309 @@
from __future__ import annotations
import base64
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from urllib.parse import urlencode
import httpx
import respx
from llama_cloud.types import File as CloudFile
from llama_cloud.types import FileIdPresignedUrl, PresignedUrl
from ._deterministic import fingerprint_file, hash_chunks, utcnow
from .matchers import RequestContext, RequestMatcher
if TYPE_CHECKING:
from .server import FakeLlamaCloudServer
# A file held by the fake: API metadata, raw bytes and a fingerprint.
@dataclass(slots=True)
class StoredFile:
# Metadata model returned by the file routes.
file: CloudFile
# Raw file bytes.
content: bytes
# Fingerprint produced by fingerprint_file(content, name) where this module
# builds files, so it covers the name as well as the bytes.
sha256: str
# Details recorded when a presigned upload URL is issued and consumed when
# the upload itself arrives (see _handle_presigned_upload).
@dataclass(slots=True)
class PendingUpload:
file_id: str
filename: str
project_id: str
organization_id: str
external_file_id: Optional[str]
# Taken from the request's "file_size" field.
expected_size: Optional[int]
class FakeFilesNamespace:
# Set up empty in-memory state; routes are attached later by register().
def __init__(
self,
*,
server: "FakeLlamaCloudServer",
upload_base_url: str,
download_base_url: str,
) -> None:
self._server = server
# Trailing slashes are stripped so URLs can be built with "/" joins.
self._upload_base_url = upload_base_url.rstrip("/")
self._download_base_url = download_base_url.rstrip("/")
# file id -> stored file
self._files: Dict[str, StoredFile] = {}
# file id -> upload announced via a presigned URL but not yet received
self._pending: Dict[str, PendingUpload] = {}
# (matcher, status code, JSON body, once) tuples queued by stub_upload.
self._upload_stubs: List[tuple[RequestMatcher | None, int, Dict[str, Any], bool]] = []
# Route objects that register() stores under "upload" and "get".
self.routes: Dict[str, respx.Route] = {}
# Public helpers -------------------------------------------------
def preload(self, *, path: str | Path, filename: Optional[str] = None) -> str:
    """Read a file from disk into the fake store and return its new file id.

    The stored name is *filename* when given, otherwise the path's final
    component; the file is placed in the server's default project and
    organization.
    """
    source = Path(path)
    data = source.read_bytes()
    new_file_id = self._server.new_id("file")
    self._files[new_file_id] = self._build_file(
        file_id=new_file_id,
        name=filename or source.name,
        project_id=self._server.default_project_id,
        organization_id=self._server.default_organization_id,
        content=data,
        external_file_id=None,
    )
    return new_file_id
def read(self, file_id: str) -> bytes:
    """Return the stored bytes for *file_id*; raises ``KeyError`` if unknown."""
    stored = self._files[file_id]
    return stored.content
def get(self, file_id: str) -> Optional[StoredFile]:
    """Return the stored file for *file_id*, or ``None`` when it is unknown."""
    found = self._files.get(file_id)
    return found
def stub_upload(
    self,
    matcher: Optional[RequestMatcher],
    *,
    status_code: int = 413,
    json_body: Optional[Dict[str, Any]] = None,
    once: bool = True,
) -> None:
    """Queue a canned response for presigned uploads that match *matcher*.

    Args:
        matcher: Handed to ``RequestContext.matches`` for each upload.
        status_code: Status of the canned response (default 413).
        json_body: JSON body of the canned response. Only ``None`` selects
            the default rejection body; an explicit empty dict is kept
            (it used to be replaced by the default).
        once: Remove the stub after its first match.
    """
    if json_body is None:
        json_body = {"detail": "upload rejected by fake server"}
    self._upload_stubs.append((matcher, status_code, json_body, once))
# Route registration ---------------------------------------------
def register(self) -> None:
    """Attach the file routes to the owning server.

    API routes use the server's default base URL; the presigned upload and
    download routes are bound to their dedicated base URLs. The "upload" and
    "get" routes are kept in ``self.routes``.
    """
    server = self._server

    def add(method: str, path: str, handler: Any, **options: Any) -> Any:
        return server.add_route(method, path, handler, namespace="files", **options)

    add(
        "PUT",
        "/api/v1/files",
        self._handle_generate_presigned_url,
        alias="generate_presigned_url",
    )
    self.routes["upload"] = add(
        "POST", "/api/v1/files", self._handle_direct_upload, alias="upload"
    )
    self.routes["get"] = add(
        "GET", "/api/v1/files/{file_id}", self._handle_get_metadata, alias="get"
    )
    add("DELETE", "/api/v1/files/{file_id}", self._handle_delete)
    add(
        "GET",
        "/api/v1/files/{file_id}/content",
        self._handle_read_content,
        alias="read_content",
    )
    add(
        "PUT",
        "/upload/{file_id}",
        self._handle_presigned_upload,
        base_urls=[self._upload_base_url],
        alias="presigned_upload",
    )
    add(
        "GET",
        "/files/{file_id}",
        self._handle_presigned_download,
        base_urls=[self._download_base_url],
        alias="download",
    )
# Handlers -------------------------------------------------------
def _handle_generate_presigned_url(self, request: httpx.Request) -> httpx.Response:
    """Reserve a file id and return the URL the client should upload to.

    The upload is remembered as pending until the PUT to that URL arrives.
    Note that ``expires_at`` is set to the current time.
    """
    body = self._server.json(request)
    issued_at = utcnow()
    file_id = self._server.new_id("file")
    params = request.url.params
    self._pending[file_id] = PendingUpload(
        file_id=file_id,
        filename=body.get("name") or f"upload-{file_id}.bin",
        project_id=params.get("project_id", self._server.default_project_id),
        organization_id=params.get("organization_id", self._server.default_organization_id),
        external_file_id=body.get("external_file_id"),
        expected_size=body.get("file_size"),
    )
    upload_url = f"{self._upload_base_url}/upload/{file_id}"
    presigned = FileIdPresignedUrl(
        file_id=file_id,
        url=upload_url,
        expires_at=issued_at,
        form_fields=None,
    )
    return self._server.json_response(presigned.dict())
def _handle_direct_upload(self, request: httpx.Request) -> httpx.Response:
    """Store a multipart-uploaded file and return its metadata.

    Project, organization and external file id come from the query string,
    falling back to the server defaults for the first two.
    """
    file_bytes, filename = self._extract_multipart_file(request)
    file_id = self._server.new_id("file")
    params = request.url.params
    stored = self._build_file(
        file_id=file_id,
        name=filename or f"upload-{file_id}.bin",
        project_id=params.get("project_id", self._server.default_project_id),
        organization_id=params.get("organization_id", self._server.default_organization_id),
        content=file_bytes,
        external_file_id=params.get("external_file_id"),
    )
    self._files[file_id] = stored
    return self._server.json_response(stored.file.dict())
def _handle_get_metadata(self, request: httpx.Request) -> httpx.Response:
    """Return metadata for the file id in the last path segment, or a 404."""
    _, _, file_id = request.url.path.rpartition("/")
    if file_id in self._files:
        return self._server.json_response(self._files[file_id].file.dict())
    return self._server.json_response({"detail": "File not found"}, status_code=404)
def _handle_delete(self, request: httpx.Request) -> httpx.Response:
    """Drop a stored file and any pending upload for its id; always answers 200."""
    _, _, file_id = request.url.path.rpartition("/")
    for registry in (self._files, self._pending):
        registry.pop(file_id, None)
    return self._server.json_response({}, status_code=200)
def _handle_read_content(self, request: httpx.Request) -> httpx.Response:
file_id = request.url.path.split("/")[-2]
if file_id not in self._files:
return self._server.json_response({"detail": "File not found"}, status_code=404)
presigned = PresignedUrl(
url=f"{self._download_base_url}/files/{file_id}?{urlencode({'token': 'fake'})}",
expires_at=utcnow(),
form_fields=None,
)
return self._server.json_response(presigned.dict())
def _handle_presigned_upload(self, request: httpx.Request) -> httpx.Response:
    """Accept the body sent to a fake presigned upload URL.

    Registered upload stubs are consulted first, in order; the first stub
    whose matcher accepts the request decides the response, and a ``once``
    stub is removed after it fires. Without a matching stub the body is
    stored under the pending reservation for the file id (last path
    segment) and a bare 204 is returned. An id with no reservation yields
    a 404 JSON response.
    """
    file_id = request.url.path.split("/")[-1]
    pending = self._pending.get(file_id)
    context = RequestContext(
        request=request,
        json=None,
        file_id=file_id,
        filename=pending.filename if pending else None,
        file_sha256=hash_chunks([request.content]),
        # Carry the reservation's scope so matchers that filter on
        # project_id / organization_id can match upload requests.
        project_id=pending.project_id if pending else None,
        organization_id=pending.organization_id if pending else None,
    )
    for index, (matcher, status, body, once) in enumerate(self._upload_stubs):
        if context.matches(matcher):
            if once:
                # Safe to mutate while iterating: we return immediately.
                self._upload_stubs.pop(index)
            return self._server.json_response(body, status_code=status)
    if pending is None:
        return self._server.json_response({"detail": "Unknown file"}, status_code=404)
    stored = self._build_file(
        file_id=file_id,
        name=pending.filename,
        project_id=pending.project_id,
        organization_id=pending.organization_id,
        content=request.content,
        external_file_id=pending.external_file_id,
    )
    self._files[file_id] = stored
    self._pending.pop(file_id, None)
    return httpx.Response(204)
def _handle_presigned_download(self, request: httpx.Request) -> httpx.Response:
    """Serve the stored bytes for the file id in the last path segment.

    Returns a 404 JSON response when nothing is stored under that id.
    """
    file_id = request.url.path.rsplit("/", 1)[-1]
    record = self._files.get(file_id)
    if record:
        return httpx.Response(200, content=record.content)
    return httpx.Response(404, json={"detail": "File not found"})
# Internal helpers -----------------------------------------------
def _build_file(
    self,
    *,
    file_id: str,
    name: str,
    project_id: str,
    organization_id: str,
    content: bytes,
    external_file_id: Optional[str],
) -> StoredFile:
    """Assemble the stored record (metadata, bytes, fingerprint) for a file.

    ``organization_id`` is accepted for call-site symmetry but is not
    copied onto the metadata object.
    """
    digest = fingerprint_file(content, name)
    timestamp = utcnow()
    # The file's suffix doubles as its type; names without one get a
    # generic fallback value.
    detected_type = Path(name).suffix or "application/octet-stream"
    metadata = CloudFile(
        id=file_id,
        name=name,
        project_id=project_id,
        external_file_id=external_file_id,
        file_size=len(content),
        file_type=detected_type,
        created_at=timestamp,
        updated_at=timestamp,
        data_source_id=None,
        permission_info=None,
        resource_info=None,
        last_modified_at=timestamp,
    )
    return StoredFile(file=metadata, content=content, sha256=digest)
def _extract_multipart_file(self, request: httpx.Request) -> tuple[bytes, Optional[str]]:
content_type = request.headers.get("content-type", "")
if "multipart/form-data" not in content_type:
raise ValueError("Expected multipart upload")
boundary = content_type.split("boundary=")[-1]
boundary_bytes = boundary.encode("utf-8")
body = request.content
delimiter = b"--" + boundary_bytes
parts = [part for part in body.split(delimiter) if part.strip(b"\r\n") and part.strip(b"\r\n") != b"--"]
for part in parts:
headers, _, payload = part.partition(b"\r\n\r\n")
header_text = headers.decode("utf-8", errors="ignore")
if 'name="upload_file"' in header_text or 'name="file"' in header_text:
filename = None
if "filename=" in header_text:
filename = (
header_text.split("filename=")[-1]
.strip()
.strip('"')
.strip("'")
)
return payload.rstrip(b"\r\n"), filename
raise ValueError("upload file part not found")
def decode_file_data(self, data: Dict[str, Any]) -> tuple[bytes, Optional[str]]:
    """Decode an inline base64 file payload.

    Expects ``data["file"]`` to be a mapping with a base64 ``data`` entry
    and an optional ``filename``.

    Raises:
        ValueError: if ``data`` has no ``file`` key.
    """
    if "file" not in data:
        raise ValueError("file payload missing")
    entry = data["file"]
    raw_bytes = base64.b64decode(entry["data"])
    return raw_bytes, entry.get("filename")
@@ -0,0 +1,98 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable, Optional
import httpx
MatcherPredicate = Callable[[httpx.Request], bool]
@dataclass(slots=True)
class FileMatcher:
filename: Optional[str] = None
sha256: Optional[str] = None
file_id: Optional[str] = None
@dataclass(slots=True)
class SchemaMatcher:
model: Optional[type] = None
schema_hash: Optional[str] = None
@dataclass(slots=True)
class RequestMatcher:
file: Optional[FileMatcher | MatcherPredicate] = None
schema: Optional[SchemaMatcher] = None
agent_id: Optional[str] = None
project_id: Optional[str] = None
organization_id: Optional[str] = None
predicate: Optional[MatcherPredicate] = None
@dataclass(slots=True)
class RequestContext:
request: httpx.Request
json: Optional[dict[str, Any]]
file_id: Optional[str] = None
filename: Optional[str] = None
file_sha256: Optional[str] = None
schema_hash: Optional[str] = None
agent_id: Optional[str] = None
project_id: Optional[str] = None
organization_id: Optional[str] = None
def matches(self, matcher: Optional[RequestMatcher]) -> bool:
if matcher is None:
return True
if matcher.project_id and matcher.project_id != self.project_id:
return False
if matcher.organization_id and matcher.organization_id != self.organization_id:
return False
if matcher.agent_id and matcher.agent_id != self.agent_id:
return False
if matcher.file:
if isinstance(matcher.file, FileMatcher):
if matcher.file.filename and matcher.file.filename != self.filename:
return False
if matcher.file.file_id and matcher.file.file_id != self.file_id:
return False
if matcher.file.sha256 and matcher.file.sha256 != self.file_sha256:
return False
else:
if not matcher.file(self.request):
return False
if matcher.schema:
if matcher.schema.schema_hash and matcher.schema.schema_hash != self.schema_hash:
return False
if matcher.schema.model and matcher.schema.schema_hash:
return matcher.schema.schema_hash == self.schema_hash
if matcher.schema.model and matcher.schema.schema_hash is None:
expected = _schema_hash_from_model(matcher.schema.model)
return expected == self.schema_hash
if matcher.predicate and not matcher.predicate(self.request):
return False
return True
def _schema_hash_from_model(model: type) -> Optional[str]:
if hasattr(model, "model_json_schema"):
schema = model.model_json_schema()
elif hasattr(model, "schema"):
schema = model.schema() # type: ignore[attr-defined]
else:
return None
from ._deterministic import hash_schema
return hash_schema(schema)
@@ -0,0 +1,149 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict
import httpx
from ._deterministic import generate_text_blob, hash_schema
if TYPE_CHECKING:
from .server import FakeLlamaCloudServer
@dataclass(slots=True)
class ParseJobRecord:
job_id: str
file_name: str
status: str
result: Dict[str, Any]
content: bytes
class FakeParseNamespace:
    """Fake implementation of the ``/api/parsing/*`` routes.

    Uploads complete immediately: each upload creates a job that is already
    in ``SUCCESS`` state, with a one-page result whose text is generated from
    a hash of the filename and the submitted form fields.
    """

    def __init__(self, *, server: "FakeLlamaCloudServer") -> None:
        self._server = server
        self._jobs: Dict[str, ParseJobRecord] = {}
        self.routes: Dict[str, Any] = {}

    def register(self) -> None:
        """Register the upload, job-status and job-result routes on the server."""
        server = self._server
        server.add_route(
            "POST",
            "/api/parsing/upload",
            self._handle_upload,
            namespace="parse",
        )
        server.add_route(
            "GET",
            "/api/parsing/job/{job_id}",
            self._handle_job_status,
            namespace="parse",
        )
        server.add_route(
            "GET",
            "/api/parsing/job/{job_id}/result/{result_type}",
            self._handle_job_result,
            namespace="parse",
        )

    def _handle_upload(self, request: httpx.Request) -> httpx.Response:
        """Create a finished parse job from a multipart upload and return its id."""
        file_bytes, filename, form_data = self._split_multipart(request)
        job_id = self._server.new_id("parse-job")
        # Same filename + form fields => same seed => same generated text.
        seed = hash_schema({"filename": filename, "form": form_data})
        page_text = generate_text_blob(seed, sentences=3)
        pages = [
            {
                "page": index + 1,
                "text": f"{page_text} (page {index + 1})",
                "md": f"{page_text} (page {index + 1})",
                "images": [],
                "charts": [],
                "tables": [],
                "layout": [],
                "items": [],
                "status": "SUCCESS",
                "links": [],
                "width": 8.5,
                "height": 11.0,
                "parsingMode": "deterministic",
                "structuredData": {},
                "noStructuredContent": False,
                "noTextContent": False,
                "isAudioTranscript": False,
                "durationInSeconds": None,
                "slideSpeakerNotes": None,
            }
            for index in range(1)
        ]
        result = {
            "job_id": job_id,
            "status": "SUCCESS",
            "file_name": filename,
            "is_done": True,
            "pages": pages,
            "job_metadata": {"job_pages": len(pages)},
            "text": "\n\n".join(page["text"] for page in pages),
            "markdown": "\n\n".join(page["md"] for page in pages),
            "json": {"pages": pages},
        }
        self._jobs[job_id] = ParseJobRecord(
            job_id=job_id,
            file_name=filename,
            status="SUCCESS",
            result=result,
            content=file_bytes,
        )
        return self._server.json_response({"id": job_id})

    def _handle_job_status(self, request: httpx.Request) -> httpx.Response:
        """Report the status of the job named by the last path segment."""
        job_id = request.url.path.split("/")[-1]
        job = self._jobs.get(job_id)
        if not job:
            return self._server.json_response({"detail": "Job not found"}, status_code=404)
        return self._server.json_response({"id": job_id, "status": job.status})

    def _handle_job_result(self, request: httpx.Request) -> httpx.Response:
        """Return the stored result payload for a job.

        The job id is the third-from-last path segment; the trailing result
        type segment does not change the payload.
        """
        job_id = request.url.path.split("/")[-3]
        job = self._jobs.get(job_id)
        if not job:
            return self._server.json_response({"detail": "Result not found"}, status_code=404)
        return self._server.json_response(job.result)

    @staticmethod
    def _filename_from_headers(header_text: str):
        """Return the ``filename`` parameter value from part headers, or None."""
        import re  # local import: not among this module's top-level imports

        found = re.search(
            r"filename=(?:\"([^\"]*)\"|'([^']*)'|([^;\r\n]*))", header_text
        )
        if not found:
            return None
        return next(group for group in found.groups() if group is not None).strip()

    def _split_multipart(
        self, request: httpx.Request
    ) -> tuple[bytes, str, Dict[str, str]]:
        """Split a multipart body into file bytes, filename and plain form fields.

        Returns:
            ``(file_bytes, filename, form_data)``. ``filename`` defaults to
            ``"upload.pdf"`` when no part carries one.

        Raises:
            ValueError: if the request is not multipart, or no non-empty file
                part is present.
        """
        content_type = request.headers.get("content-type", "")
        if "multipart/form-data" not in content_type:
            raise ValueError("Expected multipart form data for parse upload")
        # The boundary parameter may be quoted and may be followed by others.
        boundary = content_type.split("boundary=")[-1].split(";")[0].strip().strip('"')
        delimiter = f"--{boundary}".encode()

        file_bytes = b""
        filename = "upload.pdf"
        form_data: Dict[str, str] = {}
        for chunk in request.content.split(delimiter):
            trimmed = chunk.strip(b"\r\n")
            if not trimmed or trimmed == b"--":
                # Preamble before the first delimiter, or the closing "--"
                # marker, which must not be recorded as a form field.
                continue
            header_blob, _, payload = chunk.partition(b"\r\n\r\n")
            # Remove only the CRLF that precedes the next delimiter; any other
            # leading/trailing whitespace is part of the payload.
            if payload.endswith(b"\r\n"):
                payload = payload[:-2]
            header_text = header_blob.decode("utf-8", errors="ignore")
            part_filename = self._filename_from_headers(header_text)
            if part_filename is not None:
                filename = part_filename
                file_bytes = payload
            else:
                name = header_text.split('name="')[-1].split('"')[0].strip()
                form_data[name] = payload.decode("utf-8", errors="ignore")
        if not file_bytes:
            raise ValueError("File part missing from multipart payload")
        return file_bytes, filename, form_data
@@ -0,0 +1,171 @@
from __future__ import annotations
import json
import re
import uuid
from typing import Any, Callable, Dict, Optional, Sequence
import httpx
import respx
from .classify import FakeClassifyNamespace
from .extract import FakeExtractNamespace
from .files import FakeFilesNamespace
from .parse import FakeParseNamespace
Handler = Callable[[httpx.Request], httpx.Response]
class FakeLlamaCloudServer:
    """In-process fake of the LlamaCloud HTTP API built on a respx ``MockRouter``.

    Routes are contributed by the ``files``, ``extract``, ``parse`` and
    ``classify`` namespace objects. Use the instance as a context manager, or
    call ``install()`` / ``uninstall()`` explicitly.
    """

    DEFAULT_BASE_URL = "https://api.cloud.llamaindex.ai"
    DEFAULT_UPLOAD_BASE = "https://uploads.fake-llama.test"
    DEFAULT_DOWNLOAD_BASE = "https://downloads.fake-llama.test"

    def __init__(
        self,
        *,
        base_urls: Optional[Sequence[str]] = None,
        namespaces: Optional[Sequence[str]] = None,
        upload_base_url: Optional[str] = None,
        download_base_url: Optional[str] = None,
        default_project_id: str = "proj-test",
        default_organization_id: str = "org-test",
    ) -> None:
        """Configure the fake.

        Args:
            base_urls: API base URLs to intercept; defaults to ``DEFAULT_BASE_URL``.
            namespaces: Names of the namespaces whose routes get registered
                (case-insensitive); defaults to all four.
            upload_base_url: Host used for fake presigned upload URLs.
            download_base_url: Host used for fake presigned download URLs.
            default_project_id: Project id available to handlers as a fallback.
            default_organization_id: Organization id available to handlers as
                a fallback.
        """
        self.base_urls = tuple(base_urls or (self.DEFAULT_BASE_URL,))
        selected = namespaces or ("files", "extract", "parse", "classify")
        self._namespace_names = {name.lower() for name in selected}
        self._upload_base_url = upload_base_url or self.DEFAULT_UPLOAD_BASE
        self._download_base_url = download_base_url or self.DEFAULT_DOWNLOAD_BASE
        self.default_project_id = default_project_id
        self.default_organization_id = default_organization_id
        self.router = respx.MockRouter(assert_all_called=False)
        self._installed = False
        self._registered = False
        self.files = FakeFilesNamespace(
            server=self,
            upload_base_url=self._upload_base_url,
            download_base_url=self._download_base_url,
        )
        self.extract = FakeExtractNamespace(server=self, files=self.files)
        self.parse = FakeParseNamespace(server=self)
        self.classify = FakeClassifyNamespace(server=self, files=self.files)

    # Context management ----------------------------------------------
    def install(self) -> "FakeLlamaCloudServer":
        """Register routes (first call only) and activate the router; idempotent."""
        if not self._registered:
            self._register_namespaces()
        if not self._installed:
            self.router.__enter__()
            self._installed = True
        return self

    def uninstall(self) -> None:
        """Deactivate the router if it is currently installed."""
        if self._installed:
            self.router.__exit__(None, None, None)
            self._installed = False

    def __enter__(self) -> "FakeLlamaCloudServer":
        return self.install()

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        self.uninstall()

    # Route utilities -------------------------------------------------
    def add_route(
        self,
        method: str,
        path: str,
        handler: Handler,
        *,
        namespace: str,
        alias: Optional[str] = None,
        base_urls: Optional[Sequence[str]] = None,
    ) -> respx.Route:
        """Register ``handler`` for ``method`` + ``path`` on every base URL.

        Args:
            method: HTTP method.
            path: Route path; ``{name}`` placeholders match one path segment.
            handler: Callable turning the request into a response.
            namespace: Name of the owning namespace (currently informational).
            alias: When given, the first created route is also exposed as an
                attribute of this server under that name.
            base_urls: Override for the server-wide base URLs.

        Returns:
            The route created for the first base URL.
        """
        urls = base_urls or self.base_urls
        first_route: Optional[respx.Route] = None
        for base in urls:
            route = self._register_route(method, base, path, handler)
            if first_route is None:
                first_route = route
        # Identity check: do not rely on the truthiness of a route object.
        if alias and first_route is not None:
            setattr(self, alias, first_route)
        return first_route  # type: ignore[return-value]

    def _register_route(
        self,
        method: str,
        base: str,
        path: str,
        handler: Handler,
    ) -> respx.Route:
        """Create one route: regex-based for templated paths, exact URL otherwise."""
        url = self._build_url(base, path)
        if "{" in path:
            regex = self._compile_regex(base, path)
            route = self.router.route(method=method, url__regex=regex)
        else:
            route = self.router.route(method=method, url=url)
        # Bind the handler as a default argument so each route keeps its own.
        route.mock(side_effect=lambda request, func=handler: func(request))
        return route

    def _build_url(self, base: str, path: str) -> str:
        """Join ``base`` and ``path`` with exactly one slash between them."""
        base = base.rstrip("/")
        if not path.startswith("/"):
            path = "/" + path
        return f"{base}{path}"

    def _compile_regex(self, base: str, path: str) -> re.Pattern[str]:
        """Compile a URL regex for a templated path.

        Each ``{name}`` placeholder matches one non-empty path segment,
        literal text is escaped, and an optional query string is tolerated so
        requests carrying parameters such as ``?project_id=...`` still match.
        """
        if not path.startswith("/"):
            path = "/" + path
        escaped_base = re.escape(base.rstrip("/"))
        # re.split with a capture group puts placeholders at odd indexes.
        pieces = re.split(r"(\{[^/]+\})", path)
        regex_path = "".join(
            "[^/]+" if index % 2 else re.escape(piece)
            for index, piece in enumerate(pieces)
        )
        pattern = f"^{escaped_base}{regex_path}(?:\\?.*)?$"
        return re.compile(pattern)

    # Helpers ---------------------------------------------------------
    def json(self, request: httpx.Request) -> Dict[str, Any]:
        """Decode the request body as JSON; an empty body yields ``{}``."""
        if not request.content:
            return {}
        return json.loads(request.content.decode("utf-8"))

    def encode_json(self, payload: Dict[str, Any]) -> bytes:
        """Serialize ``payload`` to UTF-8 encoded JSON bytes."""
        return json.dumps(payload).encode("utf-8")

    def json_response(self, payload: Any, *, status_code: int = 200) -> httpx.Response:
        """Build a JSON response, serializing non-JSON values via ``_json_default``."""
        body = json.dumps(payload, default=self._json_default).encode("utf-8")
        headers = {"content-type": "application/json"}
        return httpx.Response(status_code=status_code, headers=headers, content=body)

    def new_id(self, prefix: str) -> str:
        """Return ``prefix`` plus ``_`` and eight random hex characters."""
        return f"{prefix}_{uuid.uuid4().hex[:8]}"

    # Internal --------------------------------------------------------
    def _json_default(self, value: Any) -> Any:
        """Convert values ``json.dumps`` cannot handle natively.

        Raises:
            TypeError: when no conversion applies.
        """
        if hasattr(value, "model_dump"):
            return value.model_dump()
        if hasattr(value, "dict"):
            return value.dict()
        if isinstance(value, (set, frozenset)):
            return list(value)
        if isinstance(value, (bytes, bytearray)):
            return value.decode("utf-8")
        if hasattr(value, "isoformat"):
            try:
                return value.isoformat()  # datetime/date support
            except Exception:
                # Best effort: fall through to the TypeError below.
                pass
        raise TypeError(f"{value!r} is not JSON serializable")

    def _register_namespaces(self) -> None:
        """Register the routes of every selected namespace, once."""
        if "files" in self._namespace_names:
            self.files.register()
        if "extract" in self._namespace_names:
            self.extract.register()
        if "parse" in self._namespace_names:
            self.parse.register()
        if "classify" in self._namespace_names:
            self.classify.register()
        self._registered = True
__all__ = ["FakeLlamaCloudServer"]
+2 -1
View File
@@ -37,7 +37,8 @@ dependencies = [
"eval-type-backport>=0.2.0,<0.3 ; python_version < '3.10'",
"platformdirs>=4.3.7,<5",
"tenacity>=8.5.0, <10.0",
"packaging>=23.0"
"packaging>=23.0",
"respx[tests]>=0.22.0"
]
[project.scripts]
+101
View File
@@ -0,0 +1,101 @@
# Testing Utils Implementation Plan
## Goal
Build a `FakeLlamaCloudServer` that intercepts all SDK HTTP traffic (extract, parse, classify, files, etc.) and returns deterministic responses so offline tests behave like production, per `testing_utils_spec.md`.
## High-Level Phases
1. **Router + lifecycle scaffold**
- Implement `FakeLlamaCloudServer` with `respx.Router` that can be used as a context manager or via explicit `install()` / `uninstall()`.
- Support multiple base URLs and namespace filtering so only selected APIs are intercepted.
2. **State stores + deterministic generators**
- Create in-memory stores for files, jobs, runs, parse results, and classification predictions.
- Implement deterministic data generation seeded by (file hash, schema hash, namespace) as described in the spec.
3. **Namespace handlers**
- Extract: stub `/api/v1/extraction/extraction-agents/*`, `/api/v1/extraction/run`, `/api/v1/extraction/jobs*`, `/api/v1/extraction/runs/by-job/{id}`.
- Files: stub `/api/v1/files/**` plus presigned upload/download workflows.
- Parse: stub `/api/parsing/upload`, `/api/parsing/job/{id}`, `/api/parsing/job/{id}/result/{result_type}`.
- Classify: stub `/api/v1/classifier/*` (job creation, polling, results).
4. **Matcher + override system**
- Implement `RequestMatcher`, `FileMatcher`, `SchemaMatcher`, etc., and expose helper APIs like `fake.extract.stub_run`.
5. **Ergonomic utilities**
- Provide helper shortcuts (`fake.extract.stateless_run`) and spy APIs (call count assertions).
6. **Docs + tests**
- Document usage in `testing_utils_spec.md`.
- Add tests that demonstrate end-to-end flows using the fake.
## Detailed Steps & Considerations
### 1. Router & Lifecycle
- Create a single `FakeLlamaCloudServer` class that holds a `respx.Router` configured for each base URL.
- Provide `__enter__/__exit__` plus `install()/uninstall()` to attach/detach the router.
- Complexity: need to handle both sync and async clients since `LlamaParse` uses raw `httpx.AsyncClient` instances constructed on the fly. Ensure `respx.mock(assert_all_called=False)` works for both.
### 2. State Management & Determinism
- Implement a `FileStore` that tracks uploaded file bytes, metadata, generated IDs, and seeded RNG values.
- Implement `ExtractStore`, `ParseStore`, `ClassifyStore` to track job lifecycles and generated runs.
- Deterministic generator design:
- Compute SHA256 of (file bytes + filename) and of normalized schema JSON.
- Combine into a seed (e.g., `seed = sha256(file_hash + schema_hash)`).
- Use that seed for namespace-specific RNG (extract uses schema walk, parse uses layout heuristics, classify uses label sets).
- Complexity: schema normalization requires Pydantic `model_json_schema()` ordering; ensure we match production ordering to avoid drift.
### 3. Namespace Handlers
#### Extract
- Stub endpoints listed under `LlamaExtract` usage (`create_extraction_agent`, `run_job`, stateless run, poll job/run).
- Mirror response bodies (`ExtractJob`, `ExtractRun`, `PaginatedExtractRunsResponse`) so the SDK's type deserialization works.
- Manage transitions `PENDING → SUCCESS/FAILED` with realistic timestamps.
#### Files
- Implement both presigned workflow and direct upload fallback:
- `POST /api/v1/files` (or equivalent) returns a fake presigned URL (e.g., `https://fake-upload.local/{file_id}`) that our router also intercepts.
- The subsequent `PUT` should store the bytes and mark upload complete.
- `GET /api/v1/files/{id}` returns stored metadata; `read_file_content` returns presigned download URLs or raw bytes.
- Complexity: need to intercept arbitrary presigned hostnames (e.g., AWS S3). Spec does not clarify if presigned URLs live on the same base; we may need to whitelist custom domains or provide a fake S3 host.
#### Parse
- Because `LlamaParse` manually constructs `/api/parsing/*` URLs, ensure the fake registers these exact routes against every provided base URL.
- Store job configs, return deterministic `JobResult` payloads (text, markdown, JSON), and support partitioned jobs.
#### Classify
- Stub job creation/polling/responses, ensuring statuses transition according to `StatusEnum`.
- Return deterministically chosen labels based on input payload + rules (seed derived from contents).
### 4. Matcher / Override System
- Provide dataclasses from the spec (`FileMatcher`, `SchemaMatcher`, `RequestMatcher`).
- Implement matcher evaluation order with `once=True` behavior to remove one-time overrides.
- Expose helper APIs:
- `fake.extract.stub_run(...)`
- `fake.parse.stub_parse(...)`
- `fake.classify.stub_prediction(...)`
- `fake.files.stub_upload(...)`, etc.
- Complexity: Need to ensure matcher evaluation can inspect raw `httpx.Request` bodies/headers for both sync and async flows without consuming the stream twice.
### 5. Assertions & Spies
- Expose convenience attributes pointing to `respx.Route` objects for frequently used paths (e.g., `fake.extract.stateless_run`).
- Provide helper methods for call counts, captured requests, etc.
- Ensure naming stays stable to avoid brittle tests.
### 6. Testing Strategy
- Add pytest fixtures to install the fake server globally for integration tests.
- Cover scenarios:
- Stateless extract returns deterministic payload.
- Agent-backed extract polls job/runs.
- Files API handles presigned upload and retrieval.
- Parse job lifecycle for both success and failure.
- Classification job with deterministic label output.
- Matcher overrides injection & once-only behavior.
- Mixed namespace configurations (e.g., intercept extract only, let parse hit real network).
## Extra Complexity & Spec Concerns
- **Presigned URL scope**: Spec assumes presigned uploads can be intercepted the same way as SaaS APIs, but actual presigned URLs often point to AWS domains outside `base_urls`. Need a strategy (e.g., generate fake host names the SDK will call, or rewrite responses to use local URLs).
- **Async client coverage**: LlamaParse builds new `httpx.AsyncClient` objects; the spec's install/uninstall story must ensure respx patches all clients, not just the global one.
- **Deterministic generators**: The spec outlines hashing inputs but doesn't define exact algorithms. Without mirroring production generator logic, fixtures might diverge. We may need to document any intentional differences.
- **Job state timelines**: The spec expects transitions (`PENDING → SUCCESS`) with realistic timestamps. Need to ensure we schedule async updates or respond with multi-step polling; otherwise, tests relying on delays may behave differently.
- **Namespace toggling**: Clarify behavior when a namespace is disabled—should unmatched routes fall through automatically or raise? Current spec implies fall-through to real network, but that could be surprising in CI.
- **Schema handling**: `_validate_schema` currently calls production for dict schemas. The fake must emulate validation; otherwise tests will still hit SaaS. The spec doesn't detail validation logic, so we must decide on a simplified validator or deterministic echo.
- **Parse partitioning**: `LlamaParse` can partition jobs and expects consistent pagination semantics. Need to ensure deterministic results respect `target_pages`, `partition_pages`, etc., or document limitations.
## Next Actions
1. Prototype router + lifecycle with namespace toggles.
2. Implement FileStore + presigned workflow since other namespaces depend on file IDs.
3. Build deterministic generators and stores for extract/parse/classify.
4. Layer matcher/override system on top of the stores.
5. Write initial tests per namespace and refine spec gaps (presigned host, validation behavior).
6. Update `testing_utils_spec.md` with any clarifications discovered above.
+199
View File
@@ -0,0 +1,199 @@
# Testing Utils Research
## Objective
The `FakeLlamaCloudServer` proposal aims to intercept raw HTTP traffic for extract, parse, classify, and files APIs so local tests behave like SaaS without per-test stubbing. The mock must respect base URLs, support context-manager or long-lived install modes, and deterministically synthesize payloads from file + schema hashes while still allowing targeted overrides.
```1:67:py/testing_utils_spec.md
## Local Testing Utilities 2.0 (Spec Draft)
- **Everything mocked by default** …
- **Context manager optional** …
- **Pydantic-first ergonomics** …
- **API-only contract** …
```
## Python Hand-Written SDK Surfaces
These modules sit on top of the generated `llama_cloud` client and are what most application tests exercise. `FakeLlamaCloudServer` must satisfy the HTTP contracts they rely on.
### Extract flow (`py/llama_cloud_services/extract/extract.py`)
- The class imports resource types from `llama_cloud` and owns an `AsyncLlamaCloud` client shared across both stateless and agent-backed flows.
```17:37:py/llama_cloud_services/extract/extract.py
from llama_cloud import (
ExtractAgent as CloudExtractAgent,
ExtractConfig,
)
from llama_cloud.client import AsyncLlamaCloud
```
- Schema validation always calls `POST /api/v1/extraction/extraction-agents/schema/validation` through `client.llama_extract.validate_extraction_schema`, so the fake must mimic that endpoint.
```65:82:py/llama_cloud_services/extract/extract.py
async def _validate_schema(...):
validated_schema = await client.llama_extract.validate_extraction_schema(
data_schema=processed_schema
)
```
- Agent creation, listing, and run management use the `llama_extract` namespace. These calls surface the same request/response bodies as the SaaS API, so tests that interact through agents expect consistent metadata (IDs, status enums, etc.).
```636:738:py/llama_cloud_services/extract/extract.py
def create_agent(...):
agent = self._run_in_thread(
self._async_client.llama_extract.create_extraction_agent(
project_id=self._project_id,
)
)
return ExtractionAgent(...)
```
- Stateless extraction queues work by converting file inputs into either `file_id`, inline text, or base64 payloads and forwarding them to `POST /api/v1/extraction/run`. Deterministic responses from the fake should be keyed off `processed_schema` + whichever file representation the SDK sent.
```921:1018:py/llama_cloud_services/extract/extract.py
async def queue_extraction(...):
processed_schema = await _validate_schema(...)
job = await self._async_client.llama_extract.extract_stateless(
project_id=self._project_id,
organization_id=self._organization_id,
data_schema=processed_schema,
config=config,
**file_args,
)
```
### File uploads (`py/llama_cloud_services/files/client.py`)
- All higher-level services route file uploads/downloads through `FileClient`. When `use_presigned_url` is enabled, the client first calls `POST /api/v1/files` (generate URL), then performs the PUT upload directly, then fetches metadata via `GET /api/v1/files/{id}`. The fake must intercept both the API calls and the presigned PUT hops to return consistent `file_id`s and stored bytes.
```63:140:py/llama_cloud_services/files/client.py
presigned_url = await self.client.files.generate_presigned_url(...)
upload_response = await httpx_client.put(presigned_url.url, data=buffer.read())
return await self.client.files.get_file(presigned_url.file_id, …)
```
### Parse reader (`py/llama_cloud_services/parse/base.py`)
- `LlamaParse` is a bespoke reader that talks straight to HTTP routes defined in the module (e.g., `/api/parsing/upload`, `/api/parsing/job/{id}`). Unlike `LlamaExtract`, it does not go through the generated client; instead it builds URLs manually and uses `httpx`/`make_api_request`. A fake server must therefore implement these exact paths.
```49:66:py/llama_cloud_services/parse/base.py
JOB_RESULT_URL = "/api/parsing/job/{job_id}/result/{result_type}"
JOB_STATUS_ROUTE = "/api/parsing/job/{job_id}"
JOB_UPLOAD_ROUTE = "/api/parsing/upload"
```
```1056:1070:py/llama_cloud_services/parse/base.py
url = build_url(JOB_UPLOAD_ROUTE, self.organization_id, self.project_id)
resp = await make_api_request(self.aclient, "POST", url, …, files=files, data=data)
```
### Classifier beta client (`py/llama_cloud_services/beta/classifier/client.py`)
- Classification flows reuse `FileClient` for uploads and then call `AsyncLlamaCloud.classifier` endpoints (`create_classify_job`, `get_classify_job`, `get_classification_job_results`). Long-running tests poll until status becomes terminal, so mocking needs to cover both the enqueue POST and the follow-up GETs.
```75:151:py/llama_cloud_services/beta/classifier/client.py
return await self.client.classifier.create_classify_job(...)
results = await self.client.classifier.get_classification_job_results(
classify_job_with_status.id,
project_id=self.project_id,
)
```
## Generated Python Client
- The repo depends on the published `llama_cloud` package (currently 0.1.44) which is itself generated from the OpenAPI spec. All hand-written modules import types and service clients from this package, so the fake server may need to mirror whatever transport settings `AsyncLlamaCloud` expects (headers, pagination, etc.).
```1605:1608:py/uv.lock
sdist = { … "llama_cloud-0.1.44.tar.gz", … }
wheels = [{ … "llama_cloud-0.1.44-py3-none-any.whl", … }]
```
- Since `AsyncLlamaCloud` handles auth headers and base URLs, integrating the fake server typically means pointing `LLAMA_CLOUD_BASE_URL` at the mock and letting the generated client continue to build resource paths.
## TypeScript + OpenAPI Assets
- The canonical OpenAPI document lives in `ts/llama_cloud_services/openapi.json`. It defines every path/operation used by both the generated TypeScript SDK and the Python client. For example, the stateless extract endpoint is captured as `POST /api/v1/extraction/run`.
```13825:13872:ts/llama_cloud_services/openapi.json
"/api/v1/extraction/run": {
"post": {
"summary": "Extract Stateless",
"description": "… Requires data_schema, config, and either file_id, text, or base64 encoded file data.",
}
}
```
- The OpenAPI document is downloaded from production via `scripts/download.mjs` and then fed into `@hey-api/openapi-ts` to regenerate the TypeScript client and schema wrappers. Keeping the fake server in sync with the spec means you can diff regenerated clients when the API evolves.
```3:21:ts/llama_cloud_services/scripts/download.mjs
const response = await fetch('https://api.cloud.llamaindex.ai/api/openapi.json');
fs.writeFileSync('openapi.json', JSON.stringify(data, null, 2));
```
```1:24:ts/llama_cloud_services/openapi-ts.config.ts
export default defineConfig({
input: "./openapi.json",
output: { path: "./src/client", format: "prettier", lint: "eslint" },
plugins: [ … "@hey-api/sdk", "@hey-api/typescript" ],
});
```
- The public TypeScript surface (`src/LlamaClassify.ts`, etc.) already consumes the generated client by injecting auth headers and delegating to `classify(...)`. If Python tests eventually need to reuse the same fake server, TypeScript examples provide another reference for how SDK consumers expect responses to look.
```12:74:ts/llama_cloud_services/src/LlamaClassify.ts
export class LlamaClassify {
constructor(apiKey?: string, baseUrl?: string, region?: string) {
this.client = createClient(createConfig({ baseUrl: url, headers: { Authorization: `Bearer ${key}` }}));
}
async classify(rules, configuration, { fileContents, filePaths, projectId, … }) {
const result = await classify(rules, configuration, {
fileContents,
filePaths,
projectId: projectId ?? undefined,
client: this.client,
});
return result;
}
}
```
## Endpoint Map to Stub First
Cross-referencing the Python call-sites with the OpenAPI spec yields the minimum set of HTTP routes the fake server must implement:
1. `/api/v1/extraction/run` for stateless jobs, plus the agent CRUD endpoints under `/api/v1/extraction/extraction-agents`, `/api/v1/extraction/jobs`, and `/api/v1/extraction/runs/by-job/{id}` (see `LlamaExtract` usage above).
2. `/api/v1/files/**` for upload/generate-presigned/list/get/delete, plus any presigned `PUT` destinations (`FileClient`).
3. `/api/parsing/upload`, `/api/parsing/job/{job_id}`, `/api/parsing/job/{job_id}/result/{result_type}` (direct HTTPX calls in `LlamaParse`).
4. `/api/v1/classifier/**` for job creation, polling, and result retrieval (`LlamaClassify` and `ClassifyClient`).
Having deterministic handlers for these routes unlocks end-to-end coverage of extract/parse/classify flows without touching live SaaS.
## Implementation Reminders from the Spec
- Namespace toggles let tests intercept a subset of APIs while letting others fall through—mirror this by allowing `FakeLlamaCloudServer(namespaces=[...])` to selectively register respx routes.
- Deterministic payloads should derive from uploaded file bytes + schema hashes for extract, layout characteristics for parse, and label sets for classify so that rerunning the same test yields identical responses (reducing fixture churn).
- Keep the matcher system (`RequestMatcher`, `FileMatcher`, etc.) flexible so individual tests can stub failures (e.g., presigned upload errors, job timeouts) without reconfiguring the entire fake.
```44:210:py/testing_utils_spec.md
with FakeLlamaCloudServer() as fake:
extractor = LlamaExtract(...)
parser = LlamaParse(...)
classifier = LlamaClassify(...)
fake.extract.stub_run(... RequestMatcher ...)
```
Armed with the file map above, a new developer can trace any SDK call from the hand-written layers down to the generated client and the authoritative OpenAPI route, making it clear where the fake server needs to hook in.
+329
View File
@@ -0,0 +1,329 @@
## Local Testing Utilities 2.0 (Spec Draft)
Offline testing should feel identical to calling the public LlamaCloud API. The new utilities center on a single `FakeLlamaCloudServer` that intercepts HTTP traffic at the API boundary, deterministically generates responses from real files + schemas, and only requires overrides when a test needs to exercise edge cases.
### Design goals
- **Everything mocked by default**: instantiating `FakeLlamaCloudServer()` wires up every public LlamaCloud namespace (extract, parse, classify, files, etc.) so the SDK behaves as if it were talking to production. Deterministic responses are returned without any additional wiring.
- **Context manager optional**: `FakeLlamaCloudServer` still supports `with ...` for pytest isolation, but you can call `install()` / `uninstall()` to keep the mock server active inside a long-running process (e.g., a FastAPI dev server that proxies to the fake).
- **Pydantic-first ergonomics**: all documentation and helpers assume schemas are declared as `BaseModel` subclasses. JSON Schema dictionaries are still accepted for compatibility.
- **API-only contract**: handlers talk raw HTTP (request dicts, status codes, JSON payloads) so we can reuse the mock in future SDKs or other languages without depending on `LlamaExtract`.
### Quick start (pytest-friendly, deterministic by default)
```python
from pathlib import Path
from pydantic import BaseModel, Field
from llama_cloud import ExtractConfig, ExtractMode
from llama_cloud_services.extract import LlamaExtract
from llama_cloud_services.testing_utils import FakeLlamaCloudServer
class Receipt(BaseModel):
merchant: str = Field(description="Vendor name")
total: float = Field(description="Grand total in USD")
config = ExtractConfig(extraction_mode=ExtractMode.FAST)
pdf_path = Path("tests/fixtures/receipt.pdf")
with FakeLlamaCloudServer() as fake:
extractor = LlamaExtract(
api_key="test-key",
verify=False,
)
run = extractor.extract(Receipt, config, pdf_path)
assert run.status.value == "SUCCESS"
assert run.data["total"] > 0 # generated entirely from file + schema
```
Key points:
- No manual stubbing required. The fake server hashes the uploaded file bytes + schema JSON to derive a deterministic seed and walks the schema to produce stable mock data.
- `FakeLlamaCloudServer` automatically intercepts the default SaaS URL (`https://api.cloud.llamaindex.ai`). If your tests point at another host (e.g., BYOC), pass it via `FakeLlamaCloudServer(base_urls=["https://byoc.dev/api"])`; otherwise, keep using your normal SDK base URL.
### Works across extract, parse, classify
```python
from llama_cloud_services.testing_utils import FakeLlamaCloudServer
from llama_cloud_services.extract import LlamaExtract
from llama_cloud_services.parse import LlamaParse
from llama_cloud_services.classify import LlamaClassify
with FakeLlamaCloudServer() as fake:
extractor = LlamaExtract(api_key="test-key")
parser = LlamaParse(api_key="test-key")
classifier = LlamaClassify(api_key="test-key")
run = extractor.extract(
Receipt, config, "noisebridge.pdf"
) # reuse quick-start schema/config
parse_result = parser.parse("noisebridge.pdf")
classification = classifier.classify({"text": "foo"})
assert run.status.value == "SUCCESS"
assert parse_result.documents[0].text # deterministically generated
assert classification.prediction in {
"A",
"B",
} # stable RNG driven by payload
```
Every namespace uses its own deterministic generator (schema-driven for extract, layout-driven for parse, label-driven for classify) but shares the same matcher/override system described below.
### Limiting intercepted APIs
If you only need a subset of APIs (e.g., extract + files during early bring-up), pass `namespaces` explicitly. Anything omitted will fall through to the real network, which is handy for hybrid tests.
```python
fake = FakeLlamaCloudServer(
namespaces=["extract", "files"],
base_urls=["https://api.cloud.llamaindex.ai"], # optionally point at BYOC
)
with fake:
extractor = LlamaExtract(api_key="test-key")
extractor.extract(Receipt, config, "noisebridge.pdf")
```
### Long-lived install for iterative development
```python
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
fake = FakeLlamaCloudServer(
namespaces=["extract"],
base_urls=[
os.environ.get(
"LLAMA_CLOUD_BASE_URL", "https://api.cloud.llamaindex.ai"
)
],
)
@asynccontextmanager
async def lifespan(app):
fake.install()
app.state.extractor = LlamaExtract(
api_key="dev",
verify=False,
)
try:
yield
finally:
fake.uninstall()
app = FastAPI(lifespan=lifespan)
```
`install()`/`uninstall()` simply wrap the respx router lifecycle so you can keep the mock server hot for REPLs, background workers, or manual QA sessions without relying on a context manager.
### Files API behavior
`FakeLlamaCloudServer` ships with a first-class fake for `/api/v1/files/*` because uploads sit on the critical path for both extract and downstream workflows that pre-stage files.
- Every call to `POST /files/generate-presigned-url`, the subsequent `PUT` upload, and the follow-up `GET /files/{file_id}` is intercepted and stored in-memory. The response objects mirror the real API so `FileClient` keeps working unchanged.
- `fake.files.preload(path="tests/fixtures/plan.pdf", filename="plan.pdf")` ingests local fixtures ahead of time and returns a reusable `file_id`, which is useful when tests pass `SourceText(file_id=...)`.
- `fake.files.stub_upload(...)` lets you simulate storage failures (e.g., 413 "file too large") using the same matcher system as extract.
- You can download what the SDK uploaded via `fake.files.read(file_id)` to assert on the bytes or to feed downstream mocks.
```python
from llama_cloud_services.extract import SourceText
with FakeLlamaCloudServer() as fake:
file_id = fake.files.preload(path="tests/fixtures/noisebridge.pdf")
extractor = LlamaExtract(api_key="test-key")
run = extractor.extract(Receipt, config, SourceText(file_id=file_id))
assert fake.files.read(file_id).startswith(b"%PDF")
```
### Deterministic response generation
1. Files uploaded via `/api/v1/files` (or inlined via `extract_stateless`) are fingerprinted using SHA256 (file content bytes + filename).
2. Schemas are normalized (Pydantic `model_json_schema()` plus sorted keys) and hashed.
3. A seed derived from `sha256(file_fingerprint + schema_digest)` feeds a tiny RNG that walks the schema to synthesize values (numbers, strings, arrays) while respecting field metadata (descriptions hint names, numeric ranges, etc.).
4. Runs transition through the same states as production (`PENDING` → `SUCCESS`) and return realistic timestamps, metadata, and config echoes.
Because the seed is stable, rerunning the same schema/file pair yields identical mock payloads without stubbing.
### Stubbing, spying, and assertions
Most tests only need the deterministic defaults, but the fake server provides a layered set of helpers for overriding responses, asserting call counts, and finally dropping down to raw `respx` when necessary.
#### Matcher API
```python
from dataclasses import dataclass
from typing import Callable, Optional
from httpx import Request
@dataclass
class FileMatcher:
filename: str | None = None
sha256: str | None = None
file_id: str | None = None
@dataclass
class SchemaMatcher:
model: type[BaseModel] | None = None
schema_hash: str | None = None
@dataclass
class RequestMatcher:
file: FileMatcher | Callable[[Request], bool] | None = None
schema: SchemaMatcher | None = None
agent_id: str | None = None
project_id: str | None = None
organization_id: str | None = None
predicate: Callable[[Request], bool] | None = None
```
Every callable matcher receives the raw `httpx.Request` object that respx captured, so you can inspect headers, cookies, bodies, etc., without learning another wrapper type. The helper dataclasses (`FileMatcher`, `SchemaMatcher`) just cover the common cases; mix and match as needed. Stubs are evaluated in registration order, and `once=True` removes the stub after the first match.
#### Stateless extraction example
```python
fake.extract.stub_run(
matcher=RequestMatcher(file=FileMatcher(filename="noisebridge.pdf")),
data={"merchant": "Noisebridge", "total": 42.0},
status="SUCCESS", # defaults to deterministic timeline when omitted
metadata={"source": "unit-test"},
once=True,
)
run = extractor.extract(Receipt, config, "noisebridge.pdf")
assert run.data["merchant"] == "Noisebridge"
```
`data` accepts dictionaries, Pydantic models, or callables (`Callable[[Request], dict]`). If you omit `status`, the stub only replaces the payload while preserving the deterministic job/run lifecycle.
#### Agent extraction example
```python
agent = extractor.create_agent(
name="tests", data_schema=Receipt, config=config
)
fake.extract.stub_agent_run(
agent_id=agent.id,
matcher=RequestMatcher(file=FileMatcher(filename="bad.pdf")),
job_status="FAILED", # overrides POST /extraction/jobs
run_status="FAILED", # overrides GET /runs/by-job
error={"message": "Schema mismatch"},
)
with pytest.raises(ApiError):
agent.extract("bad.pdf")
```
`stub_agent_run` targets the stateful job pipeline (`/extraction/jobs`, `/extraction/jobs/{id}`, `/extraction/runs/by-job/{id}`) so you can mimic long-running failures, retries, or partial completions without hand-writing multiple HTTP handlers.
#### Parse, classify, and files stubs
- `fake.parse.stub_parse(...)` lets you override document splits, token counts, or even return structured HTML for specific file IDs.
- `fake.classify.stub_prediction(...)` accepts label sets and score distributions so you can test downstream logic that inspects confidences.
- `fake.files.stub_upload(...)` / `fake.files.stub_download(...)` simulate storage edge cases such as timeouts or corrupted content.
Because every namespace uses the same matcher primitives, you can coordinate multi-API scenarios (e.g., stub the file upload and the subsequent extract run) without duplicating predicates.
#### Assertions without extra abstractions
Since everything runs through the same `respx.MockRouter` you already use elsewhere, assertions stay lightweight:
```python
with FakeLlamaCloudServer() as fake:
route = fake.router["POST", "/api/v1/extraction/run"]
extractor = LlamaExtract(api_key="test-key")
extractor.extract(Receipt, config, "noisebridge.pdf")
assert route.called
assert route.call_count == 1
req = route.calls[0].request # this is httpx.Request
assert req.headers["authorization"].startswith("Bearer ")
```
For friendlier names, every frequently used route is also pinned to a stable attribute:
- `fake.extract.stateless_run` (alias: `fake.extract_run`) → `POST /api/v1/extraction/run`
- `fake.extract.agent_job` → `POST /api/v1/extraction/jobs`
- `fake.extract.agent_run` → `GET /api/v1/extraction/runs/by-job/{job_id}`
- `fake.files.upload``POST /api/v1/files/upload` (or the presigned PUT hop, depending on mode)
- `fake.files.get``GET /api/v1/files/{file_id}`
Each attribute is the underlying `respx.Route`, so assertions feel natural:
```python
assert fake.extract.stateless_run.called
fake.extract.stateless_run.assert_called_once()
assert fake.files.upload.call_count == 1
assert fake.extract_run.called # global alias for the same route
```
If you ever need the full mapping, `fake.extract.routes["stateless_run"] is fake.extract.stateless_run`.
#### Advanced (respx-level) overrides
When you need total control, drop straight into respx:
```python
route = fake.router["POST", "/api/v1/extraction/run"]
route.mock(side_effect=lambda request: (418, {"detail": "I'm a teapot"}))
```
Or use the attribute shortcuts:
```python
fake.extract.stateless_run.mock(
side_effect=lambda request: (500, {"detail": "boom"})
)
```
Either way you're dealing with the canonical respx objects, so regex paths, call assertions, and other ecosystem tools keep working. The only convention is that handlers should return `(status_code, json_body | bytes)` so logging and deterministic fallbacks remain consistent.
### API-layer implementation hints
- Route decorators such as `server.add_handler("POST", "/api/v1/extraction/run")` install handlers for **every** registered base URL declared in the constructor, keeping the mock independent of SDK client classes.
- Request objects passed to handlers expose method, URL, headers, query params, JSON body, and raw bytes—everything needed to mirror production without importing internal models.
- Namespaces self-register via the constructor (e.g., `namespaces=["extract"]`) so no additional attach helpers are required; future SDKs can opt into the same HTTP contracts by toggling the namespaces they care about.
## Research: Extract SDK surface map
The current Python SDK (`py/llama_cloud_services/extract/extract.py`) is a thin wrapper over the HTTP API exposed in `ts/llama_cloud_services/openapi.json`. Understanding this mapping helps ensure the fake server mirrors the real contract.
### Core classes
- `LlamaExtract`: factory that owns an `AsyncLlamaCloud` client, manages thread pools, and exposes both stateless extraction (`extract`, `aextract`, `queue_extraction`) and agent CRUD helpers.
- `ExtractionAgent`: wraps an existing agent returned by the API and provides methods for queuing files, polling jobs, listing runs, updating schemas/configs, and deleting runs.
- `FileClient`: abstracts the `/api/v1/files` upload + download flow, including presigned URL handling for uploads.
### Stateless extraction flow
1. `LlamaExtract.queue_extraction(data_schema, config, files)` validates schemas via `POST /api/v1/extraction/extraction-agents/schema/validation`, then converts each input file into one of `file_id`, `file` (base64 body), or inline `text`.
2. For each file the SDK calls `POST /api/v1/extraction/run` with the processed schema + config + file payload. The API responds with an `ExtractJob`.
3. `LlamaExtract.aextract` waits for completion by polling `_wait_for_job_result`, which hits `GET /api/v1/extraction/jobs/{job_id}` until the job is `SUCCESS`/`FAILED`, then fetches the run via `GET /api/v1/extraction/runs/by-job/{job_id}`. The synchronous `extract` just wraps this coroutine in a worker thread.
### Agent-backed flow
1. `create_agent` issues `POST /api/v1/extraction/extraction-agents` with name, schema, and config; responses seed `ExtractionAgent`.
2. `ExtractionAgent.queue_extraction` uploads files via `FileClient`, then enqueues jobs with `POST /api/v1/extraction/jobs` (or `/jobs/file` for multipart uploads). Returned job IDs are polled via `_wait_for_job_result` just like the stateless path.
3. `ExtractionAgent.list_extraction_runs` and `delete_extraction_run` map to `GET /api/v1/extraction/runs` (with pagination) and `DELETE /api/v1/extraction/runs/{run_id}` respectively.
4. Manual inspection helpers (`get_extraction_job`, `get_extraction_run_for_job`, `get_extraction_run`) call `GET /api/v1/extraction/jobs/{job_id}` and `GET /api/v1/extraction/runs/by-job/{job_id}` / `GET /api/v1/extraction/runs/{run_id}`.
### Files API touch points
- Uploads default to presigned URLs: the SDK first calls `POST /api/v1/files/generate-presigned-url`, then performs an HTTP PUT to the returned URL, and finally fetches the file metadata via `GET /api/v1/files/{file_id}`.
- When BYOC deployments disable presigned uploads, `FileClient` falls back to `POST /api/v1/files/upload`.
### Implications for the fake server
- **API-level parity**: mocking should happen at the HTTP layer (matching the endpoints listed above) so new SDKs can reuse the fake by simply pointing their base URL at it.
- **State surfaces**: to emulate production, the fake needs in-memory stores for files, jobs, and runs keyed by UUIDs, plus schema validation stubs that mimic the `/schema/validation` endpoint.
- **Deterministic generators**: since `ExtractRun.data` is derived from schema + file, implementing the generator once at the API layer ensures consistency across SDKs.
- **Error simulation hooks**: overrides should let us short-circuit any endpoint (jobs, runs, schema validation) without changing SDK code, mirroring how the real API might fail.
This map should serve as the checklist when we implement the mock: if an SDK method calls a certain path, our fake server must expose the same path with compatible request/response bodies so we can eventually lift these utilities into a standalone package.
## Implementation status
- `FakeLlamaCloudServer` now lives in `llama_cloud_services.testing_utils` and registers the files, extract, parse, and classify namespaces by default. It can be used as `with FakeLlamaCloudServer(): ...` or by calling `install()` / `uninstall()` explicitly.
- Deterministic payloads derive from file fingerprints plus schema hashes for extraction, seeded layout information for parse, and rule/file hashes for classification. The helper exposes matcher-driven overrides (`stub_run`, `stub_agent_run`, `files.stub_upload`) so tests can simulate errors.
- Common `respx.Route` handles are exposed via namespaces (`fake.extract.stateless_run`, `fake.extract.agent_job`, `fake.files.routes["upload"]`, etc.) for assertions that mirror the examples in this spec.
- End-to-end usage is covered in `py/unit_tests/testing_utils/test_fake_server.py`, which exercises stateless extraction, agent-backed uploads, and LlamaParse readers entirely against the fake server without external network calls.
@@ -0,0 +1,75 @@
from __future__ import annotations

from collections.abc import Iterator
from pathlib import Path

import pytest
from llama_cloud import ExtractConfig
from llama_cloud.types import ExtractMode
from llama_cloud_services.extract import LlamaExtract
from llama_cloud_services.parse import LlamaParse
from llama_cloud_services.testing_utils import FakeLlamaCloudServer
from pydantic import BaseModel, Field
# Pydantic schema passed as the data schema to the extraction calls in the
# tests below (stateless `extract` and `create_agent`).
class Receipt(BaseModel):
    # Vendor name field; the tests assert that a "merchant" key is present in
    # the returned run data.
    merchant: str = Field(description="Vendor name")
    # Grand total field of the receipt.
    total: float = Field(description="Grand total")
@pytest.fixture(autouse=True)
def _fake_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Set the LlamaCloud API key and base URL environment variables.

    Declared ``autouse`` so every test in this module runs with these values
    without requesting the fixture explicitly.
    """
    env_overrides = {
        "LLAMA_CLOUD_API_KEY": "unit-test-key",
        "LLAMA_CLOUD_BASE_URL": FakeLlamaCloudServer.DEFAULT_BASE_URL,
    }
    # dicts preserve insertion order, so the variables are set in the order
    # listed above.
    for variable, value in env_overrides.items():
        monkeypatch.setenv(variable, value)
@pytest.fixture
def fake_server() -> Iterator[FakeLlamaCloudServer]:
    """Yield an active ``FakeLlamaCloudServer`` for the duration of one test.

    The server is entered as a context manager before the test body runs and
    is exited when the fixture is torn down.

    Yields:
        The object bound by ``with FakeLlamaCloudServer() as server``.
    """
    # This is a generator fixture, so the return annotation describes the
    # generator (Iterator[...]) rather than the yielded server itself.
    with FakeLlamaCloudServer() as server:
        yield server
def _write_sample_file(tmp_path: Path, name: str, content: str) -> Path:
    """Write *content* to ``tmp_path / name`` and return the resulting path.

    Args:
        tmp_path: Directory in which the file is created.
        name: File name (relative to ``tmp_path``).
        content: Text to store in the file.

    Returns:
        The path of the file that was written.
    """
    target = tmp_path / name
    # Pin the encoding so the bytes on disk (which the tests later compare
    # against uploaded bytes) do not depend on the host's locale.
    target.write_text(content, encoding="utf-8")
    return target
def test_stateless_extract_is_deterministic(
    fake_server: FakeLlamaCloudServer, tmp_path: Path
) -> None:
    """Extracting the same file twice with the same schema returns equal data."""
    client = LlamaExtract(api_key="unit-test-key", verify=False)
    fast_config = ExtractConfig(extraction_mode=ExtractMode.FAST)
    receipt_file = _write_sample_file(
        tmp_path,
        "receipt.txt",
        "Merchant: Lunar Bistro\nTotal: 123.45",
    )

    # Two independent runs over identical inputs.
    first_run = client.extract(Receipt, fast_config, receipt_file)
    second_run = client.extract(Receipt, fast_config, receipt_file)

    assert first_run.status.value == "SUCCESS"
    assert second_run.data == first_run.data
    assert "merchant" in first_run.data
    # The stateless-run route on the fake must have been hit.
    assert fake_server.extract.stateless_run.called
def test_agent_flow_uploads_and_processes_files(
    fake_server: FakeLlamaCloudServer, tmp_path: Path
) -> None:
    """An agent-backed extraction uploads the file and completes successfully."""
    client = LlamaExtract(api_key="unit-test-key", verify=False)
    fast_config = ExtractConfig(extraction_mode=ExtractMode.FAST)
    agent = client.create_agent(
        name="unit-test-agent",
        data_schema=Receipt,
        config=fast_config,
    )
    contract_file = _write_sample_file(
        tmp_path, "contract.pdf", "Agreement between parties."
    )

    run = agent.extract(contract_file)

    assert run.status.value == "SUCCESS"
    assert "merchant" in run.data

    # The bytes stored by the fake files namespace should be what was written.
    stored = fake_server.files.read(run.file.id)
    assert stored.startswith(b"Agreement")

    # Both the job and run routes of the agent pipeline must have been hit.
    assert fake_server.extract.agent_job.called
    assert fake_server.extract.agent_run.called
def test_parse_load_data_returns_documents(
    fake_server: FakeLlamaCloudServer, tmp_path: Path
) -> None:
    """``LlamaParse.load_data`` returns documents when run against the fake."""
    parser = LlamaParse(
        api_key="unit-test-key",
        base_url=FakeLlamaCloudServer.DEFAULT_BASE_URL,
    )
    report_file = _write_sample_file(
        tmp_path, "report.pdf", "Executive summary of quarterly goals."
    )

    documents = parser.load_data(report_file)

    # A non-empty result whose first document carries the page marker.
    assert documents
    assert "(page 1)" in documents[0].text
Generated
+259 -7
View File
@@ -3,7 +3,8 @@ revision = 3
requires-python = ">=3.9, <4.0"
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
"python_full_version == '3.10.*'",
"python_full_version < '3.10'",
]
@@ -220,7 +221,8 @@ name = "argon2-cffi-bindings"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
"python_full_version == '3.10.*'",
"python_full_version < '3.10'",
]
@@ -589,7 +591,8 @@ version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
"python_full_version == '3.10.*'",
]
dependencies = [
@@ -720,6 +723,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
]
[[package]]
name = "eval-type-backport"
version = "0.2.2"
@@ -1120,7 +1132,8 @@ version = "8.37.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
"python_full_version == '3.10.*'",
]
dependencies = [
@@ -1596,7 +1609,7 @@ wheels = [
[[package]]
name = "llama-cloud-services"
version = "0.6.79"
version = "0.6.81"
source = { editable = "." }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1608,6 +1621,7 @@ dependencies = [
{ name = "platformdirs" },
{ name = "pydantic" },
{ name = "python-dotenv" },
{ name = "respx" },
{ name = "tenacity" },
]
@@ -1620,7 +1634,11 @@ dev = [
{ name = "ipython", version = "8.37.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
{ name = "jupyter" },
{ name = "mypy" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "pre-commit" },
{ name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
{ name = "pydantic-settings" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
@@ -1637,6 +1655,7 @@ requires-dist = [
{ name = "platformdirs", specifier = ">=4.3.7,<5" },
{ name = "pydantic", specifier = ">=2.8,!=2.10" },
{ name = "python-dotenv", specifier = ">=1.0.1,<2" },
{ name = "respx", extras = ["tests"], specifier = ">=0.22.0" },
{ name = "tenacity", specifier = ">=8.5.0,<10.0" },
]
@@ -1648,7 +1667,10 @@ dev = [
{ name = "ipython", specifier = ">=8.12.3,<9" },
{ name = "jupyter", specifier = ">=1.1.1,<2" },
{ name = "mypy", specifier = ">=1.14.1,<2" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "pre-commit", specifier = "==3.2.0" },
{ name = "pyarrow" },
{ name = "pydantic-settings", specifier = ">=2.10.1" },
{ name = "pytest", specifier = ">=8.0.0,<9" },
{ name = "pytest-asyncio" },
@@ -2098,7 +2120,8 @@ version = "3.5"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
]
sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" }
wheels = [
@@ -2284,7 +2307,8 @@ version = "2.3.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.11' and python_full_version < '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
]
sdist = { url = "https://files.pythonhosted.org/packages/37/7d/3fec4199c5ffb892bed55cff901e4f39a58c81df9c44c280499e92cad264/numpy-2.3.2.tar.gz", hash = "sha256:e0486a11ec30cdecb53f184d496d1c6a20786c81e55e41640270130056f8ee48", size = 20489306, upload-time = "2025-07-24T21:32:07.553Z" }
wheels = [
@@ -2363,6 +2387,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/78/e3/6690b3f85a05506733c7e90b577e4762517404ea78bab2ca3a5cb1aeb78d/numpy-2.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6936aff90dda378c09bea075af0d9c675fe3a977a9d2402f95a87f440f59f619", size = 12977811, upload-time = "2025-07-24T21:29:18.234Z" },
]
[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "et-xmlfile" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" },
]
[[package]]
name = "orderly-set"
version = "5.5.0"
@@ -2390,6 +2426,76 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pandas"
version = "2.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "python-dateutil" },
{ name = "pytz" },
{ name = "tzdata" },
]
sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763, upload-time = "2025-09-29T23:16:53.287Z" },
{ url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217, upload-time = "2025-09-29T23:17:04.522Z" },
{ url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791, upload-time = "2025-09-29T23:17:18.444Z" },
{ url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373, upload-time = "2025-09-29T23:17:35.846Z" },
{ url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444, upload-time = "2025-09-29T23:17:49.341Z" },
{ url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459, upload-time = "2025-09-29T23:18:03.722Z" },
{ url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086, upload-time = "2025-09-29T23:18:18.505Z" },
{ url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" },
{ url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" },
{ url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" },
{ url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" },
{ url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" },
{ url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" },
{ url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" },
{ url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" },
{ url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" },
{ url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = "2025-09-29T23:19:59.765Z" },
{ url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload-time = "2025-09-29T23:20:14.098Z" },
{ url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload-time = "2025-09-29T23:20:26.76Z" },
{ url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload-time = "2025-09-29T23:20:41.344Z" },
{ url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" },
{ url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload-time = "2025-09-29T23:21:05.024Z" },
{ url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload-time = "2025-09-29T23:21:15.979Z" },
{ url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload-time = "2025-09-29T23:21:27.165Z" },
{ url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload-time = "2025-09-29T23:21:40.532Z" },
{ url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload-time = "2025-09-29T23:21:55.77Z" },
{ url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload-time = "2025-09-29T23:22:10.109Z" },
{ url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload-time = "2025-09-29T23:25:04.889Z" },
{ url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload-time = "2025-09-29T23:22:24.343Z" },
{ url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload-time = "2025-09-29T23:22:37.762Z" },
{ url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload-time = "2025-09-29T23:22:51.688Z" },
{ url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload-time = "2025-09-29T23:23:05.042Z" },
{ url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload-time = "2025-09-29T23:23:28.57Z" },
{ url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload-time = "2025-09-29T23:24:24.876Z" },
{ url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635, upload-time = "2025-09-29T23:25:52.486Z" },
{ url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079, upload-time = "2025-09-29T23:26:33.204Z" },
{ url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049, upload-time = "2025-09-29T23:27:15.384Z" },
{ url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638, upload-time = "2025-09-29T23:27:51.625Z" },
{ url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834, upload-time = "2025-09-29T23:28:21.289Z" },
{ url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925, upload-time = "2025-09-29T23:28:58.261Z" },
{ url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071, upload-time = "2025-09-29T23:32:27.484Z" },
{ url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504, upload-time = "2025-09-29T23:29:31.47Z" },
{ url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702, upload-time = "2025-09-29T23:29:54.591Z" },
{ url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535, upload-time = "2025-09-29T23:30:21.003Z" },
{ url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582, upload-time = "2025-09-29T23:30:43.391Z" },
{ url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963, upload-time = "2025-09-29T23:31:10.009Z" },
{ url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
{ url = "https://files.pythonhosted.org/packages/56/b4/52eeb530a99e2a4c55ffcd352772b599ed4473a0f892d127f4147cf0f88e/pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2", size = 11567720, upload-time = "2025-09-29T23:33:06.209Z" },
{ url = "https://files.pythonhosted.org/packages/48/4a/2d8b67632a021bced649ba940455ed441ca854e57d6e7658a6024587b083/pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8", size = 10810302, upload-time = "2025-09-29T23:33:35.846Z" },
{ url = "https://files.pythonhosted.org/packages/13/e6/d2465010ee0569a245c975dc6967b801887068bc893e908239b1f4b6c1ac/pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff", size = 12154874, upload-time = "2025-09-29T23:33:49.939Z" },
{ url = "https://files.pythonhosted.org/packages/1f/18/aae8c0aa69a386a3255940e9317f793808ea79d0a525a97a903366bb2569/pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29", size = 12790141, upload-time = "2025-09-29T23:34:05.655Z" },
{ url = "https://files.pythonhosted.org/packages/f7/26/617f98de789de00c2a444fbe6301bb19e66556ac78cff933d2c98f62f2b4/pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73", size = 13208697, upload-time = "2025-09-29T23:34:21.835Z" },
{ url = "https://files.pythonhosted.org/packages/b9/fb/25709afa4552042bd0e15717c75e9b4a2294c3dc4f7e6ea50f03c5136600/pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9", size = 13879233, upload-time = "2025-09-29T23:34:35.079Z" },
{ url = "https://files.pythonhosted.org/packages/98/af/7be05277859a7bc399da8ba68b88c96b27b48740b6cf49688899c6eb4176/pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa", size = 11359119, upload-time = "2025-09-29T23:34:46.339Z" },
]
[[package]]
name = "pandocfilters"
version = "1.5.1"
@@ -2735,6 +2841,122 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" },
]
[[package]]
name = "pyarrow"
version = "21.0.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version < '3.10'",
]
sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" },
{ url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" },
{ url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" },
{ url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" },
{ url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = "2025-07-18T00:54:51.686Z" },
{ url = "https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" },
{ url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" },
{ url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" },
{ url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" },
{ url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" },
{ url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" },
{ url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" },
{ url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" },
{ url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" },
{ url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" },
{ url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" },
{ url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" },
{ url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" },
{ url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" },
{ url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" },
{ url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" },
{ url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" },
{ url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" },
{ url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" },
{ url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" },
{ url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" },
{ url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" },
{ url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" },
{ url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" },
{ url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" },
{ url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" },
{ url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" },
{ url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" },
{ url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" },
{ url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" },
{ url = "https://files.pythonhosted.org/packages/3e/cc/ce4939f4b316457a083dc5718b3982801e8c33f921b3c98e7a93b7c7491f/pyarrow-21.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a7f6524e3747e35f80744537c78e7302cd41deee8baa668d56d55f77d9c464b3", size = 31211248, upload-time = "2025-07-18T00:56:59.7Z" },
{ url = "https://files.pythonhosted.org/packages/1f/c2/7a860931420d73985e2f340f06516b21740c15b28d24a0e99a900bb27d2b/pyarrow-21.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:203003786c9fd253ebcafa44b03c06983c9c8d06c3145e37f1b76a1f317aeae1", size = 32676896, upload-time = "2025-07-18T00:57:03.884Z" },
{ url = "https://files.pythonhosted.org/packages/68/a8/197f989b9a75e59b4ca0db6a13c56f19a0ad8a298c68da9cc28145e0bb97/pyarrow-21.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b4d97e297741796fead24867a8dabf86c87e4584ccc03167e4a811f50fdf74d", size = 41067862, upload-time = "2025-07-18T00:57:07.587Z" },
{ url = "https://files.pythonhosted.org/packages/fa/82/6ecfa89487b35aa21accb014b64e0a6b814cc860d5e3170287bf5135c7d8/pyarrow-21.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:898afce396b80fdda05e3086b4256f8677c671f7b1d27a6976fa011d3fd0a86e", size = 42747508, upload-time = "2025-07-18T00:57:13.917Z" },
{ url = "https://files.pythonhosted.org/packages/3b/b7/ba252f399bbf3addc731e8643c05532cf32e74cebb5e32f8f7409bc243cf/pyarrow-21.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:067c66ca29aaedae08218569a114e413b26e742171f526e828e1064fcdec13f4", size = 43345293, upload-time = "2025-07-18T00:57:19.828Z" },
{ url = "https://files.pythonhosted.org/packages/ff/0a/a20819795bd702b9486f536a8eeb70a6aa64046fce32071c19ec8230dbaa/pyarrow-21.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0c4e75d13eb76295a49e0ea056eb18dbd87d81450bfeb8afa19a7e5a75ae2ad7", size = 45060670, upload-time = "2025-07-18T00:57:24.477Z" },
{ url = "https://files.pythonhosted.org/packages/10/15/6b30e77872012bbfe8265d42a01d5b3c17ef0ac0f2fae531ad91b6a6c02e/pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f", size = 26227521, upload-time = "2025-07-18T00:57:29.119Z" },
]
[[package]]
name = "pyarrow"
version = "22.0.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version >= '3.12' and python_full_version < '3.14'",
"python_full_version == '3.11.*'",
"python_full_version == '3.10.*'",
]
sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" },
{ url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" },
{ url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" },
{ url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" },
{ url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" },
{ url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" },
{ url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" },
{ url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" },
{ url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" },
{ url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" },
{ url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" },
{ url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" },
{ url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" },
{ url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" },
{ url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" },
{ url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" },
{ url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" },
{ url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" },
{ url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" },
{ url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" },
{ url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" },
{ url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" },
{ url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" },
{ url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" },
{ url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" },
{ url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" },
{ url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" },
{ url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" },
{ url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" },
{ url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" },
{ url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" },
{ url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" },
{ url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" },
{ url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" },
{ url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" },
{ url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" },
{ url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" },
{ url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" },
{ url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" },
{ url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" },
{ url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" },
{ url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" },
{ url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" },
{ url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" },
{ url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" },
{ url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" },
{ url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" },
{ url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" },
{ url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" },
]
[[package]]
name = "pycparser"
version = "2.22"
@@ -2969,6 +3191,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163, upload-time = "2025-03-07T07:08:25.627Z" },
]
[[package]]
name = "pytz"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
]
[[package]]
name = "pywin32"
version = "311"
@@ -3360,6 +3591,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" },
]
[[package]]
name = "respx"
version = "0.22.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "httpx" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f4/7c/96bd0bc759cf009675ad1ee1f96535edcb11e9666b985717eb8c87192a95/respx-0.22.0.tar.gz", hash = "sha256:3c8924caa2a50bd71aefc07aa812f2466ff489f1848c96e954a5362d17095d91", size = 28439, upload-time = "2024-12-19T22:33:59.374Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8e/67/afbb0978d5399bc9ea200f1d4489a23c9a1dad4eee6376242b8182389c79/respx-0.22.0-py2.py3-none-any.whl", hash = "sha256:631128d4c9aba15e56903fb5f66fb1eff412ce28dd387ca3a81339e52dbd3ad0", size = 25127, upload-time = "2024-12-19T22:33:57.837Z" },
]
[[package]]
name = "rfc3339-validator"
version = "0.1.4"
@@ -3860,6 +4103,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
]
[[package]]
name = "tzdata"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
]
[[package]]
name = "uri-template"
version = "1.3.0"