Classify v2 fake, agent_data.create migration, downstream copier updates (#268)

This commit is contained in:
Adrian Lyjak
2026-04-22 21:34:28 -04:00
committed by GitHub
parent f1bfcca3b8
commit 7074e13091
7 changed files with 58 additions and 6 deletions
+1 -1
View File
@@ -1,3 +1,3 @@
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
_commit: v0.7.2
_commit: v0.7.3
_src_path: https://github.com/run-llama/template-workflow-data-extraction
+11 -2
View File
@@ -1,11 +1,11 @@
[project]
name = "extraction-review"
name = "extract-reconcile-invoice"
version = "0.1.0"
description = "Extracts data"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"llama-cloud>=2.3.0,<3",
"llama-cloud>=2.4.1,<3",
"json-schema-to-pydantic>=0.4.8",
"llama-index-workflows>=2.16.0,<3.0.0",
"python-dotenv>=1.1.0",
@@ -24,12 +24,17 @@ dev = [
"pytest>=8.4.1",
"hatch>=1.14.1",
"pytest-asyncio>=1.3.0",
"pytest-timeout>=2.3.1",
"llama-cloud-fake>=0.1,<0.2",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/extraction_review"]
[tool.hatch.envs.default.scripts]
"format" = "ruff format ."
"format-check" = "ruff format --check ."
@@ -40,6 +45,10 @@ test = "pytest"
"all-check" = ["format-check", "lint-check", "test"]
"all-fix" = ["format", "lint", "test"]
[tool.pytest.ini_options]
timeout = 120
timeout_method = "thread"
[tool.llamadeploy]
env_files = [".env"]
llama_cloud = true
+1 -1
View File
@@ -451,7 +451,7 @@ Provide your analysis in the specified format."""
f"Removed {delete_result.deleted_count} existing record(s) "
f"for file {extracted_data.file_name}"
)
item = await llama_cloud_client.beta.agent_data.agent_data(
item = await llama_cloud_client.beta.agent_data.create(
data=data_dict,
deployment_name=agent_name or "_public",
collection=EXTRACTED_DATA_COLLECTION,
+20
View File
@@ -0,0 +1,20 @@
"""Pytest configuration: install the LlamaCloud fake server for all tests."""
import logging
import sys
import pytest
from llama_cloud_fake import FakeLlamaCloudServer
# Configure logging for the test session: INFO level and above, written to
# stdout with a timestamp, logger name, and level on each line.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
# Build and install the fake LlamaCloud server once, when this module is
# imported; the `fake` fixture hands this same instance to every test.
# NOTE(review): presumably install() redirects LlamaCloud client traffic to the
# fake for the whole process — confirm against the llama-cloud-fake docs.
_fake = FakeLlamaCloudServer().install()
@pytest.fixture
def fake() -> FakeLlamaCloudServer:
    """Return the module-level fake LlamaCloud server.

    The same ``_fake`` instance, created when this module is imported, is
    returned on every call, so all tests share one server object.
    """
    return _fake
Binary file not shown.
-2
View File
@@ -1,2 +0,0 @@
def test_placeholder():
pass
+25
View File
@@ -0,0 +1,25 @@
import pytest
from extraction_review.process_file import FileEvent
from extraction_review.process_file import workflow as process_file_workflow
from llama_cloud_fake import FakeLlamaCloudServer
@pytest.mark.asyncio
async def test_process_file_workflow(
    monkeypatch: pytest.MonkeyPatch,
    fake: FakeLlamaCloudServer,
) -> None:
    """Run the process-file workflow on a preloaded PDF and check its result.

    Args:
        monkeypatch: Used to set a dummy ``LLAMA_CLOUD_API_KEY`` for the test.
        fake: The fake LlamaCloud server; the test file is preloaded into it.
    """
    # Dummy credential only; the workflow is expected to talk to the fake.
    monkeypatch.setenv("LLAMA_CLOUD_API_KEY", "fake-api-key")
    file_id = fake.files.preload(path="tests/files/test.pdf")

    # Let any workflow exception propagate: catching it and substituting None
    # would hide the real traceback behind a bare "result is not None" failure.
    result = await process_file_workflow.run(start_event=FileEvent(file_id=file_id))

    assert result is not None
    assert isinstance(result, str)
    # NOTE(review): the workflow result is expected to be a 7-character string,
    # presumably a generated record identifier — confirm against the workflow.
    assert len(result) == 7
# metadata_workflow here calls `pipelines.upsert` to ensure the contracts
# index exists, which isn't covered by the fake in llama-cloud-fake<0.1.1.
# Add a test for it once the floor is bumped.