From c1b00b79201fce9f5c9fa809cf1fdf2b7a934405 Mon Sep 17 00:00:00 2001
From: Adrian Lyjak <adrianlyjak@gmail.com>
Date: Wed, 22 Apr 2026 21:34:28 -0400
Subject: [PATCH] Classify v2 fake, agent_data.create migration, downstream
 copier updates (#268)

---
 pyproject.toml                        |  2 +-
 src/extraction_review/clients.py      |  7 -----
 src/extraction_review/process_file.py |  2 +-
 tests/conftest.py                     | 21 +++++++-------
 tests/test_workflow.py                | 41 ++-------------------------
 5 files changed, 15 insertions(+), 58 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 77e5910..add1bfc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "Extracts data"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
-    "llama-cloud>=2.3.0,<3",
+    "llama-cloud>=2.4.1,<3",
     "json-schema-to-pydantic>=0.4.8",
     "llama-index-workflows>=2.16.0,<3.0.0",
     "python-dotenv>=1.1.0",
diff --git a/src/extraction_review/clients.py b/src/extraction_review/clients.py
index b65d270..8b8fbda 100644
--- a/src/extraction_review/clients.py
+++ b/src/extraction_review/clients.py
@@ -13,13 +13,6 @@ api_key = os.getenv("LLAMA_CLOUD_API_KEY")
 base_url = os.getenv("LLAMA_CLOUD_BASE_URL")
 project_id = os.getenv("LLAMA_DEPLOY_PROJECT_ID")
 
-if os.getenv("FAKE_LLAMA_CLOUD"):
-    from llama_cloud_fake import FakeLlamaCloudServer
-
-    fake = FakeLlamaCloudServer().install()
-else:
-    fake = None
-
 
 def get_llama_cloud_client() -> AsyncLlamaCloud:
     """Cloud services connection for file storage and processing."""
diff --git a/src/extraction_review/process_file.py b/src/extraction_review/process_file.py
index a2bbdac..e5013e2 100644
--- a/src/extraction_review/process_file.py
+++ b/src/extraction_review/process_file.py
@@ -221,7 +221,7 @@ class ProcessFileWorkflow(Workflow):
                     f"Removed {delete_result.deleted_count} existing record(s) "
                     f"for file {extracted_data.file_name}"
                 )
-        item = await llama_cloud_client.beta.agent_data.agent_data(
+        item = await llama_cloud_client.beta.agent_data.create(
             data=data_dict,
             deployment_name=agent_name or "_public",
             collection=EXTRACTED_DATA_COLLECTION,
diff --git a/tests/conftest.py b/tests/conftest.py
index 00fdb43..881adaa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,21 +1,20 @@
-"""
-Pytest configuration for data-extraction tests.
-
-IMPORTANT: FAKE_LLAMA_CLOUD must be set before any test modules are imported,
-as extraction_review.clients reads this at module load time to initialize
-the mock server.
-"""
+"""Pytest configuration: install the LlamaCloud fake server for all tests."""
 
 import logging
-import os
 import sys
 
-# Configure logging to stdout at INFO level
+import pytest
+from llama_cloud_fake import FakeLlamaCloudServer
+
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     handlers=[logging.StreamHandler(sys.stdout)],
 )
 
-# Enable the fake LlamaCloud server for all tests
-os.environ["FAKE_LLAMA_CLOUD"] = "true"
+_fake = FakeLlamaCloudServer().install()
+
+
+@pytest.fixture
+def fake() -> FakeLlamaCloudServer:
+    return _fake
diff --git a/tests/test_workflow.py b/tests/test_workflow.py
index 8b954f6..1f5a81e 100644
--- a/tests/test_workflow.py
+++ b/tests/test_workflow.py
@@ -1,35 +1,15 @@
-"""
-<system>
-This is the default test for the data-extraction template.
-Adapt the test to your use case following the guidelines below.
-</system>
-<guidelines>
-You can read and modify this file.
-Here are your editing permissions, which you **MUST ALWAYS** follow:
-
-- Lines and blocks tagged with `<edit></edit>` should be ALWAYS modified to something different, based on the use case.
-- Lines and blocks tagged with `<adapt></adapt>` should be adapted to the specific use case you are dealing with, but only if needed.
-- If something does not have tags, it **MUST NOT** be modified.
-</guidelines>
-"""
-
 import json
-import warnings
 from pathlib import Path
 
 import pytest
-from extraction_review.clients import fake
-
-# <edit>
 from extraction_review.config import EXTRACTED_DATA_COLLECTION
 from extraction_review.metadata_workflow import MetadataResponse
 from extraction_review.metadata_workflow import workflow as metadata_workflow
 from extraction_review.process_file import FileEvent
 from extraction_review.process_file import workflow as process_file_workflow
+from llama_cloud_fake import FakeLlamaCloudServer
 from workflows.events import StartEvent
 
-# </edit>
-
 
 def get_extraction_schema() -> dict:
     """Load the extraction schema from the unified config file."""
@@ -39,40 +19,25 @@ def get_extraction_schema() -> dict:
 
 
 @pytest.mark.asyncio
-# <adapt>
 async def test_process_file_workflow(
     monkeypatch: pytest.MonkeyPatch,
+    fake: FakeLlamaCloudServer,
 ) -> None:
     monkeypatch.setenv("LLAMA_CLOUD_API_KEY", "fake-api-key")
-    # load a file to the mock LlamaCloud server and retrieve its file id (modify if you don't have any files to load as input)
-    if fake is not None:
-        file_id = fake.files.preload(path="tests/files/test.pdf")
-    else:
-        warnings.warn(
-            "Skipping test because it cannot be mocked. Set `FAKE_LLAMA_CLOUD=true` in your environment to enable this test..."
-        )
-        return
+    file_id = fake.files.preload(path="tests/files/test.pdf")
     try:
         result = await process_file_workflow.run(start_event=FileEvent(file_id=file_id))
     except Exception:
         result = None
     assert result is not None
     # all generated agent data IDs are alphanumeric strings with 7 characters
-    # the following assert statements ensure that that is the case
     assert isinstance(result, str)
     assert len(result) == 7
 
 
-# </adapt>
-
-
-# <adapt>
 @pytest.mark.asyncio
 async def test_metadata_workflow() -> None:
     result = await metadata_workflow.run(start_event=StartEvent())
     assert isinstance(result, MetadataResponse)
     assert result.extracted_data_collection == EXTRACTED_DATA_COLLECTION
     assert result.json_schema == get_extraction_schema()
-
-
-# </adapt>