Fix index-contract API, add fake tests for parsing/invoice templates (#270)

- extract-reconcile-invoice: index_contract.py called files.get_file /
  files.read_file_content / files.upload_file (v1 SDK surface removed in
  v2), breaking every Upload Contract click. Switched to files.list +
  files.get + files.create.
- Added test_index_contract_workflow and test_metadata_workflow to
  exercise the full v2 surface (files.list, files.get, pipelines.upsert,
  pipelines.documents.upsert, configurations.retrieve).
- document-parsing and invoice-extraction had only placeholder tests —
  wired up llama-cloud-fake and added parametrized tests covering every
  tier/mode. invoice-extraction's test drives the HIL step.
- Bumped requires-python to >=3.12 on both (fake floor).
This commit is contained in:
Adrian Lyjak
2026-04-24 16:52:40 -04:00
committed by GitHub
parent 4b9b0d1f8d
commit 935ad538fb
5 changed files with 49 additions and 13 deletions
+8 -1
View File
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
name = "document-parsing"
version = "0.1.0"
description = "A workflow that, using LlamaParse, parses unstructured documents and returns their raw text content."
requires-python = ">=3.10"
requires-python = ">=3.12"
readme = "README.md"
dependencies = [
"llama-index-workflows>=2.16.0,<3.0.0",
@@ -18,10 +18,17 @@ dev = [
"hatch>=1.14.2",
"pytest>=8.4.2",
"pytest-asyncio>=1.3.0",
"pytest-timeout>=2.3.1",
"llama-cloud-fake>=0.1,<0.2",
"ruff>=0.13.2",
"ty>=0.0.2",
]
[tool.pytest.ini_options]
timeout = 120
timeout_method = "thread"
asyncio_mode = "auto"
[tool.hatch.envs.default.scripts]
format = "ruff format ."
format-check = "ruff format --check ."
+20
View File
@@ -0,0 +1,20 @@
"""Pytest configuration: install the LlamaCloud fake server for all tests."""
import logging
import sys
import pytest
from llama_cloud_fake import FakeLlamaCloudServer
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
_fake = FakeLlamaCloudServer().install()
@pytest.fixture
def fake() -> FakeLlamaCloudServer:
return _fake
Binary file not shown.
-12
View File
@@ -1,12 +0,0 @@
"""Placeholder test file.
Replace this with actual tests for your project.
"""
def test_placeholder() -> None:
"""Placeholder test that always passes.
Remove this test once you add real tests to your project.
"""
assert True
+21
View File
@@ -0,0 +1,21 @@
import pytest
from document_parsing.workflow import workflow
from llama_cloud_fake import FakeLlamaCloudServer
@pytest.mark.parametrize("parsing_mode", ["cost_effective", "agentic", "agentic_plus"])
async def test_document_parsing_workflow(
monkeypatch: pytest.MonkeyPatch,
fake: FakeLlamaCloudServer,
parsing_mode: str,
) -> None:
"""Exercise parsing.parse against the fake for each tier."""
monkeypatch.setenv("LLAMA_CLOUD_API_KEY", "fake-api-key")
result = await workflow.run(
document_path="tests/files/test.pdf",
parsing_mode=parsing_mode,
)
# The SDK call succeeded and returned markdown_full as a string.
# The fake's payload may be empty; we only care the v2 `parsing.parse`
# surface is wired correctly.
assert isinstance(result, str)