Parse bounding boxes from extract job results in agent data (#1067)

This commit is contained in:
Adrian Lyjak
2026-01-09 18:47:57 -05:00
committed by GitHub
parent 3ec7024626
commit b9b83c953d
5 changed files with 109 additions and 2 deletions
+5
View File
@@ -0,0 +1,5 @@
---
"llama-cloud-services-py": patch
---
Parse bounding boxes from extract job results in agent data
@@ -11,6 +11,9 @@ from .schema import (
InvalidExtractionData,
ExtractedFieldMetadata,
ExtractedFieldMetaDataDict,
FieldCitation,
BoundingBox,
PageDimensions,
)
from .client import AsyncAgentDataClient
@@ -28,4 +31,7 @@ __all__ = [
"InvalidExtractionData",
"ExtractedFieldMetadata",
"ExtractedFieldMetaDataDict",
"FieldCitation",
"BoundingBox",
"PageDimensions",
]
@@ -174,6 +174,22 @@ class TypedAgentDataItems(BaseModel, Generic[AgentDataT]):
)
class BoundingBox(BaseModel):
    """Bounding box coordinates for a citation location on a page."""

    # All four fields are required floats (no default is supplied to Field).
    # NOTE(review): units and which corner is the "origin" are not stated in
    # this model -- presumably the same units as PageDimensions; confirm
    # against the extraction API.

    # Position of the box origin.
    x: float = Field(
        description="X coordinate of the bounding box origin",
    )
    y: float = Field(
        description="Y coordinate of the bounding box origin",
    )

    # Extent of the box measured from the origin.
    w: float = Field(
        description="Width of the bounding box",
    )
    h: float = Field(
        description="Height of the bounding box",
    )
class PageDimensions(BaseModel):
    """Dimensions of a page in the source document."""

    # Both fields are required floats (no default is supplied to Field).
    # NOTE(review): the unit (points, pixels, ...) is not stated here --
    # confirm against the extraction API before doing arithmetic with it.
    width: float = Field(
        description="Width of the page",
    )
    height: float = Field(
        description="Height of the page",
    )
class FieldCitation(BaseModel):
page: Optional[int] = Field(
None, description="The page number that the field occurred on"
@@ -182,6 +198,14 @@ class FieldCitation(BaseModel):
None,
description="The original text this field's value was derived from",
)
bounding_boxes: Optional[List[BoundingBox]] = Field(
None,
description="Bounding boxes indicating where the citation appears on the page",
)
page_dimensions: Optional[PageDimensions] = Field(
None,
description="Dimensions of the page containing the citation",
)
class ExtractedFieldMetadata(BaseModel):
@@ -201,6 +225,10 @@ class ExtractedFieldMetadata(BaseModel):
None,
description="The confidence score for the field based on the extracted text only",
)
parsing_confidence: Optional[float] = Field(
None,
description="The confidence score for the field based on the parsing/OCR quality",
)
citation: Optional[List[FieldCitation]] = Field(
None,
description="The citation for the field, including page number and matching text",
@@ -11,10 +11,12 @@ from llama_cloud.types.aggregate_group import AggregateGroup
from pydantic import BaseModel, Field, ValidationError
from llama_cloud_services.beta.agent_data.schema import (
BoundingBox,
ExtractedData,
ExtractedFieldMetadata,
FieldCitation,
InvalidExtractionData,
PageDimensions,
TypedAgentData,
TypedAggregateGroup,
calculate_overall_confidence,
@@ -663,3 +665,69 @@ def test_field_conflict_in_schema():
assert isinstance(
extracted["majority_opinion"]["reasoning"], ExtractedFieldMetadata
)
def test_parse_extracted_field_metadata_with_bounding_boxes():
    """Citations parsed from raw metadata keep bounding boxes and page dimensions."""

    def raw_citation(text, box):
        # Builds a fresh payload on every call so that no dict objects are
        # shared between the citations handed to the parser.
        return {
            "page": 1,
            "matching_text": text,
            "bounding_boxes": [box],
            "page_dimensions": {"width": 222.24, "height": 736.56},
        }

    def raw_field(citations, score):
        # In this fixture the extraction confidence and the overall
        # confidence carry the same value for each field.
        return {
            "citation": citations,
            "parsing_confidence": 1.0,
            "extraction_confidence": score,
            "confidence": score,
        }

    raw_metadata = {
        "document_type": raw_field(
            [
                raw_citation(
                    "FACTURE ORIGINALE",
                    {"x": 77.28, "y": 615.12, "w": 70.6, "h": 7.2},
                ),
            ],
            0.7252506422636493,
        ),
        "summary": raw_field(
            [
                raw_citation(
                    "FACTURE ORIGINALE",
                    {"x": 77.28, "y": 615.12, "w": 70.6, "h": 7.2},
                ),
                raw_citation(
                    "Café filtre assiette — $1.90",
                    {"x": 10.56, "y": 172.83, "w": 171.85, "h": 497.01},
                ),
            ],
            0.5700013128334419,
        ),
    }

    result = parse_extracted_field_metadata(raw_metadata)

    # "document_type": a single citation carrying one box and the page size.
    doc_meta = result["document_type"]
    assert isinstance(doc_meta, ExtractedFieldMetadata)
    assert doc_meta.parsing_confidence == 1.0
    assert doc_meta.extraction_confidence == 0.7252506422636493
    assert doc_meta.confidence == 0.7252506422636493
    assert len(doc_meta.citation) == 1

    doc_citation = doc_meta.citation[0]
    assert doc_citation.page == 1
    assert doc_citation.matching_text == "FACTURE ORIGINALE"
    assert len(doc_citation.bounding_boxes) == 1
    assert doc_citation.bounding_boxes[0] == BoundingBox(
        x=77.28, y=615.12, w=70.6, h=7.2
    )
    assert doc_citation.page_dimensions == PageDimensions(
        width=222.24, height=736.56
    )

    # "summary": two citations, each keeping its own bounding box.
    summary_meta = result["summary"]
    assert isinstance(summary_meta, ExtractedFieldMetadata)
    assert len(summary_meta.citation) == 2
    assert summary_meta.citation[0].bounding_boxes[0].x == 77.28
    assert summary_meta.citation[1].bounding_boxes[0].x == 10.56

    # Feeding the already-parsed result back through the parser must produce
    # an equal result.
    result2 = parse_extracted_field_metadata(result)
    assert result2 == result
Generated
+2 -2
View File
@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9, <4.0"
resolution-markers = [
"python_full_version >= '3.14'",
@@ -1609,7 +1609,7 @@ wheels = [
[[package]]
name = "llama-cloud-services"
version = "0.6.85"
version = "0.6.88"
source = { editable = "." }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },