mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Parse bounding boxes from extract jobs results in agent data (#1067)
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
---
|
||||
"llama-cloud-services-py": patch
|
||||
---
|
||||
|
||||
Parse bounding boxes from extract job results in agent data
|
||||
@@ -11,6 +11,9 @@ from .schema import (
|
||||
InvalidExtractionData,
|
||||
ExtractedFieldMetadata,
|
||||
ExtractedFieldMetaDataDict,
|
||||
FieldCitation,
|
||||
BoundingBox,
|
||||
PageDimensions,
|
||||
)
|
||||
from .client import AsyncAgentDataClient
|
||||
|
||||
@@ -28,4 +31,7 @@ __all__ = [
|
||||
"InvalidExtractionData",
|
||||
"ExtractedFieldMetadata",
|
||||
"ExtractedFieldMetaDataDict",
|
||||
"FieldCitation",
|
||||
"BoundingBox",
|
||||
"PageDimensions",
|
||||
]
|
||||
|
||||
@@ -174,6 +174,22 @@ class TypedAgentDataItems(BaseModel, Generic[AgentDataT]):
|
||||
)
|
||||
|
||||
|
||||
class BoundingBox(BaseModel):
    """Bounding box coordinates for a citation location on a page."""

    # Origin point of the box. All four fields are required (no default).
    x: float = Field(..., description="X coordinate of the bounding box origin")
    y: float = Field(..., description="Y coordinate of the bounding box origin")

    # Extent of the box measured from the origin.
    w: float = Field(..., description="Width of the bounding box")
    h: float = Field(..., description="Height of the bounding box")
|
||||
|
||||
|
||||
class PageDimensions(BaseModel):
    """Dimensions of a page in the source document."""

    # Both values are required; units are not specified here
    # (NOTE(review): presumably the same units as BoundingBox -- confirm).
    width: float = Field(..., description="Width of the page")
    height: float = Field(..., description="Height of the page")
|
||||
|
||||
|
||||
class FieldCitation(BaseModel):
|
||||
page: Optional[int] = Field(
|
||||
None, description="The page number that the field occurred on"
|
||||
@@ -182,6 +198,14 @@ class FieldCitation(BaseModel):
|
||||
None,
|
||||
description="The original text this field's value was derived from",
|
||||
)
|
||||
bounding_boxes: Optional[List[BoundingBox]] = Field(
|
||||
None,
|
||||
description="Bounding boxes indicating where the citation appears on the page",
|
||||
)
|
||||
page_dimensions: Optional[PageDimensions] = Field(
|
||||
None,
|
||||
description="Dimensions of the page containing the citation",
|
||||
)
|
||||
|
||||
|
||||
class ExtractedFieldMetadata(BaseModel):
|
||||
@@ -201,6 +225,10 @@ class ExtractedFieldMetadata(BaseModel):
|
||||
None,
|
||||
description="The confidence score for the field based on the extracted text only",
|
||||
)
|
||||
parsing_confidence: Optional[float] = Field(
|
||||
None,
|
||||
description="The confidence score for the field based on the parsing/OCR quality",
|
||||
)
|
||||
citation: Optional[List[FieldCitation]] = Field(
|
||||
None,
|
||||
description="The citation for the field, including page number and matching text",
|
||||
|
||||
@@ -11,10 +11,12 @@ from llama_cloud.types.aggregate_group import AggregateGroup
|
||||
from pydantic import BaseModel, Field, ValidationError
|
||||
|
||||
from llama_cloud_services.beta.agent_data.schema import (
|
||||
BoundingBox,
|
||||
ExtractedData,
|
||||
ExtractedFieldMetadata,
|
||||
FieldCitation,
|
||||
InvalidExtractionData,
|
||||
PageDimensions,
|
||||
TypedAgentData,
|
||||
TypedAggregateGroup,
|
||||
calculate_overall_confidence,
|
||||
@@ -663,3 +665,69 @@ def test_field_conflict_in_schema():
|
||||
assert isinstance(
|
||||
extracted["majority_opinion"]["reasoning"], ExtractedFieldMetadata
|
||||
)
|
||||
|
||||
|
||||
def test_parse_extracted_field_metadata_with_bounding_boxes():
    """Check that citation bounding boxes and page dimensions survive parsing."""

    def make_citation(text, x, y, w, h):
        # Return a fresh raw citation on every call so that no dict objects
        # are shared between the entries fed to the parser.
        return {
            "page": 1,
            "matching_text": text,
            "bounding_boxes": [{"x": x, "y": y, "w": w, "h": h}],
            "page_dimensions": {"width": 222.24, "height": 736.56},
        }

    doc_type_score = 0.7252506422636493
    summary_score = 0.5700013128334419

    raw_metadata = {
        "document_type": {
            "citation": [
                make_citation("FACTURE ORIGINALE", 77.28, 615.12, 70.6, 7.2),
            ],
            "parsing_confidence": 1.0,
            "extraction_confidence": doc_type_score,
            "confidence": doc_type_score,
        },
        "summary": {
            "citation": [
                make_citation("FACTURE ORIGINALE", 77.28, 615.12, 70.6, 7.2),
                make_citation(
                    "Café filtre assiette — $1.90", 10.56, 172.83, 171.85, 497.01
                ),
            ],
            "parsing_confidence": 1.0,
            "extraction_confidence": summary_score,
            "confidence": summary_score,
        },
    }

    result = parse_extracted_field_metadata(raw_metadata)

    # Field with a single citation: scores, text, box and page size all parsed.
    doc_type = result["document_type"]
    assert isinstance(doc_type, ExtractedFieldMetadata)
    assert doc_type.parsing_confidence == 1.0
    assert doc_type.extraction_confidence == doc_type_score
    assert doc_type.confidence == doc_type_score
    assert len(doc_type.citation) == 1

    only_citation = doc_type.citation[0]
    assert only_citation.page == 1
    assert only_citation.matching_text == "FACTURE ORIGINALE"
    assert len(only_citation.bounding_boxes) == 1
    expected_box = BoundingBox(x=77.28, y=615.12, w=70.6, h=7.2)
    assert only_citation.bounding_boxes[0] == expected_box
    expected_page = PageDimensions(width=222.24, height=736.56)
    assert only_citation.page_dimensions == expected_page

    # Field with two citations: each keeps its own bounding box.
    summary = result["summary"]
    assert isinstance(summary, ExtractedFieldMetadata)
    assert len(summary.citation) == 2
    first_citation, second_citation = summary.citation[0], summary.citation[1]
    assert first_citation.bounding_boxes[0].x == 77.28
    assert second_citation.bounding_boxes[0].x == 10.56

    # Feeding the already-parsed result back through the parser must yield
    # an equal result.
    reparsed = parse_extracted_field_metadata(result)
    assert reparsed == result
|
||||
|
||||
Generated
+2
-2
@@ -1,5 +1,5 @@
|
||||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.9, <4.0"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.14'",
|
||||
@@ -1609,7 +1609,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "llama-cloud-services"
|
||||
version = "0.6.85"
|
||||
version = "0.6.88"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||
|
||||
Reference in New Issue
Block a user