mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Use error description in invalid extraction error (#1081)
* fix: display extraction job error in InvalidExtractionData exception

  Refactored InvalidExtractionData to read the `error` field from ExtractRun and prominently display it in the exception message. The job-level error is now stored in the `extraction_error` attribute and included in the invalid_item's metadata as `job_error`.

* Create three-yaks-beg.md

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
---
|
||||
"llama-cloud-services-py": patch
|
||||
---
|
||||
|
||||
Use error description in ExtractedData invalid extraction error
|
||||
@@ -475,26 +475,49 @@ class ExtractedData(BaseModel, Generic[ExtractedT]):
|
||||
},
|
||||
)
|
||||
except ValidationError as e:
|
||||
# Capture the job-level error from the extraction run if available
|
||||
job_error = result.error
|
||||
|
||||
invalid_item = ExtractedData[Dict[str, Any]].create(
|
||||
data=result.data or {},
|
||||
status="error",
|
||||
field_metadata=field_metadata,
|
||||
metadata={"extraction_error": str(e), **(metadata or {})},
|
||||
metadata={
|
||||
"extraction_error": str(e),
|
||||
**({"job_error": job_error} if job_error else {}),
|
||||
**(metadata or {}),
|
||||
},
|
||||
file_id=file_id,
|
||||
file_name=file_name,
|
||||
file_hash=file_hash,
|
||||
)
|
||||
raise InvalidExtractionData(invalid_item) from e
|
||||
raise InvalidExtractionData(invalid_item, extraction_error=job_error) from e
|
||||
|
||||
|
||||
class InvalidExtractionData(Exception):
    """
    Exception raised when the extracted data does not conform to the schema.

    The exception message is ``"Extraction error: <extraction_error>"`` when a
    non-empty job-level error is supplied; otherwise it falls back to a generic
    "parsed invalid format" message.

    Attributes:
        invalid_item: The ExtractedData instance containing the invalid data and metadata
        extraction_error: The error message from the extraction job, if available
    """

    def __init__(
        self,
        invalid_item: ExtractedData[Dict[str, Any]],
        extraction_error: Optional[str] = None,
    ):
        self.invalid_item = invalid_item
        self.extraction_error = extraction_error

        # Build an informative error message: prefer the job-level error and
        # use the generic text when it is None or empty.
        if extraction_error:
            message = f"Extraction error: {extraction_error}"
        else:
            message = "Not able to parse the extracted data, parsed invalid format"

        # Initialise the base Exception exactly once, with the final message.
        super().__init__(message)
|
||||
|
||||
|
||||
def calculate_overall_confidence(
|
||||
|
||||
@@ -423,6 +423,7 @@ def create_extract_run(
|
||||
},
|
||||
data_schema: Dict[str, Any] = {},
|
||||
file: File = create_file(),
|
||||
error: Optional[str] = None,
|
||||
) -> ExtractRun:
|
||||
return ExtractRun.parse_obj(
|
||||
{
|
||||
@@ -439,6 +440,7 @@ def create_extract_run(
|
||||
"status": "SUCCESS",
|
||||
"project_id": str(uuid.uuid4()),
|
||||
"from_ui": False,
|
||||
"error": error,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -544,6 +546,46 @@ def test_extracted_data_from_extraction_result_invalid_data():
|
||||
assert invalid_data.field_metadata["name"].confidence == 0.9
|
||||
assert invalid_data.overall_confidence == 0.9
|
||||
|
||||
# Verify default error message when no job error present
|
||||
assert exc_info.value.extraction_error is None
|
||||
assert "Not able to parse the extracted data" in str(exc_info.value)
|
||||
|
||||
|
||||
def test_extracted_data_from_extraction_result_with_job_error():
    """A job-level error on the run is surfaced as the InvalidExtractionData message."""
    job_error_message = "Failed to process document: unsupported file format"

    # Payload meant to fail validation (invalid age, missing name), on a run
    # that also carries a job-level error.
    bad_payload = {
        "missing_name": "Valid Name",
        "age": "not_a_number",
    }
    run_with_error = create_extract_run(
        data=bad_payload,
        extraction_metadata={"name": {"confidence": 0.9}},
        data_schema={},
        file=create_file(id="error-file", name="bad_data.pdf"),
        error=job_error_message,
    )

    # Parsing must fail with InvalidExtractionData.
    with pytest.raises(InvalidExtractionData) as exc_info:
        ExtractedData.from_extraction_result(
            run_with_error, Person, metadata={"test": "metadata"}
        )

    # The job error is exposed as an attribute and is the whole exception message.
    raised = exc_info.value
    assert raised.extraction_error == job_error_message
    assert f"Extraction error: {job_error_message}" == str(raised)

    # The invalid item's metadata keeps the job error, the validation error,
    # and the caller-supplied metadata.
    item_metadata = raised.invalid_item.metadata
    assert item_metadata.get("job_error") == job_error_message
    assert "extraction_error" in item_metadata
    assert "test" in item_metadata
|
||||
|
||||
|
||||
class Dimensions(BaseModel):
|
||||
length: Optional[str] = Field(
|
||||
|
||||
Reference in New Issue
Block a user