Compare commits

...

8 Commits

Author SHA1 Message Date
Adrian Lyjak 6f708ff39e skip broken test 2025-10-02 22:47:20 -04:00
Adrian Lyjak 43b249f93d clean tests 2025-10-02 22:30:16 -04:00
Adrian Lyjak d7ea1c27a1 more ts tests 2025-10-02 22:24:40 -04:00
Adrian Lyjak 6879228844 add support for agent data delete query api 2025-10-02 22:24:12 -04:00
Adrian Lyjak 5623a66c22 update ts too 2025-10-02 22:12:26 -04:00
Neeraj Pradhan 46cd974082 fix unit test 2025-10-02 16:16:14 -07:00
Neeraj Pradhan b495e4b897 remove test api 2025-10-02 16:06:24 -07:00
Adrian Lyjak ded6ae9383 Update llama-cloud dep 2025-10-02 18:34:22 -04:00
19 changed files with 3761 additions and 2915 deletions
@@ -194,6 +194,21 @@ class AsyncAgentDataClient(Generic[AgentDataT]):
async def delete_item(self, item_id: str) -> None:
await self.client.beta.delete_agent_data(item_id=item_id)
@agent_data_retry
async def delete(
    self, filter: Optional[Dict[str, Dict[ComparisonOperator, Any]]] = None
) -> int:
    """
    Delete agent data items selected by a query filter.

    Selection is query-based (in the manner of ``search``) rather than by
    item ID. The request is scoped to this client's ``deployment_name``
    and ``collection``.

    Args:
        filter: Optional mapping of field name to
            ``{ComparisonOperator: value}`` conditions. It is forwarded to
            the API unchanged, including when it is ``None``.
            NOTE(review): presumably ``None`` matches every item in the
            collection -- confirm against the API before relying on it.

    Returns:
        The ``deleted_count`` value reported by the API response.
    """
    # Bind the generated endpoint method once; its name is too long to
    # call inline readably.
    delete_by_query = (
        self.client.beta.delete_agent_data_by_query_api_v_1_beta_agent_data_delete_post
    )
    result = await delete_by_query(
        deployment_name=self.deployment_name,
        collection=self.collection,
        filter=filter,
    )
    return result.deleted_count
@agent_data_retry
async def search(
self,
+4 -58
View File
@@ -19,14 +19,12 @@ from llama_cloud import (
ExtractAgent as CloudExtractAgent,
ExtractConfig,
ExtractJob,
ExtractJobCreate,
ExtractRun,
File,
FileData,
ExtractMode,
StatusEnum,
ExtractTarget,
LlamaExtractSettings,
PaginatedExtractRunsResponse,
)
from llama_cloud.client import AsyncLlamaCloud
@@ -463,56 +461,6 @@ class ExtractionAgent:
)
)
async def _run_extraction_test(
self,
files: Union[FileInput, List[FileInput]],
extract_settings: LlamaExtractSettings,
) -> Union[ExtractJob, List[ExtractJob]]:
if not isinstance(files, list):
files = [files]
single_file = True
else:
single_file = False
upload_tasks = [self._upload_file(file) for file in files]
with augment_async_errors():
uploaded_files = await run_jobs(
upload_tasks,
workers=self.num_workers,
desc="Uploading files",
show_progress=self.show_progress,
)
async def run_job(file: File) -> ExtractRun:
job_queued = await self._client.llama_extract.run_job_test_user(
job_create=ExtractJobCreate(
extraction_agent_id=self.id,
file_id=file.id,
data_schema_override=self.data_schema,
config_override=self.config,
),
extract_settings=extract_settings,
)
return await self._wait_for_job_result(job_queued.id)
job_tasks = [run_job(file) for file in uploaded_files]
with augment_async_errors():
extract_results = await run_jobs(
job_tasks,
workers=self.num_workers,
desc="Running extraction jobs",
show_progress=self.show_progress,
)
if self._verbose:
for file, job in zip(files, extract_results):
file_repr = (
str(file) if isinstance(file, (str, Path)) else "<bytes/buffer>"
)
print(f"Running extraction for file {file_repr} under job_id {job.id}")
return extract_results[0] if single_file else extract_results
async def queue_extraction(
self,
files: Union[FileInput, List[FileInput]],
@@ -544,12 +492,10 @@ class ExtractionAgent:
job_tasks = [
self._client.llama_extract.run_job(
request=ExtractJobCreate(
extraction_agent_id=self.id,
file_id=file.id,
data_schema_override=self.data_schema,
config_override=self.config,
),
extraction_agent_id=self.id,
file_id=file.id,
data_schema_override=self.data_schema,
config_override=self.config,
)
for file in uploaded_files
]
+1 -1
View File
@@ -27,7 +27,7 @@ readme = "README.md"
license = "MIT"
dependencies = [
"llama-index-core>=0.12.0",
"llama-cloud==0.1.42",
"llama-cloud==0.1.43",
"pydantic>=2.8,!=2.10",
"click>=8.1.7,<9",
"python-dotenv>=1.0.1,<2",
+1 -28
View File
@@ -1,16 +1,13 @@
import os
import pytest
from llama_cloud_services.extract import LlamaExtract, ExtractionAgent
from time import perf_counter
from llama_cloud_services.extract import LlamaExtract
from collections import namedtuple
import json
import uuid
from llama_cloud.types import (
ExtractConfig,
ExtractMode,
LlamaParseParameters,
LlamaExtractSettings,
)
from tests.extract.util import load_test_dotenv
@@ -122,27 +119,3 @@ def extraction_agent(test_case: BenchmarkTestCase, extractor: LlamaExtract):
# Create new agent
agent = extractor.create_agent(agent_name, schema, config=test_case.config)
yield agent
@pytest.mark.skipif(
"CI" in os.environ or not LLAMA_CLOUD_API_KEY,
reason="LLAMA_CLOUD_API_KEY not set or CI environment not suitable for benchmarking",
)
@pytest.mark.parametrize("test_case", get_test_cases(), ids=lambda x: x.name)
@pytest.mark.asyncio(loop_scope="session")
async def test_extraction(
test_case: BenchmarkTestCase, extraction_agent: ExtractionAgent
) -> None:
start = perf_counter()
result = await extraction_agent._run_extraction_test(
test_case.input_file,
extract_settings=LlamaExtractSettings(
llama_parse_params=LlamaParseParameters(
invalidate_cache=True,
do_not_cache=True,
)
),
)
end = perf_counter()
print(f"Time taken: {end - start} seconds")
print(result)
+1 -1
View File
@@ -7,7 +7,7 @@ from pathlib import Path
def load_test_dotenv():
load_dotenv(Path(__file__).parent.parent.parent / ".env.dev", override=True)
load_dotenv(Path(__file__).parent.parent.parent.parent / ".env.dev", override=True)
def json_subset_match_score(expected: Any, actual: Any) -> float:
+3
View File
@@ -304,6 +304,9 @@ async def test_page_screenshot_retrieval(index_name: str, local_file: str):
not base_url or not api_key, reason="No platform base url or api key set"
)
@pytest.mark.asyncio
@pytest.mark.skip(
reason="Consistently failing with FAILED tests/index/test_index.py::test_page_figure_retrieval - assert 0 > 0 + where 0 = len([])"
)
async def test_page_figure_retrieval(index_name: str, local_figures_file: str):
index = await LlamaCloudIndex.acreate_index(
name=index_name,
+2 -4
View File
@@ -118,10 +118,8 @@ async def test_extraction_agent_aextract_accepts_llama_file(
dummy_llama_extract_iface = SimpleNamespace()
async def fake_run_job(**kwargs):
# Ensure we are receiving a request with the right file_id
request = kwargs.get("request")
assert hasattr(request, "file_id")
assert request.file_id == llama_file.id
file_id = kwargs.get("file_id")
assert file_id == llama_file.id
return SimpleNamespace(id="job_42")
dummy_llama_extract_iface.run_job = fake_run_job
Generated
+5 -5
View File
@@ -1582,21 +1582,21 @@ wheels = [
[[package]]
name = "llama-cloud"
version = "0.1.42"
version = "0.1.43"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "httpx" },
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/21/04/ae0694b582d6aab4d6e7957febb7bff048897ac231ad80ba1bd71547d944/llama_cloud-0.1.42.tar.gz", hash = "sha256:485aa0e364ea648e3aaa3b2c54af7bcb6f2242c50b4f86ec022e137413fff464", size = 112480, upload-time = "2025-09-16T20:25:42.631Z" }
sdist = { url = "https://files.pythonhosted.org/packages/9b/33/33a8bd3a617c071caf450ca2627969f8b28272d0692f122997c10a32247e/llama_cloud-0.1.43.tar.gz", hash = "sha256:00429f05aea515449d90cde91ef3ed3687fcd93e46f6246d08cbea02f9b397a9", size = 112992, upload-time = "2025-10-02T21:55:38.355Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/61/85d115699a59d03f0783e119aaf6d534fca95dbe1a4531a8056e6a4774ed/llama_cloud-0.1.42-py3-none-any.whl", hash = "sha256:4ed3edde4a277ff52eeb831188c8476eb079b5e4605ad3142157a0f054b27d96", size = 311857, upload-time = "2025-09-16T20:25:41.479Z" },
{ url = "https://files.pythonhosted.org/packages/2b/54/559a67542396d5660a71115b29e0160e9dd784e570e1f4ef55ad22bf5b39/llama_cloud-0.1.43-py3-none-any.whl", hash = "sha256:540605d4dd13c6536a3b75cd4d04b211f29b16d17faee9381e3793a651f1dec1", size = 311460, upload-time = "2025-10-02T21:55:37.282Z" },
]
[[package]]
name = "llama-cloud-services"
version = "0.6.68"
version = "0.6.69"
source = { editable = "." }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1631,7 +1631,7 @@ dev = [
requires-dist = [
{ name = "click", specifier = ">=8.1.7,<9" },
{ name = "eval-type-backport", marker = "python_full_version < '3.10'", specifier = ">=0.2.0,<0.3" },
{ name = "llama-cloud", specifier = "==0.1.42" },
{ name = "llama-cloud", specifier = "==0.1.43" },
{ name = "llama-index-core", specifier = ">=0.12.0" },
{ name = "packaging", specifier = ">=25.0" },
{ name = "platformdirs", specifier = ">=4.3.7,<5" },
File diff suppressed because it is too large Load Diff
+1
View File
@@ -4,6 +4,7 @@
"type": "module",
"license": "MIT",
"scripts": {
"get-openapi": "node ./scripts/get-openapi.js",
"generate": "./node_modules/.bin/openapi-ts",
"build": "pnpm run generate && bunchee",
"dev": "bunchee --watch",
@@ -0,0 +1,21 @@
import fs from 'fs';
/**
 * Download an OpenAPI spec and write it to disk as pretty-printed JSON.
 *
 * Any failure inside the try block (rejected fetch, non-OK HTTP status,
 * unparsable JSON body, or a write error) is logged to stderr and the
 * process exits with status 1.
 *
 * @param {string} [url] - Spec URL to fetch. Defaults to the
 *   api.cloud.llamaindex.ai spec this script has always downloaded.
 * @param {string} [outputPath] - File to write. Defaults to `openapi.json`;
 *   a relative path is resolved against the current working directory.
 * @returns {Promise<void>}
 */
async function downloadOpenApiSpec(
  url = 'https://api.cloud.llamaindex.ai/api/openapi.json',
  outputPath = 'openapi.json',
) {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      // fetch only rejects on network failure; HTTP error statuses must be
      // checked explicitly.
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.json();
    // Re-serialize with 2-space indentation so the written file is readable.
    fs.writeFileSync(outputPath, JSON.stringify(data, null, 2));
    console.log(`Successfully downloaded ${outputPath}`);
  } catch (error) {
    console.error('Error downloading OpenAPI spec:', error);
    process.exit(1);
  }
}
downloadOpenApiSpec();
@@ -4,6 +4,7 @@ import {
aggregateAgentDataApiV1BetaAgentDataAggregatePost,
createAgentDataApiV1BetaAgentDataPost,
deleteAgentDataApiV1BetaAgentDataItemIdDelete,
deleteAgentDataByQueryApiV1BetaAgentDataDeletePost,
getAgentDataApiV1BetaAgentDataItemIdGet,
searchAgentDataApiV1BetaAgentDataSearchPost,
updateAgentDataApiV1BetaAgentDataItemIdPut,
@@ -12,6 +13,7 @@ import {
} from "../../client";
import type {
AggregateAgentDataOptions,
DeleteAgentDataOptions,
SearchAgentDataOptions,
TypedAgentData,
TypedAgentDataItems,
@@ -112,6 +114,24 @@ export class AgentClient<T = unknown> {
});
}
/**
 * Delete all agent data matching the given options.
 *
 * The request always carries this client's deployment name. `collection`
 * and `filter` are only included when they are defined.
 *
 * @param options - Deletion options; `options.filter` selects the items.
 * @returns The `deleted_count` reported by the API response.
 */
async delete(options: DeleteAgentDataOptions): Promise<number> {
  const { filter } = options;

  // Leave optional keys out entirely rather than sending `undefined`.
  const body = {
    deployment_name: this.deploymentName,
    ...(this.collection !== undefined ? { collection: this.collection } : {}),
    ...(filter !== undefined ? { filter } : {}),
  };

  const { data } = await deleteAgentDataByQueryApiV1BetaAgentDataDeletePost({
    throwOnError: true,
    body,
    client: this.client,
  });
  return data.deleted_count;
}
/**
* Search agent data
*/
@@ -127,6 +127,14 @@ export interface SearchAgentDataOptions {
includeTotal?: boolean;
}
/**
 * Options for deleting agent data by query.
 */
export interface DeleteAgentDataOptions {
  /**
   * Per-field filter operations selecting which items to delete.
   * The property is optional.
   * NOTE(review): presumably an omitted filter matches every item in the
   * targeted collection; confirm against the API before relying on it.
   */
  filter?: Record<string, FilterOperation>;
}
/**
* Options for aggregating agent data
*/
+288 -355
View File
@@ -1530,27 +1530,6 @@ export const Body_run_job_on_file_api_v1_extraction_jobs_file_postSchema = {
title: "Body_run_job_on_file_api_v1_extraction_jobs_file_post",
} as const;
export const Body_run_job_test_user_api_v1_extraction_jobs_test_postSchema = {
properties: {
job_create: {
$ref: "#/components/schemas/ExtractJobCreate",
},
extract_settings: {
anyOf: [
{
$ref: "#/components/schemas/LlamaExtractSettings",
},
{
type: "null",
},
],
},
},
type: "object",
required: ["job_create"],
title: "Body_run_job_test_user_api_v1_extraction_jobs_test_post",
} as const;
export const Body_screenshot_api_parsing_screenshot_postSchema = {
properties: {
file: {
@@ -2796,30 +2775,6 @@ export const Body_upload_file_api_v1_parsing_upload_postSchema = {
title: "Body_upload_file_api_v1_parsing_upload_post",
} as const;
export const Body_upload_file_v2_api_v2alpha1_parse_upload_postSchema = {
properties: {
configuration: {
type: "string",
title: "Configuration",
},
file: {
anyOf: [
{
type: "string",
format: "binary",
},
{
type: "null",
},
],
title: "File",
},
},
type: "object",
required: ["configuration"],
title: "Body_upload_file_v2_api_v2alpha1_parse_upload_post",
} as const;
export const BoxAuthMechanismSchema = {
type: "string",
enum: ["developer_token", "ccg"],
@@ -3180,12 +3135,6 @@ export const ChatMessageSchema = {
title: "ChatMessage",
} as const;
export const ChunkModeSchema = {
type: "string",
enum: ["PAGE", "DOCUMENT", "SECTION", "GROUPED_PAGES"],
title: "ChunkMode",
} as const;
export const ClassificationResultSchema = {
properties: {
reasoning: {
@@ -5486,6 +5435,13 @@ export const CustomClaimsSchema = {
description: "Whether the user is allowed to delete organizations.",
default: false,
},
allowed_spreadsheet: {
type: "boolean",
title: "Allowed Spreadsheet",
description:
"Whether the user is allowed to access the spreadsheet feature.",
default: false,
},
},
type: "object",
title: "CustomClaims",
@@ -6213,6 +6169,54 @@ export const DeleteParamsSchema = {
description: "Schema for the parameters of a delete job.",
} as const;
export const DeleteRequestSchema = {
properties: {
deployment_name: {
type: "string",
title: "Deployment Name",
description: "The agent deployment's name to delete data for",
},
collection: {
type: "string",
title: "Collection",
description: "The logical agent data collection to delete from",
default: "default",
},
filter: {
anyOf: [
{
additionalProperties: {
$ref: "#/components/schemas/FilterOperation",
},
type: "object",
},
{
type: "null",
},
],
title: "Filter",
description: "Optional filters to select which items to delete",
},
},
type: "object",
required: ["deployment_name"],
title: "DeleteRequest",
description: "API request body for bulk deleting agent data by query",
} as const;
export const DeleteResponseSchema = {
properties: {
deleted_count: {
type: "integer",
title: "Deleted Count",
},
},
type: "object",
required: ["deleted_count"],
title: "DeleteResponse",
description: "API response for bulk delete operation",
} as const;
export const DirectRetrievalParamsSchema = {
properties: {
mode: {
@@ -6946,6 +6950,20 @@ export const ExtractConfigSchema = {
description: "Whether to invalidate the cache for the extraction.",
default: false,
},
num_pages_context: {
anyOf: [
{
type: "integer",
minimum: 1,
},
{
type: "null",
},
],
title: "Num Pages Context",
description:
"Number of pages to pass as context on long document extraction.",
},
page_range: {
anyOf: [
{
@@ -7202,6 +7220,7 @@ export const ExtractModelsSchema = {
"openai-gpt-5-mini",
"gemini-2.0-flash",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-pro",
"openai-gpt-4o",
"openai-gpt-4o-mini",
@@ -7849,6 +7868,52 @@ export const ExtractTargetSchema = {
title: "ExtractTarget",
} as const;
export const ExtractedTableSchema = {
properties: {
table_id: {
type: "integer",
title: "Table Id",
description: "Unique identifier for this table within the file",
},
sheet_name: {
type: "string",
title: "Sheet Name",
description: "Worksheet name where table was found",
},
row_span: {
type: "integer",
title: "Row Span",
description: "Number of rows in the table",
},
col_span: {
type: "integer",
title: "Col Span",
description: "Number of columns in the table",
},
has_headers: {
type: "boolean",
title: "Has Headers",
description: "Whether the table has header rows",
},
metadata_json: {
anyOf: [
{
type: "string",
},
{
type: "null",
},
],
title: "Metadata Json",
description: "JSON metadata with detailed table information",
},
},
type: "object",
required: ["table_id", "sheet_name", "row_span", "col_span", "has_headers"],
title: "ExtractedTable",
description: "A single extracted table from a spreadsheet",
} as const;
export const FailPageModeSchema = {
type: "string",
enum: ["raw_text", "blank_page", "error_message"],
@@ -10828,140 +10893,6 @@ export const LegacyParseJobConfigSchema = {
description: "Configuration for llamaparse job",
} as const;
export const LlamaExtractSettingsSchema = {
properties: {
max_file_size: {
type: "integer",
title: "Max File Size",
description: "The maximum file size (in bytes) allowed for the document.",
default: 104857600,
},
max_file_size_ui: {
type: "integer",
title: "Max File Size Ui",
description: "The maximum file size (in bytes) allowed for the document.",
default: 31457280,
},
max_pages: {
type: "integer",
title: "Max Pages",
description: "The maximum number of pages allowed for the document.",
default: 500,
},
chunk_mode: {
$ref: "#/components/schemas/ChunkMode",
description: "The mode to use for chunking the document.",
default: "SECTION",
},
max_chunk_size: {
type: "integer",
title: "Max Chunk Size",
description:
"The maximum size of the chunks (in tokens) to use for chunking the document.",
default: 10000,
},
extraction_agent_config: {
additionalProperties: {
$ref: "#/components/schemas/StructParseConf",
},
type: "object",
title: "Extraction Agent Config",
description: "The configuration for the extraction agent.",
},
use_multimodal_parsing: {
type: "boolean",
title: "Use Multimodal Parsing",
description: "Whether to use experimental multimodal parsing.",
default: false,
},
use_pixel_extraction: {
type: "boolean",
title: "Use Pixel Extraction",
description:
"DEPRECATED: Whether to use extraction over pixels for multimodal mode.",
default: false,
},
llama_parse_params: {
$ref: "#/components/schemas/LlamaParseParameters",
description: "LlamaParse related settings.",
default: {
languages: ["en"],
parsing_instruction: "",
disable_ocr: false,
annotate_links: true,
adaptive_long_table: true,
compact_markdown_table: false,
disable_reconstruction: false,
disable_image_extraction: false,
invalidate_cache: false,
outlined_table_extraction: true,
merge_tables_across_pages_in_markdown: false,
output_pdf_of_document: false,
do_not_cache: false,
fast_mode: false,
skip_diagonal_text: false,
preserve_layout_alignment_across_pages: false,
preserve_very_small_text: false,
gpt4o_mode: false,
do_not_unroll_columns: false,
extract_layout: false,
high_res_ocr: false,
html_make_all_elements_visible: false,
layout_aware: false,
specialized_chart_parsing_agentic: false,
specialized_chart_parsing_plus: false,
specialized_chart_parsing_efficient: false,
specialized_image_parsing: false,
precise_bounding_box: false,
html_remove_navigation_elements: false,
html_remove_fixed_elements: false,
guess_xlsx_sheet_name: false,
use_vendor_multimodal_model: false,
page_prefix: `<<<PAGE:{pageNumber}>>>
`,
page_suffix: `
<<<END_PAGE>>>`,
take_screenshot: false,
is_formatting_instruction: true,
premium_mode: false,
continuous_mode: false,
auto_mode: false,
auto_mode_trigger_on_table_in_page: false,
auto_mode_trigger_on_image_in_page: false,
structured_output: false,
extract_charts: false,
spreadsheet_extract_sub_tables: false,
spreadsheet_force_formula_computation: false,
inline_images_in_markdown: false,
strict_mode_image_extraction: false,
strict_mode_image_ocr: false,
strict_mode_reconstruction: false,
strict_mode_buggy_font: false,
save_images: true,
hide_headers: false,
hide_footers: false,
ignore_document_elements_for_layout_detection: false,
output_tables_as_HTML: false,
internal_is_screenshot_job: false,
parse_mode: "parse_page_with_llm",
page_error_tolerance: 0.05,
replace_failed_page_mode: "raw_text",
},
},
multimodal_parse_resolution: {
$ref: "#/components/schemas/MultimodalParseResolution",
description: "The resolution to use for multimodal parsing.",
default: "medium",
},
},
type: "object",
title: "LlamaExtractSettings",
description: `All settings for the extraction agent. Only the settings in ExtractConfig
are exposed to the user.`,
} as const;
export const LlamaParseParametersSchema = {
properties: {
webhook_configurations: {
@@ -12602,12 +12533,6 @@ export const MetronomeDashboardTypeSchema = {
title: "MetronomeDashboardType",
} as const;
export const MultimodalParseResolutionSchema = {
type: "string",
enum: ["medium", "high"],
title: "MultimodalParseResolution",
} as const;
export const NodeRelationshipSchema = {
type: "string",
enum: ["1", "2", "3", "4", "5"],
@@ -13430,6 +13355,48 @@ export const PaginatedResponse_QuotaConfiguration_Schema = {
title: "PaginatedResponse[QuotaConfiguration]",
} as const;
export const PaginatedResponse_SpreadsheetJob_Schema = {
properties: {
items: {
items: {
$ref: "#/components/schemas/SpreadsheetJob",
},
type: "array",
title: "Items",
description: "The list of items.",
},
next_page_token: {
anyOf: [
{
type: "string",
},
{
type: "null",
},
],
title: "Next Page Token",
description:
"A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.",
},
total_size: {
anyOf: [
{
type: "integer",
},
{
type: "null",
},
],
title: "Total Size",
description:
"The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.",
},
},
type: "object",
required: ["items"],
title: "PaginatedResponse[SpreadsheetJob]",
} as const;
export const ParseConfigurationSchema = {
properties: {
id: {
@@ -17841,69 +17808,6 @@ export const ProjectUpdateSchema = {
description: "Schema for updating a project.",
} as const;
export const PromptConfSchema = {
properties: {
system_prompt: {
type: "string",
title: "System Prompt",
description: "The system prompt to use for the extraction.",
default:
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
},
extraction_prompt: {
type: "string",
title: "Extraction Prompt",
description: "The prompt to use for the extraction.",
default: "The extracted data using the given JSON schema.",
},
error_handling_prompt: {
type: "string",
title: "Error Handling Prompt",
description: "The prompt to use for error handling.",
default:
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
},
reasoning_prompt: {
type: "string",
title: "Reasoning Prompt",
description: "The prompt to use for reasoning.",
default: `
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
- For inferred values, explain the reasoning behind the extraction briefly.
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
`,
},
cite_sources_prompt: {
additionalProperties: {
type: "string",
},
type: "object",
title: "Cite Sources Prompt",
description: "The prompt to use for citing sources.",
default: {
description: `
### Citation Rules (read carefully):
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
`,
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
matching_text:
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
},
},
scratchpad_prompt: {
type: "string",
title: "Scratchpad Prompt",
description: "The prompt to use for scratchpad.",
default: "Use for intermediate step-by-step reasoning. Be concise.",
},
},
type: "object",
title: "PromptConf",
} as const;
export const PublicModelNameSchema = {
type: "string",
enum: [
@@ -17926,6 +17830,7 @@ export const PublicModelNameSchema = {
"gemini-2.5-pro",
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
"gemini-2.5-flash-lite",
"gemini-1.5-flash",
"gemini-1.5-pro",
],
@@ -18752,12 +18657,6 @@ export const RoleSchema = {
description: "Schema for a role.",
} as const;
export const SchemaRelaxModeSchema = {
type: "string",
enum: ["FULL", "TOP_LEVEL", "LEAF"],
title: "SchemaRelaxMode",
} as const;
export const SearchRequestSchema = {
properties: {
page_size: {
@@ -18950,6 +18849,135 @@ BM25: Uses Qdrant's FastEmbed BM25 model for sparse embeddings
AUTO: Automatically selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade)`,
} as const;
export const SpreadsheetJobSchema = {
properties: {
id: {
type: "string",
title: "Id",
description: "The ID of the job",
},
user_id: {
type: "string",
title: "User Id",
description: "The ID of the user",
},
project_id: {
type: "string",
format: "uuid",
title: "Project Id",
description: "The ID of the project",
},
file_id: {
type: "string",
format: "uuid",
title: "File Id",
description: "The ID of the file to parse",
},
config: {
$ref: "#/components/schemas/SpreadsheetParsingConfig",
description: "Configuration for the parsing job",
},
status: {
$ref: "#/components/schemas/StatusEnum",
description: "The status of the parsing job",
},
created_at: {
type: "string",
title: "Created At",
description: "When the job was created",
},
updated_at: {
type: "string",
title: "Updated At",
description: "When the job was last updated",
},
success: {
anyOf: [
{
type: "boolean",
},
{
type: "null",
},
],
title: "Success",
description: "Whether the job completed successfully",
},
tables: {
items: {
$ref: "#/components/schemas/ExtractedTable",
},
type: "array",
title: "Tables",
description: "All extracted tables (populated when job is complete)",
},
errors: {
items: {
type: "string",
},
type: "array",
title: "Errors",
description: "Any errors encountered",
},
},
type: "object",
required: [
"id",
"user_id",
"project_id",
"file_id",
"config",
"status",
"created_at",
"updated_at",
],
title: "SpreadsheetJob",
description: "A spreadsheet parsing job",
} as const;
export const SpreadsheetJobCreateSchema = {
properties: {
file_id: {
type: "string",
format: "uuid",
title: "File Id",
description: "The ID of the file to parse",
},
config: {
$ref: "#/components/schemas/SpreadsheetParsingConfig",
description: "Configuration for the parsing job",
},
},
type: "object",
required: ["file_id"],
title: "SpreadsheetJobCreate",
description: "Request to create a spreadsheet parsing job",
} as const;
export const SpreadsheetParsingConfigSchema = {
properties: {
sheet_names: {
anyOf: [
{
items: {
type: "string",
},
type: "array",
},
{
type: "null",
},
],
title: "Sheet Names",
description:
"The names of the sheets to parse. If empty, all sheets will be parsed.",
},
},
type: "object",
title: "SpreadsheetParsingConfig",
description: "Configuration for spreadsheet parsing",
} as const;
export const StatusEnumSchema = {
type: "string",
enum: ["PENDING", "SUCCESS", "ERROR", "PARTIAL_SUCCESS", "CANCELLED"],
@@ -18957,101 +18985,6 @@ export const StatusEnumSchema = {
description: "Enum for representing the status of a job",
} as const;
export const StructModeSchema = {
type: "string",
enum: [
"STRUCT_PARSE",
"JSON_MODE",
"FUNC_CALL",
"STRUCT_RELAXED",
"UNSTRUCTURED",
],
title: "StructMode",
} as const;
export const StructParseConfSchema = {
properties: {
model: {
$ref: "#/components/schemas/ExtractModels",
description: "The model to use for the structured parsing.",
default: "openai-gpt-4-1",
},
temperature: {
type: "number",
title: "Temperature",
description: "The temperature to use for the structured parsing.",
default: 0,
},
relaxation_mode: {
$ref: "#/components/schemas/SchemaRelaxMode",
description: "The relaxation mode to use for the structured parsing.",
default: "LEAF",
},
struct_mode: {
$ref: "#/components/schemas/StructMode",
description: "The struct mode to use for the structured parsing.",
default: "STRUCT_PARSE",
},
fetch_logprobs: {
type: "boolean",
title: "Fetch Logprobs",
description: "Whether to fetch logprobs for the structured parsing.",
default: false,
},
handle_missing: {
type: "boolean",
title: "Handle Missing",
description: "Whether to handle missing fields in the schema.",
default: false,
},
use_reasoning: {
type: "boolean",
title: "Use Reasoning",
description: "Whether to use reasoning for the structured extraction.",
default: false,
},
cite_sources: {
type: "boolean",
title: "Cite Sources",
description: "Whether to cite sources for the structured extraction.",
default: false,
},
prompt_conf: {
$ref: "#/components/schemas/PromptConf",
description: "The prompt configuration for the structured parsing.",
default: {
system_prompt:
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
extraction_prompt: "The extracted data using the given JSON schema.",
error_handling_prompt:
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
reasoning_prompt: `
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
- For inferred values, explain the reasoning behind the extraction briefly.
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
`,
cite_sources_prompt: {
description: `
### Citation Rules (read carefully):
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
`,
matching_text:
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
},
scratchpad_prompt:
"Use for intermediate step-by-step reasoning. Be concise.",
},
},
},
type: "object",
title: "StructParseConf",
description: "Configuration for the structured parsing agent.",
} as const;
export const SupportedLLMModelSchema = {
properties: {
name: {
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+174 -206
View File
@@ -691,115 +691,6 @@ export const zBodyRunJobOnFileApiV1ExtractionJobsFilePost = z.object({
config_override: z.union([z.string(), z.null()]).optional(),
});
export const zExtractTarget = z.enum(["PER_DOC", "PER_PAGE"]);
export const zExtractMode = z.enum([
"FAST",
"BALANCED",
"PREMIUM",
"MULTIMODAL",
]);
export const zPublicModelName = z.enum([
"openai-gpt-4o",
"openai-gpt-4o-mini",
"openai-gpt-4-1",
"openai-gpt-4-1-mini",
"openai-gpt-4-1-nano",
"openai-gpt-5",
"openai-gpt-5-mini",
"openai-gpt-5-nano",
"openai-text-embedding-3-small",
"openai-text-embedding-3-large",
"openai-whisper-1",
"anthropic-sonnet-3.5",
"anthropic-sonnet-3.5-v2",
"anthropic-sonnet-3.7",
"anthropic-sonnet-4.0",
"gemini-2.5-flash",
"gemini-2.5-pro",
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
"gemini-1.5-flash",
"gemini-1.5-pro",
]);
export const zExtractModels = z.enum([
"openai-gpt-4-1",
"openai-gpt-4-1-mini",
"openai-gpt-4-1-nano",
"openai-gpt-5",
"openai-gpt-5-mini",
"gemini-2.0-flash",
"gemini-2.5-flash",
"gemini-2.5-pro",
"openai-gpt-4o",
"openai-gpt-4o-mini",
]);
export const zDocumentChunkMode = z.enum(["PAGE", "SECTION"]);
/**
 * Zod object schema for an extraction configuration.
 *
 * Every field is optional. Boolean flags fall back to `false` when omitted;
 * `priority`, `parse_model`, `extract_model`, `system_prompt` and
 * `page_range` additionally accept an explicit `null`.
 */
export const zExtractConfig = z.object({
// One of "low" | "medium" | "high" | "critical", or null.
priority: z
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
.optional(),
extraction_target: zExtractTarget.optional(),
extraction_mode: zExtractMode.optional(),
parse_model: z.union([zPublicModelName, z.null()]).optional(),
extract_model: z.union([zExtractModels, z.null()]).optional(),
multimodal_fast_mode: z.boolean().optional().default(false),
system_prompt: z.union([z.string(), z.null()]).optional(),
use_reasoning: z.boolean().optional().default(false),
cite_sources: z.boolean().optional().default(false),
confidence_scores: z.boolean().optional().default(false),
chunk_mode: zDocumentChunkMode.optional(),
high_resolution_mode: z.boolean().optional().default(false),
invalidate_cache: z.boolean().optional().default(false),
page_range: z.union([z.string(), z.null()]).optional(),
});
/**
 * Zod object schema for creating an extraction job.
 *
 * `extraction_agent_id` and `file_id` are required UUID strings; all other
 * fields are optional and accept `null`.
 */
export const zExtractJobCreate = z.object({
priority: z
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
.optional(),
webhook_configurations: z
.union([z.array(zWebhookConfiguration), z.null()])
.optional(),
extraction_agent_id: z.string().uuid(),
file_id: z.string().uuid(),
// NOTE(review): z.object({}) declares no keys, and Zod objects drop
// undeclared keys on parse by default — confirm an object-valued override is
// never round-tripped through this schema, or it would come back empty.
data_schema_override: z
.union([z.object({}), z.string(), z.null()])
.optional(),
config_override: z.union([zExtractConfig, z.null()]).optional(),
});
/** Zod enum of the four accepted chunk-mode literals. */
export const zChunkMode = z.enum([
"PAGE",
"DOCUMENT",
"SECTION",
"GROUPED_PAGES",
]);
/** Zod enum for multimodal parse resolution; accepts exactly "medium" or "high". */
export const zMultimodalParseResolution = z.enum(["medium", "high"]);
/**
 * Zod object schema for extraction settings. Every field is optional; the
 * numeric and boolean fields carry the defaults shown below.
 */
export const zLlamaExtractSettings = z.object({
// 104857600 = 100 * 1024 * 1024 (presumably bytes — TODO confirm units).
max_file_size: z.number().int().optional().default(104857600),
// 31457280 = 30 * 1024 * 1024 (presumably bytes — TODO confirm units).
max_file_size_ui: z.number().int().optional().default(31457280),
max_pages: z.number().int().optional().default(500),
chunk_mode: zChunkMode.optional(),
max_chunk_size: z.number().int().optional().default(10000),
// NOTE(review): z.object({}) declares no keys; Zod drops undeclared keys on
// parse by default — confirm this free-form config is not parsed through here.
extraction_agent_config: z.object({}).optional(),
use_multimodal_parsing: z.boolean().optional().default(false),
use_pixel_extraction: z.boolean().optional().default(false),
llama_parse_params: zLlamaParseParameters.optional(),
multimodal_parse_resolution: zMultimodalParseResolution.optional(),
});
/**
 * Zod schema for the request body of the "run job test user" endpoint:
 * a required `job_create` plus optional, nullable `extract_settings`.
 */
export const zBodyRunJobTestUserApiV1ExtractionJobsTestPost = z.object({
job_create: zExtractJobCreate,
extract_settings: z.union([zLlamaExtractSettings, z.null()]).optional(),
});
export const zBodyScreenshotApiParsingScreenshotPost = z.object({
file: z.union([z.string(), z.null()]).optional(),
do_not_cache: z.boolean().optional().default(false),
@@ -1072,11 +963,6 @@ export const zBodyUploadFileApiV1ParsingUploadPost = z.object({
page_footer_suffix: z.string().optional(),
});
/**
 * Zod schema for the v2alpha1 parse-upload request body: a required
 * `configuration` string and an optional `file` that may be a string or null.
 */
export const zBodyUploadFileV2ApiV2Alpha1ParseUploadPost = z.object({
configuration: z.string(),
file: z.union([z.string(), z.null()]).optional(),
});
/** Zod enum for the Box auth mechanism; accepts exactly "developer_token" or "ccg". */
export const zBoxAuthMechanism = z.enum(["developer_token", "ccg"]);
export const zSupportedLlmModelNames = z.enum([
@@ -1700,6 +1586,7 @@ export const zCustomClaims = z.object({
allowed_classify: z.boolean().optional().default(true),
api_datasource_access: z.boolean().optional().default(false),
allow_org_deletion: z.boolean().optional().default(false),
allowed_spreadsheet: z.boolean().optional().default(false),
});
export const zCustomerPortalSessionCreatePayload = z.object({
@@ -1855,6 +1742,16 @@ export const zDefaultOrganizationUpdate = z.object({
organization_id: z.string().uuid(),
});
/**
 * Zod schema for the "delete agent data by query" request body.
 *
 * - `deployment_name`: required string.
 * - `collection`: optional string, defaults to "default".
 * - `filter`: optional; an object of per-field filter operations, or null.
 */
export const zDeleteRequest = z.object({
  deployment_name: z.string(),
  collection: z.string().optional().default("default"),
  // passthrough() is essential here: a bare z.object({}) declares no keys and
  // Zod strips undeclared keys on parse, so a validated request would lose its
  // whole filter and the delete would no longer be restricted by it.
  filter: z.union([z.object({}).passthrough(), z.null()]).optional(),
});
/** Zod schema for the delete-by-query response: a required integer `deleted_count`. */
export const zDeleteResponse = z.object({
deleted_count: z.number().int(),
});
export const zRetrieverPipeline = z.object({
name: z.union([z.string().min(1).max(3000), z.null()]),
description: z.union([z.string().max(15000), z.null()]),
@@ -1870,6 +1767,8 @@ export const zDirectRetrievalParams = z.object({
pipelines: z.array(zRetrieverPipeline).optional(),
});
/** Zod enum for document chunking; accepts exactly "PAGE" or "SECTION". */
export const zDocumentChunkMode = z.enum(["PAGE", "SECTION"]);
export const zDocumentIngestionJobParams = z.object({
custom_metadata: z.union([z.object({}), z.null()]).optional(),
resource_info: z.union([z.object({}), z.null()]).optional(),
@@ -2122,6 +2021,74 @@ Query: {query_str}
Answer: `),
});
/** Zod enum for the extraction target; accepts exactly "PER_DOC" or "PER_PAGE". */
export const zExtractTarget = z.enum(["PER_DOC", "PER_PAGE"]);
/** Zod enum of the four accepted extraction-mode literals. */
export const zExtractMode = z.enum([
"FAST",
"BALANCED",
"PREMIUM",
"MULTIMODAL",
]);
/**
 * Zod enum of the accepted public model-name literals (OpenAI, Anthropic and
 * Gemini identifiers). Referenced by `parse_model` in zExtractConfig; any
 * string not listed here fails validation.
 */
export const zPublicModelName = z.enum([
"openai-gpt-4o",
"openai-gpt-4o-mini",
"openai-gpt-4-1",
"openai-gpt-4-1-mini",
"openai-gpt-4-1-nano",
"openai-gpt-5",
"openai-gpt-5-mini",
"openai-gpt-5-nano",
"openai-text-embedding-3-small",
"openai-text-embedding-3-large",
"openai-whisper-1",
"anthropic-sonnet-3.5",
"anthropic-sonnet-3.5-v2",
"anthropic-sonnet-3.7",
"anthropic-sonnet-4.0",
"gemini-2.5-flash",
"gemini-2.5-pro",
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
"gemini-2.5-flash-lite",
"gemini-1.5-flash",
"gemini-1.5-pro",
]);
/**
 * Zod enum of the model-name literals accepted for `extract_model` in
 * zExtractConfig (OpenAI GPT and Gemini identifiers only).
 */
export const zExtractModels = z.enum([
"openai-gpt-4-1",
"openai-gpt-4-1-mini",
"openai-gpt-4-1-nano",
"openai-gpt-5",
"openai-gpt-5-mini",
"gemini-2.0-flash",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-pro",
"openai-gpt-4o",
"openai-gpt-4o-mini",
]);
/**
 * Zod object schema for an extraction configuration.
 *
 * Every field is optional. Boolean flags fall back to `false` when omitted;
 * `priority`, `parse_model`, `extract_model`, `system_prompt`,
 * `num_pages_context` and `page_range` additionally accept an explicit `null`.
 */
export const zExtractConfig = z.object({
// One of "low" | "medium" | "high" | "critical", or null.
priority: z
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
.optional(),
extraction_target: zExtractTarget.optional(),
extraction_mode: zExtractMode.optional(),
parse_model: z.union([zPublicModelName, z.null()]).optional(),
extract_model: z.union([zExtractModels, z.null()]).optional(),
multimodal_fast_mode: z.boolean().optional().default(false),
system_prompt: z.union([z.string(), z.null()]).optional(),
use_reasoning: z.boolean().optional().default(false),
cite_sources: z.boolean().optional().default(false),
confidence_scores: z.boolean().optional().default(false),
chunk_mode: zDocumentChunkMode.optional(),
high_resolution_mode: z.boolean().optional().default(false),
invalidate_cache: z.boolean().optional().default(false),
// When given as a number it must be an integer >= 1.
num_pages_context: z.union([z.number().int().gte(1), z.null()]).optional(),
page_range: z.union([z.string(), z.null()]).optional(),
});
export const zExtractAgent = z.object({
id: z.string().uuid(),
name: z.string(),
@@ -2167,6 +2134,21 @@ export const zExtractJob = z.object({
file: zFile,
});
/**
 * Zod object schema for creating an extraction job.
 *
 * `extraction_agent_id` and `file_id` are required UUID strings; all other
 * fields are optional and accept `null`.
 */
export const zExtractJobCreate = z.object({
priority: z
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
.optional(),
webhook_configurations: z
.union([z.array(zWebhookConfiguration), z.null()])
.optional(),
extraction_agent_id: z.string().uuid(),
file_id: z.string().uuid(),
// NOTE(review): z.object({}) declares no keys, and Zod objects drop
// undeclared keys on parse by default — confirm an object-valued override is
// never round-tripped through this schema, or it would come back empty.
data_schema_override: z
.union([z.object({}), z.string(), z.null()])
.optional(),
config_override: z.union([zExtractConfig, z.null()]).optional(),
});
export const zExtractJobCreateBatch = z.object({
extraction_agent_id: z.string().uuid(),
file_ids: z.array(z.string().uuid()).min(1),
@@ -2234,6 +2216,15 @@ export const zExtractStatelessRequest = z.object({
file: z.union([zFileData, z.null()]).optional(),
});
/**
 * Zod object schema describing one extracted table: integer id and spans,
 * the sheet name, a header flag, and optional (nullable) metadata carried as
 * a string in `metadata_json`.
 */
export const zExtractedTable = z.object({
table_id: z.number().int(),
sheet_name: z.string(),
row_span: z.number().int(),
col_span: z.number().int(),
has_headers: z.boolean(),
metadata_json: z.union([z.string(), z.null()]).optional(),
});
export const zFileCountByStatusResponse = z.object({
counts: z.object({}),
total_count: z.number().int(),
@@ -2987,6 +2978,30 @@ export const zPaginatedResponseQuotaConfiguration = z.object({
items: z.array(zQuotaConfiguration),
});
/**
 * Zod schema for spreadsheet parsing configuration; the only field is an
 * optional `sheet_names` that may be an array of strings or null.
 */
export const zSpreadsheetParsingConfig = z.object({
sheet_names: z.union([z.array(z.string()), z.null()]).optional(),
});
/**
 * Zod object schema for a spreadsheet job record.
 *
 * `id` and `user_id` are plain strings while `project_id` and `file_id` must
 * be UUIDs. `success`, `tables` and `errors` are optional.
 */
export const zSpreadsheetJob = z.object({
id: z.string(),
user_id: z.string(),
project_id: z.string().uuid(),
file_id: z.string().uuid(),
config: zSpreadsheetParsingConfig,
status: zStatusEnum,
// Validated only as strings here, not parsed into dates.
created_at: z.string(),
updated_at: z.string(),
success: z.union([z.boolean(), z.null()]).optional(),
tables: z.array(zExtractedTable).optional(),
errors: z.array(z.string()).optional(),
});
/**
 * Zod schema for one page of spreadsheet jobs: required `items`, plus an
 * optional nullable `next_page_token` and `total_size`.
 */
export const zPaginatedResponseSpreadsheetJob = z.object({
items: z.array(zSpreadsheetJob),
next_page_token: z.union([z.string(), z.null()]).optional(),
total_size: z.union([z.number().int(), z.null()]).optional(),
});
export const zParseConfiguration = z.object({
id: z.string(),
name: z.string(),
@@ -3400,49 +3415,6 @@ export const zProjectUpdate = z.object({
name: z.string().min(1).max(3000),
});
/**
 * Zod object schema for the prompt configuration.
 *
 * Every field is optional and falls back to the default prompt text embedded
 * below. `cite_sources_prompt` is an object whose default carries the
 * `description`, `page` and `matching_text` prompts.
 *
 * NOTE(review): `cite_sources_prompt` is declared as z.object({}) with no
 * keys; Zod drops undeclared keys on parse by default, so a caller-supplied
 * object would presumably be reduced to {} — confirm against the generator.
 */
export const zPromptConf = z.object({
system_prompt: z
.string()
.optional()
.default(
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
),
extraction_prompt: z
.string()
.optional()
.default("The extracted data using the given JSON schema."),
error_handling_prompt: z
.string()
.optional()
.default(
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
),
reasoning_prompt: z.string().optional().default(`
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
- For inferred values, explain the reasoning behind the extraction briefly.
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
`),
cite_sources_prompt: z
.object({})
.optional()
.default({
description: `
### Citation Rules (read carefully):
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
`,
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
matching_text:
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
}),
scratchpad_prompt: z
.string()
.optional()
.default("Use for intermediate step-by-step reasoning. Be concise."),
});
export const zRelatedNodeInfo = z.object({
node_id: z.string(),
node_type: z.union([zObjectType, z.string(), z.null()]).optional(),
@@ -3545,8 +3517,6 @@ export const zRole = z.object({
permissions: z.array(zPermission),
});
/** Zod enum for the schema relaxation mode; accepts "FULL", "TOP_LEVEL" or "LEAF". */
export const zSchemaRelaxMode = z.enum(["FULL", "TOP_LEVEL", "LEAF"]);
export const zSearchRequest = z.object({
page_size: z.union([z.number().int(), z.null()]).optional(),
page_token: z.union([z.string(), z.null()]).optional(),
@@ -3558,24 +3528,9 @@ export const zSearchRequest = z.object({
offset: z.union([z.number().int().gte(0).lte(1000), z.null()]).optional(),
});
/** Zod enum of the five accepted struct-mode literals. */
export const zStructMode = z.enum([
"STRUCT_PARSE",
"JSON_MODE",
"FUNC_CALL",
"STRUCT_RELAXED",
"UNSTRUCTURED",
]);
export const zStructParseConf = z.object({
model: zExtractModels.optional(),
temperature: z.number().optional().default(0),
relaxation_mode: zSchemaRelaxMode.optional(),
struct_mode: zStructMode.optional(),
fetch_logprobs: z.boolean().optional().default(false),
handle_missing: z.boolean().optional().default(false),
use_reasoning: z.boolean().optional().default(false),
cite_sources: z.boolean().optional().default(false),
prompt_conf: zPromptConf.optional(),
/**
 * Zod schema for creating a spreadsheet job: a required UUID `file_id` and
 * an optional parsing `config`.
 */
export const zSpreadsheetJobCreate = z.object({
file_id: z.string().uuid(),
config: zSpreadsheetParsingConfig.optional(),
});
export const zSupportedLlmModel = z.object({
@@ -4017,6 +3972,33 @@ export const zCreateIntentAndCustomerSessionApiV1BillingCreateIntentAndCustomerS
/** Response schema for the Metronome dashboard GET endpoint; aliases zMetronomeDashboardResponse. */
export const zGetMetronomeDashboardApiV1BillingMetronomeDashboardGetResponse =
zMetronomeDashboardResponse;
// Response schemas for the extraction job, run, and agent-list endpoints.
// Each constant either aliases a model schema directly, wraps it in z.array
// for list endpoints, or unions it with null where the body may be absent.
export const zListJobsApiV1ExtractionJobsGetResponse = z.array(zExtractJob);
export const zRunJobApiV1ExtractionJobsPostResponse = zExtractJob;
export const zGetJobApiV1ExtractionJobsJobIdGetResponse = zExtractJob;
export const zRunJobOnFileApiV1ExtractionJobsFilePostResponse = zExtractJob;
export const zRunBatchJobsApiV1ExtractionJobsBatchPostResponse =
z.array(zExtractJob);
export const zGetJobResultApiV1ExtractionJobsJobIdResultGetResponse =
zExtractResultset;
export const zListExtractRunsApiV1ExtractionRunsGetResponse =
zPaginatedExtractRunsResponse;
// The latest-run lookup may return null instead of a run.
export const zGetLatestRunFromUiApiV1ExtractionRunsLatestFromUiGetResponse =
z.union([zExtractRun, z.null()]);
export const zGetRunByJobIdApiV1ExtractionRunsByJobJobIdGetResponse =
zExtractRun;
export const zGetRunApiV1ExtractionRunsRunIdGetResponse = zExtractRun;
export const zExtractStatelessApiV1ExtractionRunPostResponse = zExtractJob;
export const zListExtractionAgentsApiV1ExtractionExtractionAgentsGetResponse =
z.array(zExtractAgent);
@@ -4041,35 +4023,6 @@ export const zGetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentId
/** Response schema for the update-extraction-agent PUT endpoint; aliases zExtractAgent. */
export const zUpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponse =
zExtractAgent;
// Response schemas for the extraction job and run endpoints, followed by the
// beta API-key endpoints. Each constant either aliases a model schema
// directly, wraps it in z.array for list endpoints, or unions it with null
// where the body may be absent.
export const zListJobsApiV1ExtractionJobsGetResponse = z.array(zExtractJob);
export const zRunJobApiV1ExtractionJobsPostResponse = zExtractJob;
export const zGetJobApiV1ExtractionJobsJobIdGetResponse = zExtractJob;
export const zRunJobTestUserApiV1ExtractionJobsTestPostResponse = zExtractJob;
export const zRunJobOnFileApiV1ExtractionJobsFilePostResponse = zExtractJob;
export const zRunBatchJobsApiV1ExtractionJobsBatchPostResponse =
z.array(zExtractJob);
export const zGetJobResultApiV1ExtractionJobsJobIdResultGetResponse =
zExtractResultset;
export const zListExtractRunsApiV1ExtractionRunsGetResponse =
zPaginatedExtractRunsResponse;
// The latest-run lookup may return null instead of a run.
export const zGetLatestRunFromUiApiV1ExtractionRunsLatestFromUiGetResponse =
z.union([zExtractRun, z.null()]);
export const zGetRunByJobIdApiV1ExtractionRunsByJobJobIdGetResponse =
zExtractRun;
export const zGetRunApiV1ExtractionRunsRunIdGetResponse = zExtractRun;
export const zExtractStatelessApiV1ExtractionRunPostResponse = zExtractJob;
export const zListApiKeysApiV1BetaApiKeysGetResponse = zApiKeyQueryResponse;
export const zCreateApiKeyApiV1BetaApiKeysPostResponse = zApiKey;
@@ -4100,6 +4053,9 @@ export const zSearchAgentDataApiV1BetaAgentDataSearchPostResponse =
/** Response schema for the agent-data aggregate POST endpoint; aliases zPaginatedResponseAggregateGroup. */
export const zAggregateAgentDataApiV1BetaAgentDataAggregatePostResponse =
zPaginatedResponseAggregateGroup;
/** Response schema for the agent-data delete-by-query POST endpoint; aliases zDeleteResponse. */
export const zDeleteAgentDataByQueryApiV1BetaAgentDataDeletePostResponse =
zDeleteResponse;
/** Response schema for the quota-configuration list endpoint; aliases zPaginatedResponseQuotaConfiguration. */
export const zListQuotaConfigurationsApiV1BetaQuotaManagementGetResponse =
zPaginatedResponseQuotaConfiguration;
@@ -4135,6 +4091,18 @@ export const zQueryParseConfigurationsApiV1BetaParseConfigurationsQueryPostRespo
/** Response schema for the latest-parse-configuration endpoint: a zParseConfiguration or null. */
export const zGetLatestParseConfigurationApiV1BetaParseConfigurationsLatestGetResponse =
z.union([zParseConfiguration, z.null()]);
// Response schemas for the beta spreadsheet-job endpoints (list, create, get,
// table-download URL) and the v2alpha1 parse upload. Each constant is a plain
// alias of a model schema declared elsewhere in this file.
export const zListSpreadsheetJobsApiV1BetaSpreadsheetJobsGetResponse =
zPaginatedResponseSpreadsheetJob;
export const zCreateSpreadsheetJobApiV1BetaSpreadsheetJobsPostResponse =
zSpreadsheetJob;
export const zGetSpreadsheetJobApiV1BetaSpreadsheetJobsSpreadsheetJobIdGetResponse =
zSpreadsheetJob;
export const zGetTableDownloadPresignedUrlApiV1BetaSpreadsheetJobsSpreadsheetJobIdTablesTableIdResultGetResponse =
zPresignedUrl;
export const zUploadFileV2ApiV2Alpha1ParseUploadPostResponse = zParsingJob;
export const zGetSupportedFileExtensionsApiParsingSupportedFileExtensionsGetResponse =
@@ -0,0 +1,246 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { AgentClient, createAgentDataClient } from "../src/beta/agent/index.js";
import * as sdk from "../src/client/index.js";
// Unit tests for AgentClient and createAgentDataClient. Every SDK function is
// replaced with a Vitest spy, so these cases only check how the client maps
// its camelCase options to snake_case request bodies and how it maps the
// mocked responses back to camelCase results.
describe("AgentClient", () => {
  // Restore all spies first so a mock from one case cannot leak into the next.
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  it("createItem sends correct payload and returns typed data", async () => {
    const timestamp = "2024-01-01T00:00:00Z";
    const createSpy = vi.spyOn(sdk, "createAgentDataApiV1BetaAgentDataPost");
    createSpy.mockResolvedValue({
      data: {
        id: "1",
        deployment_name: "dep",
        collection: "col",
        data: { foo: "bar" },
        created_at: timestamp,
        updated_at: timestamp,
      },
    } as any);

    const agentClient = new AgentClient<{ foo: string }>({
      deploymentName: "dep",
      collection: "col",
    });
    const created = await agentClient.createItem({ foo: "bar" });

    // Outgoing request body.
    expect(createSpy).toHaveBeenCalledOnce();
    const { body } = createSpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("dep");
    expect(body.collection).toBe("col");
    expect(body.data).toEqual({ foo: "bar" });

    // Mapped result: camelCase keys and Date instances.
    expect(created.id).toBe("1");
    expect(created.deploymentName).toBe("dep");
    expect(created.collection).toBe("col");
    expect(created.data).toEqual({ foo: "bar" });
    expect(created.createdAt).toEqual(new Date(timestamp));
    expect(created.updatedAt).toEqual(new Date(timestamp));
  });

  it("getItem returns null for 404 errors", async () => {
    const getSpy = vi.spyOn(sdk, "getAgentDataApiV1BetaAgentDataItemIdGet");
    // Reject with an error carrying an HTTP-like `response.status` of 404.
    getSpy.mockImplementation(async () => {
      const notFound: any = Object.assign(new Error("Not found"), {
        response: { status: 404 },
      });
      throw notFound;
    });

    const agentClient = new AgentClient({ deploymentName: "dep" });
    const found = await agentClient.getItem("missing-id");

    expect(getSpy).toHaveBeenCalledOnce();
    expect(found).toBeNull();
  });

  it("updateItem updates and returns typed data", async () => {
    const updateSpy = vi.spyOn(
      sdk,
      "updateAgentDataApiV1BetaAgentDataItemIdPut",
    );
    updateSpy.mockResolvedValue({
      data: {
        id: "123",
        deployment_name: "dep",
        collection: "col",
        data: { foo: "baz" },
        created_at: "2024-01-01T00:00:00Z",
        updated_at: "2024-01-02T00:00:00Z",
      },
    } as any);

    const agentClient = new AgentClient<{ foo: string }>({
      deploymentName: "dep",
      collection: "col",
    });
    const updated = await agentClient.updateItem("123", { foo: "baz" });

    expect(updateSpy).toHaveBeenCalledOnce();
    const request = updateSpy.mock.calls[0][0];
    expect(request.path.item_id).toBe("123");
    expect(request.body.data).toEqual({ foo: "baz" });
    expect(updated.id).toBe("123");
    expect(updated.updatedAt).toEqual(new Date("2024-01-02T00:00:00Z"));
  });

  it("deleteItem calls delete endpoint with correct path", async () => {
    const deleteSpy = vi.spyOn(
      sdk,
      "deleteAgentDataApiV1BetaAgentDataItemIdDelete",
    );
    deleteSpy.mockResolvedValue({} as any);

    const agentClient = new AgentClient({ deploymentName: "dep" });
    await agentClient.deleteItem("abc");

    expect(deleteSpy).toHaveBeenCalledOnce();
    const request = deleteSpy.mock.calls[0][0];
    expect(request.path.item_id).toBe("abc");
  });

  it("delete by query returns deleted count", async () => {
    const deleteByQuerySpy = vi.spyOn(
      sdk,
      "deleteAgentDataByQueryApiV1BetaAgentDataDeletePost",
    );
    deleteByQuerySpy.mockResolvedValue({ data: { deleted_count: 7 } } as any);

    const agentClient = new AgentClient({
      deploymentName: "dep",
      collection: "col",
    });
    const deletedCount = await agentClient.delete({
      filter: { status: { op: "eq", value: "accepted" } as any },
    });

    expect(deleteByQuerySpy).toHaveBeenCalledOnce();
    const { body } = deleteByQuerySpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("dep");
    expect(body.collection).toBe("col");
    expect(deletedCount).toBe(7);
  });

  it("search maps items and optional fields correctly", async () => {
    const now = "2024-01-01T00:00:00Z";
    const storedItem = {
      id: "1",
      deployment_name: "dep",
      collection: "col",
      data: { foo: "bar" },
      created_at: now,
      updated_at: now,
    };
    const searchSpy = vi.spyOn(
      sdk,
      "searchAgentDataApiV1BetaAgentDataSearchPost",
    );
    searchSpy.mockResolvedValue({
      data: {
        items: [storedItem],
        total_size: 1,
        next_page_token: "next",
      },
    } as any);

    const agentClient = new AgentClient<{ foo: string }>({
      deploymentName: "dep",
      collection: "col",
    });
    const page = await agentClient.search({
      includeTotal: true,
      orderBy: "created_at desc",
      pageSize: 1,
      offset: 0,
    });

    // Outgoing request body.
    expect(searchSpy).toHaveBeenCalledOnce();
    const { body } = searchSpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("dep");
    expect(body.collection).toBe("col");
    expect(body.include_total).toBe(true);
    expect(body.order_by).toBe("created_at desc");
    expect(body.page_size).toBe(1);
    expect(body.offset).toBe(0);

    // Mapped page.
    expect(page.items).toHaveLength(1);
    expect(page.totalSize).toBe(1);
    expect(page.nextPageToken).toBe("next");
    expect(page.items[0].createdAt).toEqual(new Date(now));
  });

  it("aggregate maps groups and optional fields correctly", async () => {
    const aggregateSpy = vi.spyOn(
      sdk,
      "aggregateAgentDataApiV1BetaAgentDataAggregatePost",
    );
    aggregateSpy.mockResolvedValue({
      data: {
        items: [
          {
            group_key: { status: "accepted" },
            count: 3,
            first_item: { foo: "bar" },
          },
        ],
        total_size: 1,
        next_page_token: "tok",
      },
    } as any);

    const agentClient = new AgentClient<{ foo: string }>({
      deploymentName: "dep",
      collection: "col",
    });
    const page = await agentClient.aggregate({
      groupBy: ["status"],
      count: true,
      first: true,
      pageSize: 1,
      offset: 0,
    });

    // Outgoing request body.
    expect(aggregateSpy).toHaveBeenCalledOnce();
    const { body } = aggregateSpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("dep");
    expect(body.collection).toBe("col");
    expect(body.group_by).toEqual(["status"]);
    expect(body.count).toBe(true);
    expect(body.first).toBe(true);
    expect(body.page_size).toBe(1);
    expect(body.offset).toBe(0);

    // Mapped page and its single group.
    expect(page.items).toHaveLength(1);
    expect(page.totalSize).toBe(1);
    expect(page.nextPageToken).toBe("tok");
    expect(page.items[0].groupKey).toEqual({ status: "accepted" });
    expect(page.items[0].count).toBe(3);
    expect(page.items[0].firstItem).toEqual({ foo: "bar" });
  });

  it("createAgentDataClient infers deployment name from env", async () => {
    const searchSpy = vi.spyOn(
      sdk,
      "searchAgentDataApiV1BetaAgentDataSearchPost",
    );
    searchSpy.mockResolvedValue({
      data: { items: [], total_size: 0 },
    } as any);

    const agentClient = createAgentDataClient({
      env: { LLAMA_DEPLOY_DEPLOYMENT_NAME: "env-dep" },
    });
    await agentClient.search({});

    const { body } = searchSpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("env-dep");
  });

  it("createAgentDataClient infers deployment name from windowUrl (non-local)", async () => {
    const deleteByQuerySpy = vi.spyOn(
      sdk,
      "deleteAgentDataByQueryApiV1BetaAgentDataDeletePost",
    );
    deleteByQuerySpy.mockResolvedValue({
      data: { deleted_count: 0 },
    } as any);

    const agentClient = createAgentDataClient({
      windowUrl: "https://app.llamaindex.ai/deployments/abc/ui/",
    });
    await agentClient.delete({});

    const { body } = deleteByQuerySpy.mock.calls[0][0];
    expect(body.deployment_name).toBe("abc");
  });
});