mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f708ff39e | |||
| 43b249f93d | |||
| d7ea1c27a1 | |||
| 6879228844 | |||
| 5623a66c22 | |||
| 46cd974082 | |||
| b495e4b897 | |||
| ded6ae9383 |
@@ -194,6 +194,21 @@ class AsyncAgentDataClient(Generic[AgentDataT]):
|
||||
async def delete_item(self, item_id: str) -> None:
|
||||
await self.client.beta.delete_agent_data(item_id=item_id)
|
||||
|
||||
@agent_data_retry
|
||||
async def delete(
|
||||
self, filter: Optional[Dict[str, Dict[ComparisonOperator, Any]]] = None
|
||||
) -> int:
|
||||
"""
|
||||
Delete agent data by query, similar to search.
|
||||
Returns the number of deleted items.
|
||||
"""
|
||||
response = await self.client.beta.delete_agent_data_by_query_api_v_1_beta_agent_data_delete_post(
|
||||
deployment_name=self.deployment_name,
|
||||
collection=self.collection,
|
||||
filter=filter,
|
||||
)
|
||||
return response.deleted_count
|
||||
|
||||
@agent_data_retry
|
||||
async def search(
|
||||
self,
|
||||
|
||||
@@ -19,14 +19,12 @@ from llama_cloud import (
|
||||
ExtractAgent as CloudExtractAgent,
|
||||
ExtractConfig,
|
||||
ExtractJob,
|
||||
ExtractJobCreate,
|
||||
ExtractRun,
|
||||
File,
|
||||
FileData,
|
||||
ExtractMode,
|
||||
StatusEnum,
|
||||
ExtractTarget,
|
||||
LlamaExtractSettings,
|
||||
PaginatedExtractRunsResponse,
|
||||
)
|
||||
from llama_cloud.client import AsyncLlamaCloud
|
||||
@@ -463,56 +461,6 @@ class ExtractionAgent:
|
||||
)
|
||||
)
|
||||
|
||||
async def _run_extraction_test(
|
||||
self,
|
||||
files: Union[FileInput, List[FileInput]],
|
||||
extract_settings: LlamaExtractSettings,
|
||||
) -> Union[ExtractJob, List[ExtractJob]]:
|
||||
if not isinstance(files, list):
|
||||
files = [files]
|
||||
single_file = True
|
||||
else:
|
||||
single_file = False
|
||||
|
||||
upload_tasks = [self._upload_file(file) for file in files]
|
||||
with augment_async_errors():
|
||||
uploaded_files = await run_jobs(
|
||||
upload_tasks,
|
||||
workers=self.num_workers,
|
||||
desc="Uploading files",
|
||||
show_progress=self.show_progress,
|
||||
)
|
||||
|
||||
async def run_job(file: File) -> ExtractRun:
|
||||
job_queued = await self._client.llama_extract.run_job_test_user(
|
||||
job_create=ExtractJobCreate(
|
||||
extraction_agent_id=self.id,
|
||||
file_id=file.id,
|
||||
data_schema_override=self.data_schema,
|
||||
config_override=self.config,
|
||||
),
|
||||
extract_settings=extract_settings,
|
||||
)
|
||||
return await self._wait_for_job_result(job_queued.id)
|
||||
|
||||
job_tasks = [run_job(file) for file in uploaded_files]
|
||||
with augment_async_errors():
|
||||
extract_results = await run_jobs(
|
||||
job_tasks,
|
||||
workers=self.num_workers,
|
||||
desc="Running extraction jobs",
|
||||
show_progress=self.show_progress,
|
||||
)
|
||||
|
||||
if self._verbose:
|
||||
for file, job in zip(files, extract_results):
|
||||
file_repr = (
|
||||
str(file) if isinstance(file, (str, Path)) else "<bytes/buffer>"
|
||||
)
|
||||
print(f"Running extraction for file {file_repr} under job_id {job.id}")
|
||||
|
||||
return extract_results[0] if single_file else extract_results
|
||||
|
||||
async def queue_extraction(
|
||||
self,
|
||||
files: Union[FileInput, List[FileInput]],
|
||||
@@ -544,12 +492,10 @@ class ExtractionAgent:
|
||||
|
||||
job_tasks = [
|
||||
self._client.llama_extract.run_job(
|
||||
request=ExtractJobCreate(
|
||||
extraction_agent_id=self.id,
|
||||
file_id=file.id,
|
||||
data_schema_override=self.data_schema,
|
||||
config_override=self.config,
|
||||
),
|
||||
extraction_agent_id=self.id,
|
||||
file_id=file.id,
|
||||
data_schema_override=self.data_schema,
|
||||
config_override=self.config,
|
||||
)
|
||||
for file in uploaded_files
|
||||
]
|
||||
|
||||
+1
-1
@@ -27,7 +27,7 @@ readme = "README.md"
|
||||
license = "MIT"
|
||||
dependencies = [
|
||||
"llama-index-core>=0.12.0",
|
||||
"llama-cloud==0.1.42",
|
||||
"llama-cloud==0.1.43",
|
||||
"pydantic>=2.8,!=2.10",
|
||||
"click>=8.1.7,<9",
|
||||
"python-dotenv>=1.0.1,<2",
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from llama_cloud_services.extract import LlamaExtract, ExtractionAgent
|
||||
from time import perf_counter
|
||||
from llama_cloud_services.extract import LlamaExtract
|
||||
from collections import namedtuple
|
||||
import json
|
||||
import uuid
|
||||
from llama_cloud.types import (
|
||||
ExtractConfig,
|
||||
ExtractMode,
|
||||
LlamaParseParameters,
|
||||
LlamaExtractSettings,
|
||||
)
|
||||
from tests.extract.util import load_test_dotenv
|
||||
|
||||
@@ -122,27 +119,3 @@ def extraction_agent(test_case: BenchmarkTestCase, extractor: LlamaExtract):
|
||||
# Create new agent
|
||||
agent = extractor.create_agent(agent_name, schema, config=test_case.config)
|
||||
yield agent
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"CI" in os.environ or not LLAMA_CLOUD_API_KEY,
|
||||
reason="LLAMA_CLOUD_API_KEY not set or CI environment not suitable for benchmarking",
|
||||
)
|
||||
@pytest.mark.parametrize("test_case", get_test_cases(), ids=lambda x: x.name)
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_extraction(
|
||||
test_case: BenchmarkTestCase, extraction_agent: ExtractionAgent
|
||||
) -> None:
|
||||
start = perf_counter()
|
||||
result = await extraction_agent._run_extraction_test(
|
||||
test_case.input_file,
|
||||
extract_settings=LlamaExtractSettings(
|
||||
llama_parse_params=LlamaParseParameters(
|
||||
invalidate_cache=True,
|
||||
do_not_cache=True,
|
||||
)
|
||||
),
|
||||
)
|
||||
end = perf_counter()
|
||||
print(f"Time taken: {end - start} seconds")
|
||||
print(result)
|
||||
|
||||
@@ -7,7 +7,7 @@ from pathlib import Path
|
||||
|
||||
|
||||
def load_test_dotenv():
|
||||
load_dotenv(Path(__file__).parent.parent.parent / ".env.dev", override=True)
|
||||
load_dotenv(Path(__file__).parent.parent.parent.parent / ".env.dev", override=True)
|
||||
|
||||
|
||||
def json_subset_match_score(expected: Any, actual: Any) -> float:
|
||||
|
||||
@@ -304,6 +304,9 @@ async def test_page_screenshot_retrieval(index_name: str, local_file: str):
|
||||
not base_url or not api_key, reason="No platform base url or api key set"
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(
|
||||
reason="Consistently failing with FAILED tests/index/test_index.py::test_page_figure_retrieval - assert 0 > 0 + where 0 = len([])"
|
||||
)
|
||||
async def test_page_figure_retrieval(index_name: str, local_figures_file: str):
|
||||
index = await LlamaCloudIndex.acreate_index(
|
||||
name=index_name,
|
||||
|
||||
@@ -118,10 +118,8 @@ async def test_extraction_agent_aextract_accepts_llama_file(
|
||||
dummy_llama_extract_iface = SimpleNamespace()
|
||||
|
||||
async def fake_run_job(**kwargs):
|
||||
# Ensure we are receiving a request with the right file_id
|
||||
request = kwargs.get("request")
|
||||
assert hasattr(request, "file_id")
|
||||
assert request.file_id == llama_file.id
|
||||
file_id = kwargs.get("file_id")
|
||||
assert file_id == llama_file.id
|
||||
return SimpleNamespace(id="job_42")
|
||||
|
||||
dummy_llama_extract_iface.run_job = fake_run_job
|
||||
|
||||
Generated
+5
-5
@@ -1582,21 +1582,21 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "llama-cloud"
|
||||
version = "0.1.42"
|
||||
version = "0.1.43"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/21/04/ae0694b582d6aab4d6e7957febb7bff048897ac231ad80ba1bd71547d944/llama_cloud-0.1.42.tar.gz", hash = "sha256:485aa0e364ea648e3aaa3b2c54af7bcb6f2242c50b4f86ec022e137413fff464", size = 112480, upload-time = "2025-09-16T20:25:42.631Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9b/33/33a8bd3a617c071caf450ca2627969f8b28272d0692f122997c10a32247e/llama_cloud-0.1.43.tar.gz", hash = "sha256:00429f05aea515449d90cde91ef3ed3687fcd93e46f6246d08cbea02f9b397a9", size = 112992, upload-time = "2025-10-02T21:55:38.355Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/61/85d115699a59d03f0783e119aaf6d534fca95dbe1a4531a8056e6a4774ed/llama_cloud-0.1.42-py3-none-any.whl", hash = "sha256:4ed3edde4a277ff52eeb831188c8476eb079b5e4605ad3142157a0f054b27d96", size = 311857, upload-time = "2025-09-16T20:25:41.479Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/54/559a67542396d5660a71115b29e0160e9dd784e570e1f4ef55ad22bf5b39/llama_cloud-0.1.43-py3-none-any.whl", hash = "sha256:540605d4dd13c6536a3b75cd4d04b211f29b16d17faee9381e3793a651f1dec1", size = 311460, upload-time = "2025-10-02T21:55:37.282Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "llama-cloud-services"
|
||||
version = "0.6.68"
|
||||
version = "0.6.69"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||
@@ -1631,7 +1631,7 @@ dev = [
|
||||
requires-dist = [
|
||||
{ name = "click", specifier = ">=8.1.7,<9" },
|
||||
{ name = "eval-type-backport", marker = "python_full_version < '3.10'", specifier = ">=0.2.0,<0.3" },
|
||||
{ name = "llama-cloud", specifier = "==0.1.42" },
|
||||
{ name = "llama-cloud", specifier = "==0.1.43" },
|
||||
{ name = "llama-index-core", specifier = ">=0.12.0" },
|
||||
{ name = "packaging", specifier = ">=25.0" },
|
||||
{ name = "platformdirs", specifier = ">=4.3.7,<5" },
|
||||
|
||||
+1714
-1270
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,7 @@
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"get-openapi": "node ./scripts/get-openapi.js",
|
||||
"generate": "./node_modules/.bin/openapi-ts",
|
||||
"build": "pnpm run generate && bunchee",
|
||||
"dev": "bunchee --watch",
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
import fs from 'fs';
|
||||
|
||||
async function downloadOpenApiSpec() {
|
||||
try {
|
||||
const response = await fetch('https://api.cloud.llamaindex.ai/api/openapi.json');
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP error! status: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
fs.writeFileSync('openapi.json', JSON.stringify(data, null, 2));
|
||||
console.log('Successfully downloaded openapi.json');
|
||||
} catch (error) {
|
||||
console.error('Error downloading OpenAPI spec:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
downloadOpenApiSpec();
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
aggregateAgentDataApiV1BetaAgentDataAggregatePost,
|
||||
createAgentDataApiV1BetaAgentDataPost,
|
||||
deleteAgentDataApiV1BetaAgentDataItemIdDelete,
|
||||
deleteAgentDataByQueryApiV1BetaAgentDataDeletePost,
|
||||
getAgentDataApiV1BetaAgentDataItemIdGet,
|
||||
searchAgentDataApiV1BetaAgentDataSearchPost,
|
||||
updateAgentDataApiV1BetaAgentDataItemIdPut,
|
||||
@@ -12,6 +13,7 @@ import {
|
||||
} from "../../client";
|
||||
import type {
|
||||
AggregateAgentDataOptions,
|
||||
DeleteAgentDataOptions,
|
||||
SearchAgentDataOptions,
|
||||
TypedAgentData,
|
||||
TypedAgentDataItems,
|
||||
@@ -112,6 +114,24 @@ export class AgentClient<T = unknown> {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete all matching agent data, returns the total number of deleted items
|
||||
*/
|
||||
async delete(options: DeleteAgentDataOptions): Promise<number> {
|
||||
const response = await deleteAgentDataByQueryApiV1BetaAgentDataDeletePost({
|
||||
throwOnError: true,
|
||||
body: {
|
||||
deployment_name: this.deploymentName,
|
||||
...(this.collection !== undefined && {
|
||||
collection: this.collection,
|
||||
}),
|
||||
...(options.filter !== undefined && { filter: options.filter }),
|
||||
},
|
||||
client: this.client,
|
||||
});
|
||||
return response.data.deleted_count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Search agent data
|
||||
*/
|
||||
|
||||
@@ -127,6 +127,14 @@ export interface SearchAgentDataOptions {
|
||||
includeTotal?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for deleting agent data
|
||||
*/
|
||||
export interface DeleteAgentDataOptions {
|
||||
/** Filter options for the deletion. */
|
||||
filter?: Record<string, FilterOperation>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for aggregating agent data
|
||||
*/
|
||||
|
||||
@@ -1530,27 +1530,6 @@ export const Body_run_job_on_file_api_v1_extraction_jobs_file_postSchema = {
|
||||
title: "Body_run_job_on_file_api_v1_extraction_jobs_file_post",
|
||||
} as const;
|
||||
|
||||
export const Body_run_job_test_user_api_v1_extraction_jobs_test_postSchema = {
|
||||
properties: {
|
||||
job_create: {
|
||||
$ref: "#/components/schemas/ExtractJobCreate",
|
||||
},
|
||||
extract_settings: {
|
||||
anyOf: [
|
||||
{
|
||||
$ref: "#/components/schemas/LlamaExtractSettings",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["job_create"],
|
||||
title: "Body_run_job_test_user_api_v1_extraction_jobs_test_post",
|
||||
} as const;
|
||||
|
||||
export const Body_screenshot_api_parsing_screenshot_postSchema = {
|
||||
properties: {
|
||||
file: {
|
||||
@@ -2796,30 +2775,6 @@ export const Body_upload_file_api_v1_parsing_upload_postSchema = {
|
||||
title: "Body_upload_file_api_v1_parsing_upload_post",
|
||||
} as const;
|
||||
|
||||
export const Body_upload_file_v2_api_v2alpha1_parse_upload_postSchema = {
|
||||
properties: {
|
||||
configuration: {
|
||||
type: "string",
|
||||
title: "Configuration",
|
||||
},
|
||||
file: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "string",
|
||||
format: "binary",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "File",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["configuration"],
|
||||
title: "Body_upload_file_v2_api_v2alpha1_parse_upload_post",
|
||||
} as const;
|
||||
|
||||
export const BoxAuthMechanismSchema = {
|
||||
type: "string",
|
||||
enum: ["developer_token", "ccg"],
|
||||
@@ -3180,12 +3135,6 @@ export const ChatMessageSchema = {
|
||||
title: "ChatMessage",
|
||||
} as const;
|
||||
|
||||
export const ChunkModeSchema = {
|
||||
type: "string",
|
||||
enum: ["PAGE", "DOCUMENT", "SECTION", "GROUPED_PAGES"],
|
||||
title: "ChunkMode",
|
||||
} as const;
|
||||
|
||||
export const ClassificationResultSchema = {
|
||||
properties: {
|
||||
reasoning: {
|
||||
@@ -5486,6 +5435,13 @@ export const CustomClaimsSchema = {
|
||||
description: "Whether the user is allowed to delete organizations.",
|
||||
default: false,
|
||||
},
|
||||
allowed_spreadsheet: {
|
||||
type: "boolean",
|
||||
title: "Allowed Spreadsheet",
|
||||
description:
|
||||
"Whether the user is allowed to access the spreadsheet feature.",
|
||||
default: false,
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
title: "CustomClaims",
|
||||
@@ -6213,6 +6169,54 @@ export const DeleteParamsSchema = {
|
||||
description: "Schema for the parameters of a delete job.",
|
||||
} as const;
|
||||
|
||||
export const DeleteRequestSchema = {
|
||||
properties: {
|
||||
deployment_name: {
|
||||
type: "string",
|
||||
title: "Deployment Name",
|
||||
description: "The agent deployment's name to delete data for",
|
||||
},
|
||||
collection: {
|
||||
type: "string",
|
||||
title: "Collection",
|
||||
description: "The logical agent data collection to delete from",
|
||||
default: "default",
|
||||
},
|
||||
filter: {
|
||||
anyOf: [
|
||||
{
|
||||
additionalProperties: {
|
||||
$ref: "#/components/schemas/FilterOperation",
|
||||
},
|
||||
type: "object",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Filter",
|
||||
description: "Optional filters to select which items to delete",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["deployment_name"],
|
||||
title: "DeleteRequest",
|
||||
description: "API request body for bulk deleting agent data by query",
|
||||
} as const;
|
||||
|
||||
export const DeleteResponseSchema = {
|
||||
properties: {
|
||||
deleted_count: {
|
||||
type: "integer",
|
||||
title: "Deleted Count",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["deleted_count"],
|
||||
title: "DeleteResponse",
|
||||
description: "API response for bulk delete operation",
|
||||
} as const;
|
||||
|
||||
export const DirectRetrievalParamsSchema = {
|
||||
properties: {
|
||||
mode: {
|
||||
@@ -6946,6 +6950,20 @@ export const ExtractConfigSchema = {
|
||||
description: "Whether to invalidate the cache for the extraction.",
|
||||
default: false,
|
||||
},
|
||||
num_pages_context: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "integer",
|
||||
minimum: 1,
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Num Pages Context",
|
||||
description:
|
||||
"Number of pages to pass as context on long document extraction.",
|
||||
},
|
||||
page_range: {
|
||||
anyOf: [
|
||||
{
|
||||
@@ -7202,6 +7220,7 @@ export const ExtractModelsSchema = {
|
||||
"openai-gpt-5-mini",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-flash-lite",
|
||||
"gemini-2.5-pro",
|
||||
"openai-gpt-4o",
|
||||
"openai-gpt-4o-mini",
|
||||
@@ -7849,6 +7868,52 @@ export const ExtractTargetSchema = {
|
||||
title: "ExtractTarget",
|
||||
} as const;
|
||||
|
||||
export const ExtractedTableSchema = {
|
||||
properties: {
|
||||
table_id: {
|
||||
type: "integer",
|
||||
title: "Table Id",
|
||||
description: "Unique identifier for this table within the file",
|
||||
},
|
||||
sheet_name: {
|
||||
type: "string",
|
||||
title: "Sheet Name",
|
||||
description: "Worksheet name where table was found",
|
||||
},
|
||||
row_span: {
|
||||
type: "integer",
|
||||
title: "Row Span",
|
||||
description: "Number of rows in the table",
|
||||
},
|
||||
col_span: {
|
||||
type: "integer",
|
||||
title: "Col Span",
|
||||
description: "Number of columns in the table",
|
||||
},
|
||||
has_headers: {
|
||||
type: "boolean",
|
||||
title: "Has Headers",
|
||||
description: "Whether the table has header rows",
|
||||
},
|
||||
metadata_json: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Metadata Json",
|
||||
description: "JSON metadata with detailed table information",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["table_id", "sheet_name", "row_span", "col_span", "has_headers"],
|
||||
title: "ExtractedTable",
|
||||
description: "A single extracted table from a spreadsheet",
|
||||
} as const;
|
||||
|
||||
export const FailPageModeSchema = {
|
||||
type: "string",
|
||||
enum: ["raw_text", "blank_page", "error_message"],
|
||||
@@ -10828,140 +10893,6 @@ export const LegacyParseJobConfigSchema = {
|
||||
description: "Configuration for llamaparse job",
|
||||
} as const;
|
||||
|
||||
export const LlamaExtractSettingsSchema = {
|
||||
properties: {
|
||||
max_file_size: {
|
||||
type: "integer",
|
||||
title: "Max File Size",
|
||||
description: "The maximum file size (in bytes) allowed for the document.",
|
||||
default: 104857600,
|
||||
},
|
||||
max_file_size_ui: {
|
||||
type: "integer",
|
||||
title: "Max File Size Ui",
|
||||
description: "The maximum file size (in bytes) allowed for the document.",
|
||||
default: 31457280,
|
||||
},
|
||||
max_pages: {
|
||||
type: "integer",
|
||||
title: "Max Pages",
|
||||
description: "The maximum number of pages allowed for the document.",
|
||||
default: 500,
|
||||
},
|
||||
chunk_mode: {
|
||||
$ref: "#/components/schemas/ChunkMode",
|
||||
description: "The mode to use for chunking the document.",
|
||||
default: "SECTION",
|
||||
},
|
||||
max_chunk_size: {
|
||||
type: "integer",
|
||||
title: "Max Chunk Size",
|
||||
description:
|
||||
"The maximum size of the chunks (in tokens) to use for chunking the document.",
|
||||
default: 10000,
|
||||
},
|
||||
extraction_agent_config: {
|
||||
additionalProperties: {
|
||||
$ref: "#/components/schemas/StructParseConf",
|
||||
},
|
||||
type: "object",
|
||||
title: "Extraction Agent Config",
|
||||
description: "The configuration for the extraction agent.",
|
||||
},
|
||||
use_multimodal_parsing: {
|
||||
type: "boolean",
|
||||
title: "Use Multimodal Parsing",
|
||||
description: "Whether to use experimental multimodal parsing.",
|
||||
default: false,
|
||||
},
|
||||
use_pixel_extraction: {
|
||||
type: "boolean",
|
||||
title: "Use Pixel Extraction",
|
||||
description:
|
||||
"DEPRECATED: Whether to use extraction over pixels for multimodal mode.",
|
||||
default: false,
|
||||
},
|
||||
llama_parse_params: {
|
||||
$ref: "#/components/schemas/LlamaParseParameters",
|
||||
description: "LlamaParse related settings.",
|
||||
default: {
|
||||
languages: ["en"],
|
||||
parsing_instruction: "",
|
||||
disable_ocr: false,
|
||||
annotate_links: true,
|
||||
adaptive_long_table: true,
|
||||
compact_markdown_table: false,
|
||||
disable_reconstruction: false,
|
||||
disable_image_extraction: false,
|
||||
invalidate_cache: false,
|
||||
outlined_table_extraction: true,
|
||||
merge_tables_across_pages_in_markdown: false,
|
||||
output_pdf_of_document: false,
|
||||
do_not_cache: false,
|
||||
fast_mode: false,
|
||||
skip_diagonal_text: false,
|
||||
preserve_layout_alignment_across_pages: false,
|
||||
preserve_very_small_text: false,
|
||||
gpt4o_mode: false,
|
||||
do_not_unroll_columns: false,
|
||||
extract_layout: false,
|
||||
high_res_ocr: false,
|
||||
html_make_all_elements_visible: false,
|
||||
layout_aware: false,
|
||||
specialized_chart_parsing_agentic: false,
|
||||
specialized_chart_parsing_plus: false,
|
||||
specialized_chart_parsing_efficient: false,
|
||||
specialized_image_parsing: false,
|
||||
precise_bounding_box: false,
|
||||
html_remove_navigation_elements: false,
|
||||
html_remove_fixed_elements: false,
|
||||
guess_xlsx_sheet_name: false,
|
||||
use_vendor_multimodal_model: false,
|
||||
page_prefix: `<<<PAGE:{pageNumber}>>>
|
||||
|
||||
`,
|
||||
page_suffix: `
|
||||
|
||||
<<<END_PAGE>>>`,
|
||||
take_screenshot: false,
|
||||
is_formatting_instruction: true,
|
||||
premium_mode: false,
|
||||
continuous_mode: false,
|
||||
auto_mode: false,
|
||||
auto_mode_trigger_on_table_in_page: false,
|
||||
auto_mode_trigger_on_image_in_page: false,
|
||||
structured_output: false,
|
||||
extract_charts: false,
|
||||
spreadsheet_extract_sub_tables: false,
|
||||
spreadsheet_force_formula_computation: false,
|
||||
inline_images_in_markdown: false,
|
||||
strict_mode_image_extraction: false,
|
||||
strict_mode_image_ocr: false,
|
||||
strict_mode_reconstruction: false,
|
||||
strict_mode_buggy_font: false,
|
||||
save_images: true,
|
||||
hide_headers: false,
|
||||
hide_footers: false,
|
||||
ignore_document_elements_for_layout_detection: false,
|
||||
output_tables_as_HTML: false,
|
||||
internal_is_screenshot_job: false,
|
||||
parse_mode: "parse_page_with_llm",
|
||||
page_error_tolerance: 0.05,
|
||||
replace_failed_page_mode: "raw_text",
|
||||
},
|
||||
},
|
||||
multimodal_parse_resolution: {
|
||||
$ref: "#/components/schemas/MultimodalParseResolution",
|
||||
description: "The resolution to use for multimodal parsing.",
|
||||
default: "medium",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
title: "LlamaExtractSettings",
|
||||
description: `All settings for the extraction agent. Only the settings in ExtractConfig
|
||||
are exposed to the user.`,
|
||||
} as const;
|
||||
|
||||
export const LlamaParseParametersSchema = {
|
||||
properties: {
|
||||
webhook_configurations: {
|
||||
@@ -12602,12 +12533,6 @@ export const MetronomeDashboardTypeSchema = {
|
||||
title: "MetronomeDashboardType",
|
||||
} as const;
|
||||
|
||||
export const MultimodalParseResolutionSchema = {
|
||||
type: "string",
|
||||
enum: ["medium", "high"],
|
||||
title: "MultimodalParseResolution",
|
||||
} as const;
|
||||
|
||||
export const NodeRelationshipSchema = {
|
||||
type: "string",
|
||||
enum: ["1", "2", "3", "4", "5"],
|
||||
@@ -13430,6 +13355,48 @@ export const PaginatedResponse_QuotaConfiguration_Schema = {
|
||||
title: "PaginatedResponse[QuotaConfiguration]",
|
||||
} as const;
|
||||
|
||||
export const PaginatedResponse_SpreadsheetJob_Schema = {
|
||||
properties: {
|
||||
items: {
|
||||
items: {
|
||||
$ref: "#/components/schemas/SpreadsheetJob",
|
||||
},
|
||||
type: "array",
|
||||
title: "Items",
|
||||
description: "The list of items.",
|
||||
},
|
||||
next_page_token: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Next Page Token",
|
||||
description:
|
||||
"A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.",
|
||||
},
|
||||
total_size: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "integer",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Total Size",
|
||||
description:
|
||||
"The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["items"],
|
||||
title: "PaginatedResponse[SpreadsheetJob]",
|
||||
} as const;
|
||||
|
||||
export const ParseConfigurationSchema = {
|
||||
properties: {
|
||||
id: {
|
||||
@@ -17841,69 +17808,6 @@ export const ProjectUpdateSchema = {
|
||||
description: "Schema for updating a project.",
|
||||
} as const;
|
||||
|
||||
export const PromptConfSchema = {
|
||||
properties: {
|
||||
system_prompt: {
|
||||
type: "string",
|
||||
title: "System Prompt",
|
||||
description: "The system prompt to use for the extraction.",
|
||||
default:
|
||||
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
|
||||
},
|
||||
extraction_prompt: {
|
||||
type: "string",
|
||||
title: "Extraction Prompt",
|
||||
description: "The prompt to use for the extraction.",
|
||||
default: "The extracted data using the given JSON schema.",
|
||||
},
|
||||
error_handling_prompt: {
|
||||
type: "string",
|
||||
title: "Error Handling Prompt",
|
||||
description: "The prompt to use for error handling.",
|
||||
default:
|
||||
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
|
||||
},
|
||||
reasoning_prompt: {
|
||||
type: "string",
|
||||
title: "Reasoning Prompt",
|
||||
description: "The prompt to use for reasoning.",
|
||||
default: `
|
||||
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
|
||||
- For inferred values, explain the reasoning behind the extraction briefly.
|
||||
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
|
||||
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
|
||||
`,
|
||||
},
|
||||
cite_sources_prompt: {
|
||||
additionalProperties: {
|
||||
type: "string",
|
||||
},
|
||||
type: "object",
|
||||
title: "Cite Sources Prompt",
|
||||
description: "The prompt to use for citing sources.",
|
||||
default: {
|
||||
description: `
|
||||
### Citation Rules (read carefully):
|
||||
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
|
||||
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
|
||||
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
|
||||
`,
|
||||
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
|
||||
matching_text:
|
||||
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
|
||||
},
|
||||
},
|
||||
scratchpad_prompt: {
|
||||
type: "string",
|
||||
title: "Scratchpad Prompt",
|
||||
description: "The prompt to use for scratchpad.",
|
||||
default: "Use for intermediate step-by-step reasoning. Be concise.",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
title: "PromptConf",
|
||||
} as const;
|
||||
|
||||
export const PublicModelNameSchema = {
|
||||
type: "string",
|
||||
enum: [
|
||||
@@ -17926,6 +17830,7 @@ export const PublicModelNameSchema = {
|
||||
"gemini-2.5-pro",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.0-flash-lite",
|
||||
"gemini-2.5-flash-lite",
|
||||
"gemini-1.5-flash",
|
||||
"gemini-1.5-pro",
|
||||
],
|
||||
@@ -18752,12 +18657,6 @@ export const RoleSchema = {
|
||||
description: "Schema for a role.",
|
||||
} as const;
|
||||
|
||||
export const SchemaRelaxModeSchema = {
|
||||
type: "string",
|
||||
enum: ["FULL", "TOP_LEVEL", "LEAF"],
|
||||
title: "SchemaRelaxMode",
|
||||
} as const;
|
||||
|
||||
export const SearchRequestSchema = {
|
||||
properties: {
|
||||
page_size: {
|
||||
@@ -18950,6 +18849,135 @@ BM25: Uses Qdrant's FastEmbed BM25 model for sparse embeddings
|
||||
AUTO: Automatically selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade)`,
|
||||
} as const;
|
||||
|
||||
export const SpreadsheetJobSchema = {
|
||||
properties: {
|
||||
id: {
|
||||
type: "string",
|
||||
title: "Id",
|
||||
description: "The ID of the job",
|
||||
},
|
||||
user_id: {
|
||||
type: "string",
|
||||
title: "User Id",
|
||||
description: "The ID of the user",
|
||||
},
|
||||
project_id: {
|
||||
type: "string",
|
||||
format: "uuid",
|
||||
title: "Project Id",
|
||||
description: "The ID of the project",
|
||||
},
|
||||
file_id: {
|
||||
type: "string",
|
||||
format: "uuid",
|
||||
title: "File Id",
|
||||
description: "The ID of the file to parse",
|
||||
},
|
||||
config: {
|
||||
$ref: "#/components/schemas/SpreadsheetParsingConfig",
|
||||
description: "Configuration for the parsing job",
|
||||
},
|
||||
status: {
|
||||
$ref: "#/components/schemas/StatusEnum",
|
||||
description: "The status of the parsing job",
|
||||
},
|
||||
created_at: {
|
||||
type: "string",
|
||||
title: "Created At",
|
||||
description: "When the job was created",
|
||||
},
|
||||
updated_at: {
|
||||
type: "string",
|
||||
title: "Updated At",
|
||||
description: "When the job was last updated",
|
||||
},
|
||||
success: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "boolean",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Success",
|
||||
description: "Whether the job completed successfully",
|
||||
},
|
||||
tables: {
|
||||
items: {
|
||||
$ref: "#/components/schemas/ExtractedTable",
|
||||
},
|
||||
type: "array",
|
||||
title: "Tables",
|
||||
description: "All extracted tables (populated when job is complete)",
|
||||
},
|
||||
errors: {
|
||||
items: {
|
||||
type: "string",
|
||||
},
|
||||
type: "array",
|
||||
title: "Errors",
|
||||
description: "Any errors encountered",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: [
|
||||
"id",
|
||||
"user_id",
|
||||
"project_id",
|
||||
"file_id",
|
||||
"config",
|
||||
"status",
|
||||
"created_at",
|
||||
"updated_at",
|
||||
],
|
||||
title: "SpreadsheetJob",
|
||||
description: "A spreadsheet parsing job",
|
||||
} as const;
|
||||
|
||||
export const SpreadsheetJobCreateSchema = {
|
||||
properties: {
|
||||
file_id: {
|
||||
type: "string",
|
||||
format: "uuid",
|
||||
title: "File Id",
|
||||
description: "The ID of the file to parse",
|
||||
},
|
||||
config: {
|
||||
$ref: "#/components/schemas/SpreadsheetParsingConfig",
|
||||
description: "Configuration for the parsing job",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
required: ["file_id"],
|
||||
title: "SpreadsheetJobCreate",
|
||||
description: "Request to create a spreadsheet parsing job",
|
||||
} as const;
|
||||
|
||||
export const SpreadsheetParsingConfigSchema = {
|
||||
properties: {
|
||||
sheet_names: {
|
||||
anyOf: [
|
||||
{
|
||||
items: {
|
||||
type: "string",
|
||||
},
|
||||
type: "array",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
],
|
||||
title: "Sheet Names",
|
||||
description:
|
||||
"The names of the sheets to parse. If empty, all sheets will be parsed.",
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
title: "SpreadsheetParsingConfig",
|
||||
description: "Configuration for spreadsheet parsing",
|
||||
} as const;
|
||||
|
||||
export const StatusEnumSchema = {
|
||||
type: "string",
|
||||
enum: ["PENDING", "SUCCESS", "ERROR", "PARTIAL_SUCCESS", "CANCELLED"],
|
||||
@@ -18957,101 +18985,6 @@ export const StatusEnumSchema = {
|
||||
description: "Enum for representing the status of a job",
|
||||
} as const;
|
||||
|
||||
export const StructModeSchema = {
|
||||
type: "string",
|
||||
enum: [
|
||||
"STRUCT_PARSE",
|
||||
"JSON_MODE",
|
||||
"FUNC_CALL",
|
||||
"STRUCT_RELAXED",
|
||||
"UNSTRUCTURED",
|
||||
],
|
||||
title: "StructMode",
|
||||
} as const;
|
||||
|
||||
export const StructParseConfSchema = {
|
||||
properties: {
|
||||
model: {
|
||||
$ref: "#/components/schemas/ExtractModels",
|
||||
description: "The model to use for the structured parsing.",
|
||||
default: "openai-gpt-4-1",
|
||||
},
|
||||
temperature: {
|
||||
type: "number",
|
||||
title: "Temperature",
|
||||
description: "The temperature to use for the structured parsing.",
|
||||
default: 0,
|
||||
},
|
||||
relaxation_mode: {
|
||||
$ref: "#/components/schemas/SchemaRelaxMode",
|
||||
description: "The relaxation mode to use for the structured parsing.",
|
||||
default: "LEAF",
|
||||
},
|
||||
struct_mode: {
|
||||
$ref: "#/components/schemas/StructMode",
|
||||
description: "The struct mode to use for the structured parsing.",
|
||||
default: "STRUCT_PARSE",
|
||||
},
|
||||
fetch_logprobs: {
|
||||
type: "boolean",
|
||||
title: "Fetch Logprobs",
|
||||
description: "Whether to fetch logprobs for the structured parsing.",
|
||||
default: false,
|
||||
},
|
||||
handle_missing: {
|
||||
type: "boolean",
|
||||
title: "Handle Missing",
|
||||
description: "Whether to handle missing fields in the schema.",
|
||||
default: false,
|
||||
},
|
||||
use_reasoning: {
|
||||
type: "boolean",
|
||||
title: "Use Reasoning",
|
||||
description: "Whether to use reasoning for the structured extraction.",
|
||||
default: false,
|
||||
},
|
||||
cite_sources: {
|
||||
type: "boolean",
|
||||
title: "Cite Sources",
|
||||
description: "Whether to cite sources for the structured extraction.",
|
||||
default: false,
|
||||
},
|
||||
prompt_conf: {
|
||||
$ref: "#/components/schemas/PromptConf",
|
||||
description: "The prompt configuration for the structured parsing.",
|
||||
default: {
|
||||
system_prompt:
|
||||
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
|
||||
extraction_prompt: "The extracted data using the given JSON schema.",
|
||||
error_handling_prompt:
|
||||
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
|
||||
reasoning_prompt: `
|
||||
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
|
||||
- For inferred values, explain the reasoning behind the extraction briefly.
|
||||
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
|
||||
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
|
||||
`,
|
||||
cite_sources_prompt: {
|
||||
description: `
|
||||
### Citation Rules (read carefully):
|
||||
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
|
||||
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
|
||||
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
|
||||
`,
|
||||
matching_text:
|
||||
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
|
||||
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
|
||||
},
|
||||
scratchpad_prompt:
|
||||
"Use for intermediate step-by-step reasoning. Be concise.",
|
||||
},
|
||||
},
|
||||
},
|
||||
type: "object",
|
||||
title: "StructParseConf",
|
||||
description: "Configuration for the structured parsing agent.",
|
||||
} as const;
|
||||
|
||||
export const SupportedLLMModelSchema = {
|
||||
properties: {
|
||||
name: {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -691,115 +691,6 @@ export const zBodyRunJobOnFileApiV1ExtractionJobsFilePost = z.object({
|
||||
config_override: z.union([z.string(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zExtractTarget = z.enum(["PER_DOC", "PER_PAGE"]);
|
||||
|
||||
export const zExtractMode = z.enum([
|
||||
"FAST",
|
||||
"BALANCED",
|
||||
"PREMIUM",
|
||||
"MULTIMODAL",
|
||||
]);
|
||||
|
||||
export const zPublicModelName = z.enum([
|
||||
"openai-gpt-4o",
|
||||
"openai-gpt-4o-mini",
|
||||
"openai-gpt-4-1",
|
||||
"openai-gpt-4-1-mini",
|
||||
"openai-gpt-4-1-nano",
|
||||
"openai-gpt-5",
|
||||
"openai-gpt-5-mini",
|
||||
"openai-gpt-5-nano",
|
||||
"openai-text-embedding-3-small",
|
||||
"openai-text-embedding-3-large",
|
||||
"openai-whisper-1",
|
||||
"anthropic-sonnet-3.5",
|
||||
"anthropic-sonnet-3.5-v2",
|
||||
"anthropic-sonnet-3.7",
|
||||
"anthropic-sonnet-4.0",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-pro",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.0-flash-lite",
|
||||
"gemini-1.5-flash",
|
||||
"gemini-1.5-pro",
|
||||
]);
|
||||
|
||||
export const zExtractModels = z.enum([
|
||||
"openai-gpt-4-1",
|
||||
"openai-gpt-4-1-mini",
|
||||
"openai-gpt-4-1-nano",
|
||||
"openai-gpt-5",
|
||||
"openai-gpt-5-mini",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-pro",
|
||||
"openai-gpt-4o",
|
||||
"openai-gpt-4o-mini",
|
||||
]);
|
||||
|
||||
export const zDocumentChunkMode = z.enum(["PAGE", "SECTION"]);
|
||||
|
||||
export const zExtractConfig = z.object({
|
||||
priority: z
|
||||
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
|
||||
.optional(),
|
||||
extraction_target: zExtractTarget.optional(),
|
||||
extraction_mode: zExtractMode.optional(),
|
||||
parse_model: z.union([zPublicModelName, z.null()]).optional(),
|
||||
extract_model: z.union([zExtractModels, z.null()]).optional(),
|
||||
multimodal_fast_mode: z.boolean().optional().default(false),
|
||||
system_prompt: z.union([z.string(), z.null()]).optional(),
|
||||
use_reasoning: z.boolean().optional().default(false),
|
||||
cite_sources: z.boolean().optional().default(false),
|
||||
confidence_scores: z.boolean().optional().default(false),
|
||||
chunk_mode: zDocumentChunkMode.optional(),
|
||||
high_resolution_mode: z.boolean().optional().default(false),
|
||||
invalidate_cache: z.boolean().optional().default(false),
|
||||
page_range: z.union([z.string(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zExtractJobCreate = z.object({
|
||||
priority: z
|
||||
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
|
||||
.optional(),
|
||||
webhook_configurations: z
|
||||
.union([z.array(zWebhookConfiguration), z.null()])
|
||||
.optional(),
|
||||
extraction_agent_id: z.string().uuid(),
|
||||
file_id: z.string().uuid(),
|
||||
data_schema_override: z
|
||||
.union([z.object({}), z.string(), z.null()])
|
||||
.optional(),
|
||||
config_override: z.union([zExtractConfig, z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zChunkMode = z.enum([
|
||||
"PAGE",
|
||||
"DOCUMENT",
|
||||
"SECTION",
|
||||
"GROUPED_PAGES",
|
||||
]);
|
||||
|
||||
export const zMultimodalParseResolution = z.enum(["medium", "high"]);
|
||||
|
||||
export const zLlamaExtractSettings = z.object({
|
||||
max_file_size: z.number().int().optional().default(104857600),
|
||||
max_file_size_ui: z.number().int().optional().default(31457280),
|
||||
max_pages: z.number().int().optional().default(500),
|
||||
chunk_mode: zChunkMode.optional(),
|
||||
max_chunk_size: z.number().int().optional().default(10000),
|
||||
extraction_agent_config: z.object({}).optional(),
|
||||
use_multimodal_parsing: z.boolean().optional().default(false),
|
||||
use_pixel_extraction: z.boolean().optional().default(false),
|
||||
llama_parse_params: zLlamaParseParameters.optional(),
|
||||
multimodal_parse_resolution: zMultimodalParseResolution.optional(),
|
||||
});
|
||||
|
||||
export const zBodyRunJobTestUserApiV1ExtractionJobsTestPost = z.object({
|
||||
job_create: zExtractJobCreate,
|
||||
extract_settings: z.union([zLlamaExtractSettings, z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zBodyScreenshotApiParsingScreenshotPost = z.object({
|
||||
file: z.union([z.string(), z.null()]).optional(),
|
||||
do_not_cache: z.boolean().optional().default(false),
|
||||
@@ -1072,11 +963,6 @@ export const zBodyUploadFileApiV1ParsingUploadPost = z.object({
|
||||
page_footer_suffix: z.string().optional(),
|
||||
});
|
||||
|
||||
export const zBodyUploadFileV2ApiV2Alpha1ParseUploadPost = z.object({
|
||||
configuration: z.string(),
|
||||
file: z.union([z.string(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zBoxAuthMechanism = z.enum(["developer_token", "ccg"]);
|
||||
|
||||
export const zSupportedLlmModelNames = z.enum([
|
||||
@@ -1700,6 +1586,7 @@ export const zCustomClaims = z.object({
|
||||
allowed_classify: z.boolean().optional().default(true),
|
||||
api_datasource_access: z.boolean().optional().default(false),
|
||||
allow_org_deletion: z.boolean().optional().default(false),
|
||||
allowed_spreadsheet: z.boolean().optional().default(false),
|
||||
});
|
||||
|
||||
export const zCustomerPortalSessionCreatePayload = z.object({
|
||||
@@ -1855,6 +1742,16 @@ export const zDefaultOrganizationUpdate = z.object({
|
||||
organization_id: z.string().uuid(),
|
||||
});
|
||||
|
||||
export const zDeleteRequest = z.object({
|
||||
deployment_name: z.string(),
|
||||
collection: z.string().optional().default("default"),
|
||||
filter: z.union([z.object({}), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zDeleteResponse = z.object({
|
||||
deleted_count: z.number().int(),
|
||||
});
|
||||
|
||||
export const zRetrieverPipeline = z.object({
|
||||
name: z.union([z.string().min(1).max(3000), z.null()]),
|
||||
description: z.union([z.string().max(15000), z.null()]),
|
||||
@@ -1870,6 +1767,8 @@ export const zDirectRetrievalParams = z.object({
|
||||
pipelines: z.array(zRetrieverPipeline).optional(),
|
||||
});
|
||||
|
||||
export const zDocumentChunkMode = z.enum(["PAGE", "SECTION"]);
|
||||
|
||||
export const zDocumentIngestionJobParams = z.object({
|
||||
custom_metadata: z.union([z.object({}), z.null()]).optional(),
|
||||
resource_info: z.union([z.object({}), z.null()]).optional(),
|
||||
@@ -2122,6 +2021,74 @@ Query: {query_str}
|
||||
Answer: `),
|
||||
});
|
||||
|
||||
export const zExtractTarget = z.enum(["PER_DOC", "PER_PAGE"]);
|
||||
|
||||
export const zExtractMode = z.enum([
|
||||
"FAST",
|
||||
"BALANCED",
|
||||
"PREMIUM",
|
||||
"MULTIMODAL",
|
||||
]);
|
||||
|
||||
export const zPublicModelName = z.enum([
|
||||
"openai-gpt-4o",
|
||||
"openai-gpt-4o-mini",
|
||||
"openai-gpt-4-1",
|
||||
"openai-gpt-4-1-mini",
|
||||
"openai-gpt-4-1-nano",
|
||||
"openai-gpt-5",
|
||||
"openai-gpt-5-mini",
|
||||
"openai-gpt-5-nano",
|
||||
"openai-text-embedding-3-small",
|
||||
"openai-text-embedding-3-large",
|
||||
"openai-whisper-1",
|
||||
"anthropic-sonnet-3.5",
|
||||
"anthropic-sonnet-3.5-v2",
|
||||
"anthropic-sonnet-3.7",
|
||||
"anthropic-sonnet-4.0",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-pro",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.0-flash-lite",
|
||||
"gemini-2.5-flash-lite",
|
||||
"gemini-1.5-flash",
|
||||
"gemini-1.5-pro",
|
||||
]);
|
||||
|
||||
export const zExtractModels = z.enum([
|
||||
"openai-gpt-4-1",
|
||||
"openai-gpt-4-1-mini",
|
||||
"openai-gpt-4-1-nano",
|
||||
"openai-gpt-5",
|
||||
"openai-gpt-5-mini",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-flash-lite",
|
||||
"gemini-2.5-pro",
|
||||
"openai-gpt-4o",
|
||||
"openai-gpt-4o-mini",
|
||||
]);
|
||||
|
||||
export const zExtractConfig = z.object({
|
||||
priority: z
|
||||
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
|
||||
.optional(),
|
||||
extraction_target: zExtractTarget.optional(),
|
||||
extraction_mode: zExtractMode.optional(),
|
||||
parse_model: z.union([zPublicModelName, z.null()]).optional(),
|
||||
extract_model: z.union([zExtractModels, z.null()]).optional(),
|
||||
multimodal_fast_mode: z.boolean().optional().default(false),
|
||||
system_prompt: z.union([z.string(), z.null()]).optional(),
|
||||
use_reasoning: z.boolean().optional().default(false),
|
||||
cite_sources: z.boolean().optional().default(false),
|
||||
confidence_scores: z.boolean().optional().default(false),
|
||||
chunk_mode: zDocumentChunkMode.optional(),
|
||||
high_resolution_mode: z.boolean().optional().default(false),
|
||||
invalidate_cache: z.boolean().optional().default(false),
|
||||
num_pages_context: z.union([z.number().int().gte(1), z.null()]).optional(),
|
||||
page_range: z.union([z.string(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zExtractAgent = z.object({
|
||||
id: z.string().uuid(),
|
||||
name: z.string(),
|
||||
@@ -2167,6 +2134,21 @@ export const zExtractJob = z.object({
|
||||
file: zFile,
|
||||
});
|
||||
|
||||
export const zExtractJobCreate = z.object({
|
||||
priority: z
|
||||
.union([z.enum(["low", "medium", "high", "critical"]), z.null()])
|
||||
.optional(),
|
||||
webhook_configurations: z
|
||||
.union([z.array(zWebhookConfiguration), z.null()])
|
||||
.optional(),
|
||||
extraction_agent_id: z.string().uuid(),
|
||||
file_id: z.string().uuid(),
|
||||
data_schema_override: z
|
||||
.union([z.object({}), z.string(), z.null()])
|
||||
.optional(),
|
||||
config_override: z.union([zExtractConfig, z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zExtractJobCreateBatch = z.object({
|
||||
extraction_agent_id: z.string().uuid(),
|
||||
file_ids: z.array(z.string().uuid()).min(1),
|
||||
@@ -2234,6 +2216,15 @@ export const zExtractStatelessRequest = z.object({
|
||||
file: z.union([zFileData, z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zExtractedTable = z.object({
|
||||
table_id: z.number().int(),
|
||||
sheet_name: z.string(),
|
||||
row_span: z.number().int(),
|
||||
col_span: z.number().int(),
|
||||
has_headers: z.boolean(),
|
||||
metadata_json: z.union([z.string(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zFileCountByStatusResponse = z.object({
|
||||
counts: z.object({}),
|
||||
total_count: z.number().int(),
|
||||
@@ -2987,6 +2978,30 @@ export const zPaginatedResponseQuotaConfiguration = z.object({
|
||||
items: z.array(zQuotaConfiguration),
|
||||
});
|
||||
|
||||
export const zSpreadsheetParsingConfig = z.object({
|
||||
sheet_names: z.union([z.array(z.string()), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zSpreadsheetJob = z.object({
|
||||
id: z.string(),
|
||||
user_id: z.string(),
|
||||
project_id: z.string().uuid(),
|
||||
file_id: z.string().uuid(),
|
||||
config: zSpreadsheetParsingConfig,
|
||||
status: zStatusEnum,
|
||||
created_at: z.string(),
|
||||
updated_at: z.string(),
|
||||
success: z.union([z.boolean(), z.null()]).optional(),
|
||||
tables: z.array(zExtractedTable).optional(),
|
||||
errors: z.array(z.string()).optional(),
|
||||
});
|
||||
|
||||
export const zPaginatedResponseSpreadsheetJob = z.object({
|
||||
items: z.array(zSpreadsheetJob),
|
||||
next_page_token: z.union([z.string(), z.null()]).optional(),
|
||||
total_size: z.union([z.number().int(), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zParseConfiguration = z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
@@ -3400,49 +3415,6 @@ export const zProjectUpdate = z.object({
|
||||
name: z.string().min(1).max(3000),
|
||||
});
|
||||
|
||||
export const zPromptConf = z.object({
|
||||
system_prompt: z
|
||||
.string()
|
||||
.optional()
|
||||
.default(
|
||||
"Given a JSON schema, extract the data from the provided SOURCE TEXT according to the schema. Only output information that is explicitly stated or can be inferred from the SOURCE TEXT.",
|
||||
),
|
||||
extraction_prompt: z
|
||||
.string()
|
||||
.optional()
|
||||
.default("The extracted data using the given JSON schema."),
|
||||
error_handling_prompt: z
|
||||
.string()
|
||||
.optional()
|
||||
.default(
|
||||
"If the source text does not contain enough information to extract the value, explain the reason very briefly. Else, output null and fill out the value__ field.",
|
||||
),
|
||||
reasoning_prompt: z.string().optional().default(`
|
||||
Provide a brief explanation for how you arrived at the extracted value based on the source text provided.
|
||||
- For inferred values, explain the reasoning behind the extraction briefly.
|
||||
- For simple verbatim extraction, output 'VERBATIM EXTRACTION'.
|
||||
- When supporting data is not present in the source text, output 'INSUFFICIENT DATA' and emit blank or null values for the value__ field.
|
||||
`),
|
||||
cite_sources_prompt: z
|
||||
.object({})
|
||||
.optional()
|
||||
.default({
|
||||
description: `
|
||||
### Citation Rules (read carefully):
|
||||
- You must ANNOTATE every value with the MOST RELEVANT short EXACT substring from the source text that supports it.
|
||||
- For inferred values, cite the text used to infer it in the matching_text field or output 'INFERRED FROM TEXT'
|
||||
- If no support exists, output 'INSUFFICIENT DATA' and leave value__ null or '', 0.0, False etc depending on the type of the field.
|
||||
`,
|
||||
page: "Cite the page number of the source text that the extracted value is from. The page number is the integer that appears right after <<<PAGE:. If no page number is present in this format, use the default value of 1.",
|
||||
matching_text:
|
||||
'Cite the **MOST RELEVANT EXACT TEXT from the SOURCE TEXT** that supports the extracted value within 80 characters. If the exact substring is >80 chars, truncate with ellipsis "...". Provide only the single most relevant citation.',
|
||||
}),
|
||||
scratchpad_prompt: z
|
||||
.string()
|
||||
.optional()
|
||||
.default("Use for intermediate step-by-step reasoning. Be concise."),
|
||||
});
|
||||
|
||||
export const zRelatedNodeInfo = z.object({
|
||||
node_id: z.string(),
|
||||
node_type: z.union([zObjectType, z.string(), z.null()]).optional(),
|
||||
@@ -3545,8 +3517,6 @@ export const zRole = z.object({
|
||||
permissions: z.array(zPermission),
|
||||
});
|
||||
|
||||
export const zSchemaRelaxMode = z.enum(["FULL", "TOP_LEVEL", "LEAF"]);
|
||||
|
||||
export const zSearchRequest = z.object({
|
||||
page_size: z.union([z.number().int(), z.null()]).optional(),
|
||||
page_token: z.union([z.string(), z.null()]).optional(),
|
||||
@@ -3558,24 +3528,9 @@ export const zSearchRequest = z.object({
|
||||
offset: z.union([z.number().int().gte(0).lte(1000), z.null()]).optional(),
|
||||
});
|
||||
|
||||
export const zStructMode = z.enum([
|
||||
"STRUCT_PARSE",
|
||||
"JSON_MODE",
|
||||
"FUNC_CALL",
|
||||
"STRUCT_RELAXED",
|
||||
"UNSTRUCTURED",
|
||||
]);
|
||||
|
||||
export const zStructParseConf = z.object({
|
||||
model: zExtractModels.optional(),
|
||||
temperature: z.number().optional().default(0),
|
||||
relaxation_mode: zSchemaRelaxMode.optional(),
|
||||
struct_mode: zStructMode.optional(),
|
||||
fetch_logprobs: z.boolean().optional().default(false),
|
||||
handle_missing: z.boolean().optional().default(false),
|
||||
use_reasoning: z.boolean().optional().default(false),
|
||||
cite_sources: z.boolean().optional().default(false),
|
||||
prompt_conf: zPromptConf.optional(),
|
||||
export const zSpreadsheetJobCreate = z.object({
|
||||
file_id: z.string().uuid(),
|
||||
config: zSpreadsheetParsingConfig.optional(),
|
||||
});
|
||||
|
||||
export const zSupportedLlmModel = z.object({
|
||||
@@ -4017,6 +3972,33 @@ export const zCreateIntentAndCustomerSessionApiV1BillingCreateIntentAndCustomerS
|
||||
export const zGetMetronomeDashboardApiV1BillingMetronomeDashboardGetResponse =
|
||||
zMetronomeDashboardResponse;
|
||||
|
||||
export const zListJobsApiV1ExtractionJobsGetResponse = z.array(zExtractJob);
|
||||
|
||||
export const zRunJobApiV1ExtractionJobsPostResponse = zExtractJob;
|
||||
|
||||
export const zGetJobApiV1ExtractionJobsJobIdGetResponse = zExtractJob;
|
||||
|
||||
export const zRunJobOnFileApiV1ExtractionJobsFilePostResponse = zExtractJob;
|
||||
|
||||
export const zRunBatchJobsApiV1ExtractionJobsBatchPostResponse =
|
||||
z.array(zExtractJob);
|
||||
|
||||
export const zGetJobResultApiV1ExtractionJobsJobIdResultGetResponse =
|
||||
zExtractResultset;
|
||||
|
||||
export const zListExtractRunsApiV1ExtractionRunsGetResponse =
|
||||
zPaginatedExtractRunsResponse;
|
||||
|
||||
export const zGetLatestRunFromUiApiV1ExtractionRunsLatestFromUiGetResponse =
|
||||
z.union([zExtractRun, z.null()]);
|
||||
|
||||
export const zGetRunByJobIdApiV1ExtractionRunsByJobJobIdGetResponse =
|
||||
zExtractRun;
|
||||
|
||||
export const zGetRunApiV1ExtractionRunsRunIdGetResponse = zExtractRun;
|
||||
|
||||
export const zExtractStatelessApiV1ExtractionRunPostResponse = zExtractJob;
|
||||
|
||||
export const zListExtractionAgentsApiV1ExtractionExtractionAgentsGetResponse =
|
||||
z.array(zExtractAgent);
|
||||
|
||||
@@ -4041,35 +4023,6 @@ export const zGetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentId
|
||||
export const zUpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponse =
|
||||
zExtractAgent;
|
||||
|
||||
export const zListJobsApiV1ExtractionJobsGetResponse = z.array(zExtractJob);
|
||||
|
||||
export const zRunJobApiV1ExtractionJobsPostResponse = zExtractJob;
|
||||
|
||||
export const zGetJobApiV1ExtractionJobsJobIdGetResponse = zExtractJob;
|
||||
|
||||
export const zRunJobTestUserApiV1ExtractionJobsTestPostResponse = zExtractJob;
|
||||
|
||||
export const zRunJobOnFileApiV1ExtractionJobsFilePostResponse = zExtractJob;
|
||||
|
||||
export const zRunBatchJobsApiV1ExtractionJobsBatchPostResponse =
|
||||
z.array(zExtractJob);
|
||||
|
||||
export const zGetJobResultApiV1ExtractionJobsJobIdResultGetResponse =
|
||||
zExtractResultset;
|
||||
|
||||
export const zListExtractRunsApiV1ExtractionRunsGetResponse =
|
||||
zPaginatedExtractRunsResponse;
|
||||
|
||||
export const zGetLatestRunFromUiApiV1ExtractionRunsLatestFromUiGetResponse =
|
||||
z.union([zExtractRun, z.null()]);
|
||||
|
||||
export const zGetRunByJobIdApiV1ExtractionRunsByJobJobIdGetResponse =
|
||||
zExtractRun;
|
||||
|
||||
export const zGetRunApiV1ExtractionRunsRunIdGetResponse = zExtractRun;
|
||||
|
||||
export const zExtractStatelessApiV1ExtractionRunPostResponse = zExtractJob;
|
||||
|
||||
export const zListApiKeysApiV1BetaApiKeysGetResponse = zApiKeyQueryResponse;
|
||||
|
||||
export const zCreateApiKeyApiV1BetaApiKeysPostResponse = zApiKey;
|
||||
@@ -4100,6 +4053,9 @@ export const zSearchAgentDataApiV1BetaAgentDataSearchPostResponse =
|
||||
export const zAggregateAgentDataApiV1BetaAgentDataAggregatePostResponse =
|
||||
zPaginatedResponseAggregateGroup;
|
||||
|
||||
export const zDeleteAgentDataByQueryApiV1BetaAgentDataDeletePostResponse =
|
||||
zDeleteResponse;
|
||||
|
||||
export const zListQuotaConfigurationsApiV1BetaQuotaManagementGetResponse =
|
||||
zPaginatedResponseQuotaConfiguration;
|
||||
|
||||
@@ -4135,6 +4091,18 @@ export const zQueryParseConfigurationsApiV1BetaParseConfigurationsQueryPostRespo
|
||||
export const zGetLatestParseConfigurationApiV1BetaParseConfigurationsLatestGetResponse =
|
||||
z.union([zParseConfiguration, z.null()]);
|
||||
|
||||
export const zListSpreadsheetJobsApiV1BetaSpreadsheetJobsGetResponse =
|
||||
zPaginatedResponseSpreadsheetJob;
|
||||
|
||||
export const zCreateSpreadsheetJobApiV1BetaSpreadsheetJobsPostResponse =
|
||||
zSpreadsheetJob;
|
||||
|
||||
export const zGetSpreadsheetJobApiV1BetaSpreadsheetJobsSpreadsheetJobIdGetResponse =
|
||||
zSpreadsheetJob;
|
||||
|
||||
export const zGetTableDownloadPresignedUrlApiV1BetaSpreadsheetJobsSpreadsheetJobIdTablesTableIdResultGetResponse =
|
||||
zPresignedUrl;
|
||||
|
||||
export const zUploadFileV2ApiV2Alpha1ParseUploadPostResponse = zParsingJob;
|
||||
|
||||
export const zGetSupportedFileExtensionsApiParsingSupportedFileExtensionsGetResponse =
|
||||
|
||||
@@ -0,0 +1,246 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { AgentClient, createAgentDataClient } from "../src/beta/agent/index.js";
|
||||
import * as sdk from "../src/client/index.js";
|
||||
|
||||
describe("AgentClient", () => {
|
||||
beforeEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("createItem sends correct payload and returns typed data", async () => {
|
||||
const spy = vi
|
||||
.spyOn(sdk, "createAgentDataApiV1BetaAgentDataPost")
|
||||
.mockResolvedValue({
|
||||
data: {
|
||||
id: "1",
|
||||
deployment_name: "dep",
|
||||
collection: "col",
|
||||
data: { foo: "bar" },
|
||||
created_at: "2024-01-01T00:00:00Z",
|
||||
updated_at: "2024-01-01T00:00:00Z",
|
||||
},
|
||||
} as any);
|
||||
|
||||
const client = new AgentClient<{ foo: string }>({
|
||||
deploymentName: "dep",
|
||||
collection: "col",
|
||||
});
|
||||
const result = await client.createItem({ foo: "bar" });
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
const call = spy.mock.calls[0][0];
|
||||
expect(call.body.deployment_name).toBe("dep");
|
||||
expect(call.body.collection).toBe("col");
|
||||
expect(call.body.data).toEqual({ foo: "bar" });
|
||||
|
||||
expect(result.id).toBe("1");
|
||||
expect(result.deploymentName).toBe("dep");
|
||||
expect(result.collection).toBe("col");
|
||||
expect(result.data).toEqual({ foo: "bar" });
|
||||
expect(result.createdAt).toEqual(new Date("2024-01-01T00:00:00Z"));
|
||||
expect(result.updatedAt).toEqual(new Date("2024-01-01T00:00:00Z"));
|
||||
});
|
||||
|
||||
it("getItem returns null for 404 errors", async () => {
|
||||
const spy = vi
|
||||
.spyOn(sdk, "getAgentDataApiV1BetaAgentDataItemIdGet")
|
||||
.mockImplementation(async () => {
|
||||
const err: any = new Error("Not found");
|
||||
err.response = { status: 404 };
|
||||
throw err;
|
||||
});
|
||||
|
||||
const client = new AgentClient({ deploymentName: "dep" });
|
||||
const res = await client.getItem("missing-id");
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
expect(res).toBeNull();
|
||||
});
|
||||
|
||||
it("updateItem updates and returns typed data", async () => {
|
||||
const spy = vi
|
||||
.spyOn(sdk, "updateAgentDataApiV1BetaAgentDataItemIdPut")
|
||||
.mockResolvedValue({
|
||||
data: {
|
||||
id: "123",
|
||||
deployment_name: "dep",
|
||||
collection: "col",
|
||||
data: { foo: "baz" },
|
||||
created_at: "2024-01-01T00:00:00Z",
|
||||
updated_at: "2024-01-02T00:00:00Z",
|
||||
},
|
||||
} as any);
|
||||
|
||||
const client = new AgentClient<{ foo: string }>({
|
||||
deploymentName: "dep",
|
||||
collection: "col",
|
||||
});
|
||||
const res = await client.updateItem("123", { foo: "baz" });
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
const call = spy.mock.calls[0][0];
|
||||
expect(call.path.item_id).toBe("123");
|
||||
expect(call.body.data).toEqual({ foo: "baz" });
|
||||
|
||||
expect(res.id).toBe("123");
|
||||
expect(res.updatedAt).toEqual(new Date("2024-01-02T00:00:00Z"));
|
||||
});
|
||||
|
||||
it("deleteItem calls delete endpoint with correct path", async () => {
|
||||
const spy = vi
|
||||
.spyOn(sdk, "deleteAgentDataApiV1BetaAgentDataItemIdDelete")
|
||||
.mockResolvedValue({} as any);
|
||||
|
||||
const client = new AgentClient({ deploymentName: "dep" });
|
||||
await client.deleteItem("abc");
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
expect(spy.mock.calls[0][0].path.item_id).toBe("abc");
|
||||
});
|
||||
|
||||
it("delete by query returns deleted count", async () => {
|
||||
const spy = vi
|
||||
.spyOn(sdk, "deleteAgentDataByQueryApiV1BetaAgentDataDeletePost")
|
||||
.mockResolvedValue({ data: { deleted_count: 7 } } as any);
|
||||
|
||||
const client = new AgentClient({
|
||||
deploymentName: "dep",
|
||||
collection: "col",
|
||||
});
|
||||
const count = await client.delete({
|
||||
filter: { status: { op: "eq", value: "accepted" } as any },
|
||||
});
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
const body = spy.mock.calls[0][0].body;
|
||||
expect(body.deployment_name).toBe("dep");
|
||||
expect(body.collection).toBe("col");
|
||||
expect(count).toBe(7);
|
||||
});
|
||||
|
||||
it("search maps items and optional fields correctly", async () => {
|
||||
const now = "2024-01-01T00:00:00Z";
|
||||
const spy = vi
|
||||
.spyOn(sdk, "searchAgentDataApiV1BetaAgentDataSearchPost")
|
||||
.mockResolvedValue({
|
||||
data: {
|
||||
items: [
|
||||
{
|
||||
id: "1",
|
||||
deployment_name: "dep",
|
||||
collection: "col",
|
||||
data: { foo: "bar" },
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
},
|
||||
],
|
||||
total_size: 1,
|
||||
next_page_token: "next",
|
||||
},
|
||||
} as any);
|
||||
|
||||
const client = new AgentClient<{ foo: string }>({
|
||||
deploymentName: "dep",
|
||||
collection: "col",
|
||||
});
|
||||
const result = await client.search({
|
||||
includeTotal: true,
|
||||
orderBy: "created_at desc",
|
||||
pageSize: 1,
|
||||
offset: 0,
|
||||
});
|
||||
|
||||
expect(spy).toHaveBeenCalledOnce();
|
||||
const body = spy.mock.calls[0][0].body;
|
||||
expect(body.deployment_name).toBe("dep");
|
||||
expect(body.collection).toBe("col");
|
||||
expect(body.include_total).toBe(true);
|
||||
expect(body.order_by).toBe("created_at desc");
|
||||
expect(body.page_size).toBe(1);
|
||||
expect(body.offset).toBe(0);
|
||||
|
||||
expect(result.items).toHaveLength(1);
|
||||
expect(result.totalSize).toBe(1);
|
||||
expect(result.nextPageToken).toBe("next");
|
||||
expect(result.items[0].createdAt).toEqual(new Date(now));
|
||||
});
|
||||
|
||||
it("aggregate maps groups and optional fields correctly", async () => {
  // Canned SDK payload: one aggregation group plus pagination metadata,
  // in the snake_case shape the mocked SDK call resolves with.
  const acceptedGroup = {
    group_key: { status: "accepted" },
    count: 3,
    first_item: { foo: "bar" },
  };
  const aggregateSpy = vi
    .spyOn(sdk, "aggregateAgentDataApiV1BetaAgentDataAggregatePost")
    .mockResolvedValue({
      data: {
        items: [acceptedGroup],
        total_size: 1,
        next_page_token: "tok",
      },
    } as any);

  const agentClient = new AgentClient<{ foo: string }>({
    deploymentName: "dep",
    collection: "col",
  });
  const page = await agentClient.aggregate({
    groupBy: ["status"],
    count: true,
    first: true,
    pageSize: 1,
    offset: 0,
  });

  // Outgoing request: camelCase options must arrive as snake_case body fields,
  // alongside the deployment/collection the client was constructed with.
  expect(aggregateSpy).toHaveBeenCalledOnce();
  const sentBody = aggregateSpy.mock.calls[0][0].body;
  expect(sentBody.deployment_name).toBe("dep");
  expect(sentBody.collection).toBe("col");
  expect(sentBody.group_by).toEqual(["status"]);
  expect(sentBody.count).toBe(true);
  expect(sentBody.first).toBe(true);
  expect(sentBody.page_size).toBe(1);
  expect(sentBody.offset).toBe(0);

  // Returned page: camelCase fields on both the page and each group.
  expect(page.items).toHaveLength(1);
  expect(page.totalSize).toBe(1);
  expect(page.nextPageToken).toBe("tok");
  const [firstGroup] = page.items;
  expect(firstGroup.groupKey).toEqual({ status: "accepted" });
  expect(firstGroup.count).toBe(3);
  expect(firstGroup.firstItem).toEqual({ foo: "bar" });
});
|
||||
|
||||
it("createAgentDataClient infers deployment name from env", async () => {
  // Stub the SDK search call with an empty page; only the outgoing request
  // body matters for this test.
  const spy = vi
    .spyOn(sdk, "searchAgentDataApiV1BetaAgentDataSearchPost")
    .mockResolvedValue({
      data: { items: [], total_size: 0 },
    } as any);

  // No explicit deploymentName: the only source for it here is the env map.
  const client = createAgentDataClient({
    env: { LLAMA_DEPLOY_DEPLOYMENT_NAME: "env-dep" },
  });
  await client.search({});

  // Assert the call happened before indexing into mock.calls, so a missing
  // SDK call fails as a readable assertion rather than a TypeError on
  // `undefined[0]` (matches the search/aggregate tests in this suite).
  expect(spy).toHaveBeenCalledOnce();
  const body = spy.mock.calls[0][0].body;
  expect(body.deployment_name).toBe("env-dep");
});
|
||||
|
||||
it("createAgentDataClient infers deployment name from windowUrl (non-local)", async () => {
  // Stub the SDK delete-by-query call; only the outgoing request body
  // matters for this test.
  const spy = vi
    .spyOn(sdk, "deleteAgentDataByQueryApiV1BetaAgentDataDeletePost")
    .mockResolvedValue({
      data: { deleted_count: 0 },
    } as any);

  // No explicit deploymentName: the only source for it here is the URL,
  // whose path carries the "abc" segment asserted below.
  const client = createAgentDataClient({
    windowUrl: "https://app.llamaindex.ai/deployments/abc/ui/",
  });
  await client.delete({});

  // Assert the call happened before indexing into mock.calls, so a missing
  // SDK call fails as a readable assertion rather than a TypeError on
  // `undefined[0]` (matches the search/aggregate tests in this suite).
  expect(spy).toHaveBeenCalledOnce();
  const body = spy.mock.calls[0][0].body;
  expect(body.deployment_name).toBe("abc");
});
|
||||
});
|
||||
Reference in New Issue
Block a user