mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d073d6ed30 | |||
| 848fb8973d | |||
| 78cf778613 | |||
| 535bfb5433 | |||
| c48c1cd6c4 | |||
| e22c99bc62 | |||
| 7e9768396d | |||
| 8f221541eb | |||
| 52d8083893 | |||
| 30f12c91d6 | |||
| dd33455413 | |||
| 491900f628 | |||
| 2fa68bc4be | |||
| ec9dbc0495 | |||
| 35c4fd6478 | |||
| 6a7872b9e5 |
@@ -0,0 +1,122 @@
|
||||
# LlamaExtract Demo
|
||||
|
||||
A TypeScript demo application showcasing the power of **LlamaExtract** - a structured data extraction agentic service from [LlamaCloud](https://cloud.llamaindex.ai). This demo allows you to extract structured information from scientific papers and get them into a nice markdown format.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Start the Demo](#start-the-demo)
|
||||
- [Development Mode](#development-mode)
|
||||
- [Build the Project](#build-the-project)
|
||||
- [Code Quality](#code-quality)
|
||||
- [Quick Commands Reference](#quick-commands-reference)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Dependencies](#api-dependencies)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [License](#license)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Features
|
||||
|
||||
- 📄 **Structured Data Extraction**: Extract data from your files effortlessly, and structure them the way you want!
|
||||
- 🤖 **Markdown Rendering**: Generate markdown directly from your extracted data
|
||||
- 🎨 **Beautiful CLI**: Styled console interface with colors and ASCII art
|
||||
- ⚡ **Fast Development**: Hot reload support with watch mode
|
||||
- 🛠️ **TypeScript**: Full TypeScript support with strict type checking
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js (version 18 or higher)
|
||||
- pnpm package manager
|
||||
- LlamaCloud API key
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/extract/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
3. Set up your environment variables:
|
||||
|
||||
```bash
|
||||
# Add your API key to your environment
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Demo
|
||||
|
||||
```bash
|
||||
npm run start
|
||||
```
|
||||
|
||||
The application will display a welcome screen and prompt you to enter the path to a document you'd like to process.
|
||||
|
||||
### Development Mode
|
||||
|
||||
For development with hot reload:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
### Build the Project
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
Format code:
|
||||
|
||||
```bash
|
||||
npm run format
|
||||
```
|
||||
|
||||
Lint code:
|
||||
|
||||
```bash
|
||||
npm run lint
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Document Input**: Enter the path to your document when prompted
|
||||
2. **Parsing**: LlamaExtract, based on the schema you can find [here](./src/schema.ts), processes the document and extracts structured data
|
||||
3. **Markdown Rendering**: The extracted content is rendered into beautiful markdown
|
||||
4. **Results**: View the results directly in your terminal
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Module Resolution Errors**: Ensure you're using Node.js 18+ and have all dependencies installed
|
||||
2. **API Key Issues**: Verify your LlamaCloud API key is correctly set
|
||||
3. **File Path Errors**: Use absolute paths or ensure relative paths are correct from the project root
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `npm run format` and `npm run lint`
|
||||
5. Submit a pull request
|
||||
Binary file not shown.
@@ -0,0 +1,14 @@
|
||||
import js from "@eslint/js";
|
||||
import globals from "globals";
|
||||
import tseslint from "typescript-eslint";
|
||||
import { defineConfig } from "eslint/config";
|
||||
|
||||
// Flat ESLint configuration for the demo: ESLint's recommended JavaScript
// rules for every JS/TS source file, followed by the typescript-eslint
// recommended rule set.
export default defineConfig([
  {
    files: ["**/*.{js,mjs,cjs,ts,mts,cts}"],
    plugins: { js },
    extends: ["js/recommended"],
    // The demo is a Node.js command-line program (it reads process.env and
    // process.stdin), so expose Node globals rather than browser globals.
    languageOptions: { globals: globals.node },
  },
  tseslint.configs.recommended,
]);
|
||||
Generated
+3276
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "llama-extract-demo",
|
||||
"version": "0.1.0",
|
||||
"description": "Demo for LlamaExtract in TypeScript",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"There are no tests\"",
|
||||
"start": "npm exec tsx src/index.ts",
|
||||
"lint": "eslint ./src/",
|
||||
"format": "prettier --write ./src/",
|
||||
"build": "tsc",
|
||||
"dev": "npm exec tsx --watch src/index.ts"
|
||||
},
|
||||
"author": "LlamaIndex",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cli-markdown": "^3.5.1",
|
||||
"consola": "^3.4.2",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "file:../../ts/llama_cloud_services",
|
||||
"marked": "^15.0.12",
|
||||
"marked-terminal": "^7.3.0",
|
||||
"picocolors": "^1.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.32.0",
|
||||
"@types/figlet": "^1.7.0",
|
||||
"@types/marked-terminal": "^6.1.1",
|
||||
"@types/node": "^24.2.0",
|
||||
"eslint": "^9.32.0",
|
||||
"globals": "^16.3.0",
|
||||
"jiti": "^2.5.1",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.9.2",
|
||||
"typescript-eslint": "^8.39.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
import { LlamaExtract, ExtractConfig } from "llama-cloud-services";
|
||||
import cliMarkdown from "cli-markdown";
|
||||
import { logger } from "./logger";
|
||||
import pc from "picocolors";
|
||||
import { consoleInput, renderLogo } from "./utils";
|
||||
import { dataSchema } from "./schema";
|
||||
import { renderMarkdown, ResearchData } from "./markdown";
|
||||
|
||||
/**
 * Interactive entry point of the LlamaExtract demo.
 *
 * Renders the logo and a welcome message, then repeatedly asks for a file
 * path, runs an extraction against `dataSchema` and prints the result as
 * terminal-rendered markdown. The loop ends when the user types `quit`.
 * Errors raised while processing a single file are logged and the loop
 * continues.
 *
 * @returns `0` once the user quits, or `1` when `LLAMA_CLOUD_API_KEY` is not
 *   set in the environment.
 */
export async function main(): Promise<number> {
  // Fail early with a readable message instead of handing `undefined` to the
  // client behind a non-null assertion.
  const apiKey = process.env.LLAMA_CLOUD_API_KEY;
  if (!apiKey) {
    logger.error(
      "LLAMA_CLOUD_API_KEY is not set. Export it before starting the demo.",
    );
    return 1;
  }

  const extractClient = new LlamaExtract(
    apiKey,
    "https://api.cloud.llamaindex.ai",
  );
  await renderLogo();

  const demoName = pc.bold(pc.magentaBright("LlamaExtract Demo✨"));
  const productName = pc.bold(pc.green("LlamaExtract"));
  const cloudName = pc.bold(pc.cyan("LlamaCloud☁️"));
  const inputKind = pc.bold(pc.yellowBright("from scientific papers"));
  const quitWord = pc.bold(pc.gray("quit"));
  logger.log(
    `Welcome to ${demoName}, our demo for ${productName}, a ${cloudName} (https://cloud.llamaindex.ai) product!.\nIn this demo we are going to try extracting relevant information ${inputKind}. Type the path to the paper you would like to process below👇\nIf you wish to exit, just type ${quitWord}.\n`,
  );

  while (true) {
    // Trim so that stray whitespace neither hides "quit" nor corrupts the path.
    const userInput = (await consoleInput()).trim();
    if (userInput.toLowerCase() === "quit") {
      break;
    }
    if (userInput === "") {
      // Nothing was typed: ask again instead of sending an empty path.
      continue;
    }
    try {
      const generatedData = await extractClient.extract(
        dataSchema,
        {} as ExtractConfig,
        userInput,
      );
      if (!generatedData?.data) {
        logger.error("Error processing file: no data was extracted");
        continue;
      }
      const research = renderMarkdown(generatedData.data as ResearchData);
      logger.log(`${pc.bold(pc.cyan("Extracted information:✨"))}:\n`);
      logger.log(cliMarkdown(research));
    } catch (error) {
      logger.error(`Error processing file: ${error}`);
    }
  }
  return 0;
}

// Propagate the result of main() to the process exit code; an unexpected
// rejection is printed and reported as a failure.
main()
  .then((code) => {
    process.exitCode = code;
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
|
||||
@@ -0,0 +1,8 @@
|
||||
import { createConsola } from "consola";
|
||||
import type { ConsolaInstance } from "consola";
|
||||
|
||||
/**
 * Shared consola logger for the demo, created with `formatOptions.date`
 * set to `false`.
 */
export const logger: ConsolaInstance = createConsola({
  formatOptions: {
    date: false,
  },
});
|
||||
@@ -0,0 +1,172 @@
|
||||
/** One author entry of a paper. */
type Author = {
  name: string;
  affiliation?: string;
  email?: string;
};

/** Research methods and approaches used in a paper. */
type Methodology = {
  approach?: string;
  participants?: string;
  methods?: string[];
};

/** A single result reported by a paper. */
type Result = {
  finding?: string;
  significance?: string;
  supportingData?: string;
};

/** A paper cited by the processed paper. */
type Reference = {
  title: string;
  authors: string;
  year?: string;
  relevance?: string;
};

/** Discussion section of a paper, split into three optional lists. */
type Discussion = {
  implications?: string[];
  limitations?: string[];
  futureWork?: string[];
};

/** Publication metadata of a paper. */
type Publication = {
  journal?: string;
  year: string;
  doi?: string;
  url?: string;
};

/** Structured data extracted from a research paper, as rendered by `renderMarkdown`. */
export type ResearchData = {
  title: string;
  authors: Author[];
  abstract: string;
  keywords?: string[];
  mainFindings: string[];
  methodology?: Methodology;
  results?: Result[];
  discussion?: Discussion;
  references?: Reference[];
  publication?: Publication;
};

/** Formats `items` as a markdown bullet list, one `- item` line per entry. */
function bulletList(items: readonly string[]): string {
  return items.map((item) => `- ${item}`).join("\n");
}

/**
 * Renders extracted research data as a markdown document.
 *
 * The title, authors, abstract and main findings sections are always emitted.
 * Every other section is emitted only when it has content, so an empty
 * `methodology`, `discussion` or `publication` object does not leave a bare
 * heading behind.
 *
 * @param data - The extracted research data. Callers typically obtain it by
 *   casting unvalidated extraction output, so missing `authors` or
 *   `mainFindings` arrays are tolerated and treated as empty.
 * @returns The markdown text, sections separated by blank lines.
 * @throws Error when `data` is `null` or `undefined`.
 */
export function renderMarkdown(data: ResearchData): string {
  if (data == null) {
    throw new Error("renderMarkdown: no extracted data to render");
  }

  const {
    title,
    abstract,
    keywords,
    methodology,
    results,
    discussion,
    references,
    publication,
  } = data;
  // Required by the type, but not guaranteed at runtime (see @param data).
  const authors = data.authors ?? [];
  const mainFindings = data.mainFindings ?? [];

  const md: string[] = [];

  md.push(`# ${title}\n`);

  // Authors
  md.push(`## Authors`);
  md.push(
    authors
      .map((author) => {
        const affiliation = author.affiliation
          ? `, *${author.affiliation}*`
          : "";
        const email = author.email ? ` (${author.email})` : "";
        return `- **${author.name}**${affiliation}${email}`;
      })
      .join("\n"),
  );

  // Abstract
  md.push(`\n## Abstract\n${abstract}`);

  // Keywords
  if (keywords && keywords.length > 0) {
    md.push(`\n## Keywords\n${bulletList(keywords)}`);
  }

  // Main Findings
  md.push(`\n## Main Findings\n${bulletList(mainFindings)}`);

  // Methodology: collect the body first so the heading is only written when
  // at least one field is present.
  if (methodology) {
    const lines: string[] = [];
    if (methodology.approach) {
      lines.push(`**Approach:** ${methodology.approach}`);
    }
    if (methodology.participants) {
      lines.push(`**Participants:** ${methodology.participants}`);
    }
    if (methodology.methods?.length) {
      lines.push(`**Methods:**\n${bulletList(methodology.methods)}`);
    }
    if (lines.length > 0) {
      md.push(`\n## Methodology`, ...lines);
    }
  }

  // Results
  if (results?.length) {
    md.push(`\n## Results`);
    results.forEach((result, i) => {
      md.push(`\n### Result ${i + 1}`);
      if (result.finding) md.push(`- **Finding:** ${result.finding}`);
      if (result.significance) {
        md.push(`- **Significance:** ${result.significance}`);
      }
      if (result.supportingData) {
        md.push(`- **Supporting Data:** ${result.supportingData}`);
      }
    });
  }

  // Discussion: same heading-only-when-non-empty rule as Methodology.
  if (discussion) {
    const subsections: [string, string[] | undefined][] = [
      ["Implications", discussion.implications],
      ["Limitations", discussion.limitations],
      ["Future Work", discussion.futureWork],
    ];
    const parts: string[] = [];
    for (const [heading, items] of subsections) {
      if (items?.length) {
        parts.push(`### ${heading}\n${bulletList(items)}`);
      }
    }
    if (parts.length > 0) {
      md.push(`\n## Discussion`, ...parts);
    }
  }

  // References
  if (references?.length) {
    md.push(`\n## References`);
    references.forEach((ref, i) => {
      const year = ref.year ? ` (${ref.year})` : "";
      md.push(`\n**[${i + 1}]** ${ref.title} — *${ref.authors}*${year}`);
      if (ref.relevance) md.push(`> ${ref.relevance}`);
    });
  }

  // Publication Info
  if (publication) {
    const lines: string[] = [];
    if (publication.journal) lines.push(`- **Journal:** ${publication.journal}`);
    if (publication.year) lines.push(`- **Year:** ${publication.year}`);
    if (publication.doi) lines.push(`- **DOI:** ${publication.doi}`);
    if (publication.url) {
      lines.push(`- **URL:** [${publication.url}](${publication.url})`);
    }
    if (lines.length > 0) {
      md.push(`\n## Publication`, ...lines);
    }
  }

  return md.join("\n");
}
|
||||
@@ -0,0 +1,169 @@
|
||||
/**
 * JSON schema describing the structured data to extract from a research
 * paper. Its property names mirror the fields read when the extraction
 * result is rendered as markdown.
 */
export const dataSchema = {
  type: "object",
  required: ["title", "authors", "abstract", "mainFindings"],
  properties: {
    title: {
      type: "string",
      description: "The full title of the research paper",
    },
    authors: {
      type: "array",
      description: "List of all authors of the paper",
      items: {
        type: "object",
        properties: {
          name: {
            type: "string",
            description: "Full name of the author",
          },
          affiliation: {
            type: "string",
            description:
              "Institution or organization the author is affiliated with",
          },
          email: {
            type: "string",
            description: "Contact email of the author if provided",
          },
        },
        // An author entry is only useful with a name; the rendered author
        // list prints it unconditionally.
        required: ["name"],
      },
    },
    abstract: {
      type: "string",
      description: "Complete abstract or summary of the paper",
    },
    keywords: {
      type: "array",
      description:
        "Key terms and phrases that describe the paper's main topics",
      items: {
        type: "string",
      },
    },
    mainFindings: {
      type: "array",
      description: "Key findings, conclusions, or contributions of the paper",
      items: {
        type: "string",
      },
    },
    methodology: {
      type: "object",
      description: "Research methods and approaches used",
      properties: {
        approach: {
          type: "string",
          description: "Overall research approach or study design",
        },
        participants: {
          type: "string",
          description: "Description of study participants or data sources",
        },
        methods: {
          type: "array",
          description: "Specific methods, techniques, or tools used",
          items: {
            type: "string",
          },
        },
      },
    },
    results: {
      type: "array",
      description: "Main results and outcomes of the research",
      items: {
        type: "object",
        properties: {
          finding: {
            type: "string",
            description: "Description of the specific result or finding",
          },
          significance: {
            type: "string",
            description:
              "Statistical significance or importance of the finding",
          },
          supportingData: {
            type: "string",
            description: "Relevant statistics, measurements, or data points",
          },
        },
      },
    },
    discussion: {
      type: "object",
      properties: {
        implications: {
          type: "array",
          description: "Theoretical or practical implications of the findings",
          items: {
            type: "string",
          },
        },
        limitations: {
          type: "array",
          description: "Study limitations or constraints",
          items: {
            type: "string",
          },
        },
        futureWork: {
          type: "array",
          description: "Suggested future research directions",
          items: {
            type: "string",
          },
        },
      },
    },
    references: {
      type: "array",
      description:
        "Key papers cited that are crucial to understanding this work",
      items: {
        type: "object",
        properties: {
          title: {
            type: "string",
            description: "Title of the cited paper",
          },
          authors: {
            type: "string",
            description: "Authors of the cited paper",
          },
          year: {
            type: "string",
            description: "Publication year",
          },
          relevance: {
            type: "string",
            description: "Why this reference is important to the current paper",
          },
        },
        required: ["title", "authors"],
      },
    },
    publication: {
      type: "object",
      properties: {
        journal: {
          type: "string",
          description: "Name of the journal or conference",
        },
        year: {
          type: "string",
          description: "Year of publication",
        },
        doi: {
          type: "string",
          description: "Digital Object Identifier (DOI) of the paper",
        },
        url: {
          type: "string",
          description: "URL where the paper can be accessed",
        },
      },
      required: ["year"],
    },
  },
};
|
||||
@@ -0,0 +1,4 @@
|
||||
// Minimal ambient declaration so that `cli-markdown` can be imported from
// TypeScript: the default export is typed as a function from string to string.
declare module "cli-markdown" {
  // NOTE(review): presumably takes markdown source and returns text formatted
  // for the terminal — confirm against the package documentation.
  function cliMarkdown(input: string): string;
  export default cliMarkdown;
}
|
||||
@@ -0,0 +1,33 @@
|
||||
import * as readline from "readline/promises";
|
||||
import figlet from "figlet";
|
||||
import pc from "picocolors";
|
||||
|
||||
/**
 * Prints the demo banner to standard output: the text "Extract Demo" rendered
 * by figlet in the "ANSI Shadow" font and coloured bold bright red, followed
 * by a 60-character gray rule, with blank lines before and after.
 */
export async function renderLogo(): Promise<void> {
  const logoText = figlet.textSync("Extract Demo", {
    font: "ANSI Shadow",
    horizontalLayout: "default",
    verticalLayout: "default",
    width: 100,
    whitespaceBreak: true,
  });

  // Style the ASCII art with picocolors.
  const styledLogo = pc.bold(pc.redBright(logoText));

  // Each "\n" adds an empty line on top of console.log's own line break,
  // giving the banner some vertical padding.
  console.log("\n");
  console.log(styledLogo);
  console.log(pc.gray("─".repeat(60)));
  console.log("\n");
}
|
||||
|
||||
/**
 * Prompts the user with "Path to your file: " on the terminal and resolves
 * with the line they typed.
 *
 * A readline interface bound to `process.stdin` / `process.stdout` is created
 * for the prompt and always closed afterwards, even when reading the answer
 * fails.
 *
 * @returns The answer exactly as returned by readline.
 */
export async function consoleInput(): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  try {
    return await rl.question("Path to your file: ");
  } finally {
    // Release stdin/stdout on both the success and the failure path.
    rl.close();
  }
}
|
||||
@@ -40,8 +40,8 @@ A TypeScript demo application showcasing the power of **LlamaCloud Index** - a f
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd llamaparse-demo
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/index/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
@@ -120,12 +120,12 @@ pnpm run lint
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../../LICENSE) file for details.
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `pnpm format` and `pnpm lint`
|
||||
4. Run `pnpm run format` and `pnpm run lint`
|
||||
5. Submit a pull request
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"consola": "^3.4.2",
|
||||
"dotenv": "^17.2.1",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "link:../../../ts/llama_cloud_services",
|
||||
"llama-cloud-services": "link:../../ts/llama_cloud_services",
|
||||
"picocolors": "^1.1.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,8 +40,8 @@ A TypeScript demo application showcasing the power of **LlamaParse** - an intell
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd llamaparse-demo
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/parse/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
@@ -113,12 +113,12 @@ pnpm run lint
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../../LICENSE) file for details.
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `pnpm format` and `pnpm lint`
|
||||
4. Run `pnpm run format` and `pnpm run lint`
|
||||
5. Submit a pull request
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
"ai": "^4.3.19",
|
||||
"consola": "^3.4.2",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "link:../../../ts/llama_cloud_services",
|
||||
"llama-cloud-services": "link:../../ts/llama_cloud_services",
|
||||
"picocolors": "^1.1.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -327,4 +327,5 @@ Another option (orthogonal to the above) is to break the document into smaller s
|
||||
## Additional Resources
|
||||
|
||||
- [Example Notebook](docs/examples-py/extract/resume_screening.ipynb) - Detailed walkthrough of resume parsing
|
||||
- [Example Application with TypeScript](./examples-ts/extract/) - End-to-end examples using LlamaExtract TypeScript client.
|
||||
- [Discord Community](https://discord.com/invite/eN6D2HQ4aX) - Get help and share feedback
|
||||
|
||||
Generated
+63
@@ -23,6 +23,9 @@ importers:
|
||||
ajv:
|
||||
specifier: ^8.17.1
|
||||
version: 8.17.1
|
||||
file-type:
|
||||
specifier: ^21.0.0
|
||||
version: 21.0.0
|
||||
p-retry:
|
||||
specifier: ^6.2.1
|
||||
version: 6.2.1
|
||||
@@ -92,6 +95,8 @@ importers:
|
||||
|
||||
ts/llama_cloud_services/beta/agent: {}
|
||||
|
||||
ts/llama_cloud_services/extract: {}
|
||||
|
||||
ts/llama_cloud_services/parse: {}
|
||||
|
||||
ts/llama_cloud_services/reader: {}
|
||||
@@ -681,6 +686,13 @@ packages:
|
||||
'@swc/types@0.1.23':
|
||||
resolution: {integrity: sha512-u1iIVZV9Q0jxY+yM2vw/hZGDNudsN85bBpTqzAQ9rzkxW9D+e3aEM4Han+ow518gSewkXgjmEK0BD79ZcNVgPw==}
|
||||
|
||||
'@tokenizer/inflate@0.2.7':
|
||||
resolution: {integrity: sha512-MADQgmZT1eKjp06jpI2yozxaU9uVs4GzzgSL+uEq7bVcJ9V1ZXQkeGNql1fsSI0gMy1vhvNTNbUqrx+pZfJVmg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@tokenizer/token@0.3.0':
|
||||
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
|
||||
|
||||
'@types/estree@1.0.8':
|
||||
resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==}
|
||||
|
||||
@@ -1097,6 +1109,10 @@ packages:
|
||||
resolution: {integrity: sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==}
|
||||
engines: {node: '>=16.0.0'}
|
||||
|
||||
file-type@21.0.0:
|
||||
resolution: {integrity: sha512-ek5xNX2YBYlXhiUXui3D/BXa3LdqPmoLJ7rqEx2bKJ7EAUEfmXgW0Das7Dc6Nr9MvqaOnIqiPV0mZk/r/UpNAg==}
|
||||
engines: {node: '>=20'}
|
||||
|
||||
fill-range@7.1.1:
|
||||
resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
|
||||
engines: {node: '>=8'}
|
||||
@@ -1182,6 +1198,9 @@ packages:
|
||||
html-escaper@2.0.2:
|
||||
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
||||
|
||||
ieee754@1.2.1:
|
||||
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
|
||||
|
||||
ignore@5.3.2:
|
||||
resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==}
|
||||
engines: {node: '>= 4'}
|
||||
@@ -1632,6 +1651,10 @@ packages:
|
||||
resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
strtok3@10.3.4:
|
||||
resolution: {integrity: sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
supports-color@7.2.0:
|
||||
resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==}
|
||||
engines: {node: '>=8'}
|
||||
@@ -1674,6 +1697,10 @@ packages:
|
||||
resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
|
||||
engines: {node: '>=8.0'}
|
||||
|
||||
token-types@6.0.4:
|
||||
resolution: {integrity: sha512-MD9MjpVNhVyH4fyd5rKphjvt/1qj+PtQUz65aFqAZA6XniWAuSFRjLk3e2VALEFlh9OwBpXUN7rfeqSnT/Fmkw==}
|
||||
engines: {node: '>=14.16'}
|
||||
|
||||
totalist@3.0.1:
|
||||
resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==}
|
||||
engines: {node: '>=6'}
|
||||
@@ -1745,6 +1772,10 @@ packages:
|
||||
engines: {node: '>=0.8.0'}
|
||||
hasBin: true
|
||||
|
||||
uint8array-extras@1.4.0:
|
||||
resolution: {integrity: sha512-ZPtzy0hu4cZjv3z5NW9gfKnNLjoz4y6uv4HlelAjDK7sY/xOkKZv9xK/WQpcsBB3jEybChz9DPC2U/+cusjJVQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
undici-types@6.21.0:
|
||||
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
|
||||
|
||||
@@ -2312,6 +2343,16 @@ snapshots:
|
||||
dependencies:
|
||||
'@swc/counter': 0.1.3
|
||||
|
||||
'@tokenizer/inflate@0.2.7':
|
||||
dependencies:
|
||||
debug: 4.4.1
|
||||
fflate: 0.8.2
|
||||
token-types: 6.0.4
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@tokenizer/token@0.3.0': {}
|
||||
|
||||
'@types/estree@1.0.8': {}
|
||||
|
||||
'@types/json-schema@7.0.15': {}
|
||||
@@ -2815,6 +2856,15 @@ snapshots:
|
||||
dependencies:
|
||||
flat-cache: 4.0.1
|
||||
|
||||
file-type@21.0.0:
|
||||
dependencies:
|
||||
'@tokenizer/inflate': 0.2.7
|
||||
strtok3: 10.3.4
|
||||
token-types: 6.0.4
|
||||
uint8array-extras: 1.4.0
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
fill-range@7.1.1:
|
||||
dependencies:
|
||||
to-regex-range: 5.0.1
|
||||
@@ -2903,6 +2953,8 @@ snapshots:
|
||||
|
||||
html-escaper@2.0.2: {}
|
||||
|
||||
ieee754@1.2.1: {}
|
||||
|
||||
ignore@5.3.2: {}
|
||||
|
||||
ignore@7.0.5: {}
|
||||
@@ -3328,6 +3380,10 @@ snapshots:
|
||||
|
||||
strip-json-comments@3.1.1: {}
|
||||
|
||||
strtok3@10.3.4:
|
||||
dependencies:
|
||||
'@tokenizer/token': 0.3.0
|
||||
|
||||
supports-color@7.2.0:
|
||||
dependencies:
|
||||
has-flag: 4.0.0
|
||||
@@ -3368,6 +3424,11 @@ snapshots:
|
||||
dependencies:
|
||||
is-number: 7.0.0
|
||||
|
||||
token-types@6.0.4:
|
||||
dependencies:
|
||||
'@tokenizer/token': 0.3.0
|
||||
ieee754: 1.2.1
|
||||
|
||||
totalist@3.0.1: {}
|
||||
|
||||
ts-api-utils@2.1.0(typescript@5.9.2):
|
||||
@@ -3425,6 +3486,8 @@ snapshots:
|
||||
uglify-js@3.19.3:
|
||||
optional: true
|
||||
|
||||
uint8array-extras@1.4.0: {}
|
||||
|
||||
undici-types@6.21.0: {}
|
||||
|
||||
undici-types@7.10.0: {}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"type": "module",
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": "./dist/index.js",
|
||||
"private": true
|
||||
}
|
||||
@@ -12855,6 +12855,72 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/api/v1/extraction/run": {
|
||||
"post": {
|
||||
"tags": ["LlamaExtract"],
|
||||
"summary": "Extract Stateless",
|
||||
"description": "Stateless extraction endpoint that uses a default extraction agent in the user's default project. Requires data_schema, config, and either file_id, text, or base64 encoded file data.",
|
||||
"operationId": "extract_stateless_api_v1_extraction_run_post",
|
||||
"security": [
|
||||
{
|
||||
"HTTPBearer": []
|
||||
},
|
||||
{
|
||||
"HTTPBearer": []
|
||||
}
|
||||
],
|
||||
"parameters": [
|
||||
{
|
||||
"name": "session",
|
||||
"in": "cookie",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Session"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/StatelessExtractionRequest"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Successful Response",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ExtractJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"422": {
|
||||
"description": "Validation Error",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPValidationError"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/api/v1/extraction/jobs": {
|
||||
"get": {
|
||||
"tags": ["LlamaExtract"],
|
||||
@@ -35483,6 +35549,64 @@
|
||||
"title": "WebhookConfiguration",
|
||||
"description": "Allows the user to configure webhook options for notifications and callbacks."
|
||||
},
|
||||
"StatelessExtractionRequest": {
|
||||
"type": "object",
|
||||
"required": ["data_schema"],
|
||||
"properties": {
|
||||
"data_schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"additionalProperties": {
|
||||
"anyOf": [
|
||||
{
|
||||
"additionalProperties": true,
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"items": {},
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"config": {
|
||||
"$ref": "#/components/schemas/ExtractConfig",
|
||||
"description": "The configuration parameters for the extraction agent."
|
||||
},
|
||||
"file_id": {
|
||||
"type": "string",
|
||||
"format": "uuid",
|
||||
"title": "File Id",
|
||||
"description": "ID of an uploaded file to extract from"
|
||||
}
|
||||
},
|
||||
"title": "StatelessExtractionRequest",
|
||||
"description": "Request body for stateless extraction. Must include either file_id, text, or base64."
|
||||
},
|
||||
"llama_index__core__base__llms__types__ChatMessage": {
|
||||
"properties": {
|
||||
"role": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llama-cloud-services",
|
||||
"version": "0.2.0",
|
||||
"version": "0.3.0",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
@@ -20,7 +20,8 @@
|
||||
"./api",
|
||||
"./reader",
|
||||
"./parse",
|
||||
"./beta/agent"
|
||||
"./beta/agent",
|
||||
"./extract"
|
||||
],
|
||||
"exports": {
|
||||
"./openapi.json": "./openapi.json",
|
||||
@@ -68,6 +69,17 @@
|
||||
},
|
||||
"default": "./parse/dist/index.js"
|
||||
},
|
||||
"./extract": {
|
||||
"require": {
|
||||
"types": "./extract/dist/index.d.cts",
|
||||
"default": "./extract/dist/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./extract/dist/index.d.ts",
|
||||
"default": "./extract/dist/index.js"
|
||||
},
|
||||
"default": "./extract/dist/index.js"
|
||||
},
|
||||
".": {
|
||||
"require": {
|
||||
"types": "./dist/index.d.cts",
|
||||
@@ -113,6 +125,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"ajv": "^8.17.1",
|
||||
"file-type": "^21.0.0",
|
||||
"p-retry": "^6.2.1",
|
||||
"zod": "^3.25.76"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,225 @@
|
||||
import { createClient, createConfig, type Client } from "@hey-api/client-fetch";
|
||||
import { File } from "buffer";
|
||||
import * as extract from "./extract";
|
||||
import type { ExtractAgent, ExtractConfig } from "./extract";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { ExtractResult } from "./type";
|
||||
|
||||
/** Base API URL for each supported LlamaCloud region. */
const URLS = {
  us: "https://api.cloud.llamaindex.ai",
  eu: "https://api.cloud.eu.llamaindex.ai",
  "us-staging": "https://api.staging.llamaindex.ai",
} as const;

/** Type guard: true when `region` is one of the keys declared in `URLS`. */
function isSupportedRegion(region: string): region is keyof typeof URLS {
  // Own-property check, so inherited names such as "toString" are rejected.
  return Object.prototype.hasOwnProperty.call(URLS, region);
}

/**
 * Resolves the API base URL to use.
 *
 * @param baseUrl - Explicit base URL; when defined it is returned as-is and
 *   `region` is ignored.
 * @param region - Region key looked up in `URLS`; `undefined` selects "us".
 * @returns The base URL to use for API requests.
 * @throws Error when `region` is defined but is not a key of `URLS`.
 */
function getUrl(baseUrl: string | undefined, region: string | undefined): string {
  if (typeof baseUrl !== "undefined") {
    return baseUrl;
  }
  if (typeof region === "undefined") {
    return URLS.us;
  }
  // Derive the accepted regions from URLS so a new entry needs no second edit.
  if (isSupportedRegion(region)) {
    return URLS[region];
  }
  throw new Error(`Unsupported region: ${region}`);
}
|
||||
|
||||
/**
 * Wrapper around an extraction agent record that remembers the API client it
 * was obtained with, so extractions can be run without passing the client again.
 */
export class LlamaExtractAgent {
  private agent: ExtractAgent;
  private client: Client;
  /** Copied from the wrapped agent's `id`. */
  id: string;
  /** Copied from the wrapped agent's `name`. */
  name: string;
  /** Copied from the wrapped agent's `data_schema`. */
  dataSchema: {
    [key: string]:
      | string
      | number
      | boolean
      | { [key: string]: unknown }
      | unknown[]
      | null;
  };

  /**
   * @param agent - Agent record as returned by the API.
   * @param client - Client used for every request issued through this wrapper.
   */
  constructor(agent: ExtractAgent, client: Client) {
    const { id, name, data_schema } = agent;
    this.agent = agent;
    this.client = client;
    this.id = id;
    this.name = name;
    this.dataSchema = data_schema;
  }

  /**
   * Run an extraction with this agent by delegating to `extract.extract`,
   * supplying the wrapped agent's id and the stored client.
   *
   * @param filePath - Path of the file to extract from.
   * @param fileContent - In-memory content, used when no `filePath` is given.
   * @param fileName - Name to give the uploaded file.
   * @param project_id - Forwarded unchanged to `extract.extract`.
   * @param organization_id - Forwarded unchanged to `extract.extract`.
   * @param fromUi - Forwarded unchanged to `extract.extract`.
   * @param pollingInterval - Forwarded unchanged (presumably seconds — confirm against `extract.extract`).
   * @param maxPollingIterations - Forwarded unchanged to `extract.extract`.
   * @param maxRetriesOnError - Forwarded unchanged to `extract.extract`.
   * @param retryInterval - Forwarded unchanged (presumably seconds — confirm against `extract.extract`).
   * @returns Whatever `extract.extract` resolves to.
   */
  async extract(
    filePath: string | undefined = undefined,
    fileContent:
      | Buffer<ArrayBufferLike>
      | Uint8Array<ArrayBuffer>
      | string
      | File
      | undefined = undefined,
    fileName: string | undefined = undefined,
    project_id: string | null = null,
    organization_id: string | null = null,
    fromUi: boolean | undefined = undefined,
    pollingInterval: number = 1,
    maxPollingIterations: number = 1800,
    maxRetriesOnError: number = 10,
    retryInterval: number = 0.5,
  ): Promise<ExtractResult | undefined> {
    const result = await extract.extract(
      this.agent.id,
      filePath,
      fileContent,
      fileName,
      project_id,
      organization_id,
      this.client,
      fromUi,
      pollingInterval,
      maxPollingIterations,
      maxRetriesOnError,
      retryInterval,
    );
    return result;
  }
}
|
||||
|
||||
/**
 * High-level client for the LlamaExtract service.
 *
 * Builds one authenticated API client in the constructor and reuses it for
 * every call, delegating the actual work to the functions of the `extract`
 * module.
 */
export class LlamaExtract {
  private client: Client;

  /**
   * @param apiKey - API key; when omitted, `LLAMA_CLOUD_API_KEY` is read through `getEnv`.
   * @param baseUrl - Explicit API base URL; takes precedence over `region`.
   * @param region - Region key resolved by `getUrl` ("us", "eu" or "us-staging").
   * @throws Error when no API key can be found, or when `getUrl` rejects the region.
   */
  constructor(
    apiKey: string | undefined = undefined,
    baseUrl: string | undefined = undefined,
    region: string | undefined = undefined,
  ) {
    const key = apiKey ?? getEnv("LLAMA_CLOUD_API_KEY");
    if (typeof key === "undefined") {
      throw new Error(
        "No API key provided and no API key found in environment. Please pass the API key or set `LLAMA_CLOUD_API_KEY` as an environment variable.",
      );
    }
    const url = getUrl(baseUrl, region);
    this.client = createClient(
      createConfig({
        baseUrl: url,
        headers: {
          Authorization: `Bearer ${key}`,
        },
      }),
    );
  }

  /**
   * Create an extraction agent and wrap it in a `LlamaExtractAgent`.
   *
   * @param name - Name of the new agent.
   * @param dataSchema - Schema of the data to extract, as an object or a string.
   * @param config - Extraction configuration forwarded to `extract.createAgent`.
   * @param project_id - Forwarded to `extract.createAgent`.
   * @param organization_id - Forwarded to `extract.createAgent`.
   * @param maxRetriesOnError - Retry budget forwarded to `extract.createAgent`.
   * @param retryInterval - Pause between retries, in seconds.
   * @returns The wrapped agent, or `undefined` when `extract.createAgent` yields none.
   */
  async createAgent(
    name: string,
    dataSchema:
      | {
          [key: string]:
            | { [key: string]: unknown }
            | Array<unknown>
            | string
            | number
            | boolean
            | null;
        }
      | string,
    config: ExtractConfig | undefined = undefined,
    project_id: string | null = null,
    organization_id: string | null = null,
    maxRetriesOnError: number = 10,
    retryInterval: number = 0.5,
  ): Promise<LlamaExtractAgent | undefined> {
    const agent = await extract.createAgent(
      name,
      dataSchema,
      config,
      project_id,
      organization_id,
      this.client,
      maxRetriesOnError,
      retryInterval,
    );
    return typeof agent === "undefined"
      ? undefined
      : new LlamaExtractAgent(agent, this.client);
  }

  /**
   * Fetch an existing agent by name or by id and wrap it.
   *
   * Note the parameter order here is (`name`, `id`) while `extract.getAgent`
   * takes (`id`, `name`); the arguments are swapped accordingly below.
   *
   * @param name - Agent name.
   * @param id - Agent id.
   * @param project_id - Forwarded to `extract.getAgent`.
   * @param organization_id - Forwarded to `extract.getAgent`.
   * @param maxRetriesOnError - Retry budget forwarded to `extract.getAgent`.
   * @param retryInterval - Pause between retries, in seconds.
   * @returns The wrapped agent, or `undefined` when `extract.getAgent` yields none.
   */
  async getAgent(
    name: string | undefined = undefined,
    id: string | undefined = undefined,
    project_id: string | null = null,
    organization_id: string | null = null,
    maxRetriesOnError: number = 10,
    retryInterval: number = 0.5,
  ): Promise<LlamaExtractAgent | undefined> {
    const agent = await extract.getAgent(
      id,
      name,
      project_id,
      organization_id,
      this.client,
      maxRetriesOnError,
      retryInterval,
    );
    return typeof agent === "undefined"
      ? undefined
      : new LlamaExtractAgent(agent, this.client);
  }

  /**
   * Delete the agent with the given id.
   *
   * @param id - Id of the agent to delete.
   * @param maxRetriesOnError - Retry budget forwarded to `extract.deleteAgent`.
   * @param retryInterval - Pause between retries, in seconds. Defaults to 0.5
   *   like every other method of this class and like `extract.deleteAgent`
   *   itself; a default of 500 would mean waiting 500 seconds between attempts.
   * @returns Whatever `extract.deleteAgent` resolves to.
   */
  async deleteAgent(
    id: string,
    maxRetriesOnError: number = 10,
    retryInterval: number = 0.5,
  ): Promise<boolean | undefined> {
    return await extract.deleteAgent(
      id,
      this.client,
      maxRetriesOnError,
      retryInterval,
    );
  }

  /**
   * Run a one-off ("stateless") extraction that does not require an agent,
   * delegating to `extract.extractStateless` with the stored client.
   *
   * @param dataSchema - Schema of the data to extract, as an object or a string.
   * @param config - Extraction configuration.
   * @param filePath - Path of the file to extract from.
   * @param fileContent - In-memory content, used when no `filePath` is given.
   * @param fileName - Name to give the uploaded file.
   * @param project_id - Forwarded to `extract.extractStateless`.
   * @param organization_id - Forwarded to `extract.extractStateless`.
   * @param pollingInterval - Pause between job status polls, in seconds.
   * @param maxPollingIterations - Maximum number of status polls.
   * @param maxRetriesOnError - Retry budget for each API call.
   * @param retryInterval - Pause between retries, in seconds.
   * @returns Whatever `extract.extractStateless` resolves to.
   */
  async extract(
    dataSchema:
      | {
          [key: string]:
            | { [key: string]: unknown }
            | Array<unknown>
            | string
            | number
            | boolean
            | null;
        }
      | string,
    config: ExtractConfig | undefined = undefined,
    filePath: string | undefined = undefined,
    fileContent:
      | Buffer<ArrayBufferLike>
      | Uint8Array<ArrayBuffer>
      | string
      | File
      | undefined = undefined,
    fileName: string | undefined = undefined,
    project_id: string | null = null,
    organization_id: string | null = null,
    pollingInterval: number = 1,
    maxPollingIterations: number = 1800,
    maxRetriesOnError: number = 10,
    retryInterval: number = 0.5,
  ): Promise<ExtractResult | undefined> {
    return await extract.extractStateless(
      dataSchema,
      config,
      filePath,
      fileContent,
      fileName,
      project_id,
      organization_id,
      this.client,
      pollingInterval,
      maxPollingIterations,
      maxRetriesOnError,
      retryInterval,
    );
  }
}
|
||||
@@ -18644,6 +18644,66 @@ export const WebhookConfigurationSchema = {
|
||||
"Allows the user to configure webhook options for notifications and callbacks.",
|
||||
} as const;
|
||||
|
||||
/**
 * JSON-schema description of the `StatelessExtractionRequest` body.
 * Only `data_schema` is required; `config` and `file_id` are optional.
 */
export const StatelessExtractionRequestSchema = {
  type: "object",
  required: ["data_schema"],
  properties: {
    data_schema: {
      anyOf: [
        {
          // Object form: every property value may be any JSON value.
          additionalProperties: {
            anyOf: [
              { additionalProperties: true, type: "object" },
              { items: {}, type: "array" },
              { type: "string" },
              { type: "integer" },
              { type: "number" },
              { type: "boolean" },
              { type: "null" },
            ],
          },
          type: "object",
        },
        { type: "string" },
        { type: "null" },
      ],
    },
    config: {
      $ref: "#/components/schemas/ExtractConfig",
      description: "The configuration parameters for the extraction agent.",
    },
    file_id: {
      type: "string",
      format: "uuid",
      title: "File Id",
      description: "ID of an uploaded file to extract from",
    },
  },
  title: "StatelessExtractionRequest",
  description:
    "Request body for stateless extraction. Must include either file_id, text, or base64.",
} as const;
|
||||
|
||||
export const llama_index__core__base__llms__types__ChatMessageSchema = {
|
||||
properties: {
|
||||
role: {
|
||||
|
||||
@@ -452,6 +452,9 @@ import type {
|
||||
UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutData,
|
||||
UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponse,
|
||||
UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutError,
|
||||
ExtractStatelessApiV1ExtractionRunPostData,
|
||||
ExtractStatelessApiV1ExtractionRunPostResponse,
|
||||
ExtractStatelessApiV1ExtractionRunPostError,
|
||||
ListJobsApiV1ExtractionJobsGetData,
|
||||
ListJobsApiV1ExtractionJobsGetResponse,
|
||||
ListJobsApiV1ExtractionJobsGetError,
|
||||
@@ -5701,6 +5704,39 @@ export const updateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgent
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Extract Stateless
 *
 * Stateless extraction endpoint that uses a default extraction agent in the
 * user's default project. Requires data_schema, config, and either file_id,
 * text, or base64 encoded file data.
 *
 * Issues a JSON `POST` to `/api/v1/extraction/run` on `options.client`, or on
 * the module-level default client when none is supplied.
 */
export const extractStatelessApiV1ExtractionRunPost = <
  ThrowOnError extends boolean = false,
>(
  options: Options<ExtractStatelessApiV1ExtractionRunPostData, ThrowOnError>,
) => {
  const httpClient = options.client ?? _heyApiClient;
  return httpClient.post<
    ExtractStatelessApiV1ExtractionRunPostResponse,
    ExtractStatelessApiV1ExtractionRunPostError,
    ThrowOnError
  >({
    security: [
      { scheme: "bearer", type: "http" },
      { scheme: "bearer", type: "http" },
    ],
    url: "/api/v1/extraction/run",
    // Caller options are spread after the defaults so they can override them...
    ...options,
    // ...except headers, which are merged on top of the JSON content type.
    headers: {
      "Content-Type": "application/json",
      ...options?.headers,
    },
  });
};
|
||||
|
||||
/**
|
||||
* List Jobs
|
||||
*/
|
||||
|
||||
@@ -8272,6 +8272,35 @@ export type WebhookConfiguration = {
|
||||
> | null;
|
||||
};
|
||||
|
||||
/**
 * Request body for stateless extraction. Must include either file_id, text, or base64.
 */
export type StatelessExtractionRequest = {
  /** Schema of the data to extract: an object, a string, or `null`. */
  data_schema:
    | Record<
        string,
        Record<string, unknown> | Array<unknown> | string | number | boolean | null
      >
    | string
    | null;
  /**
   * The configuration parameters for the extraction agent.
   */
  config?: ExtractConfig;
  /**
   * ID of an uploaded file to extract from
   */
  file_id?: string;
};
|
||||
|
||||
/**
|
||||
* Chat message.
|
||||
*/
|
||||
@@ -13078,6 +13107,33 @@ export type UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentI
|
||||
export type UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponse =
|
||||
UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponses[keyof UpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponses];
|
||||
|
||||
/** Request shape of `POST /api/v1/extraction/run`: a body only, no path or query parameters. */
export type ExtractStatelessApiV1ExtractionRunPostData = {
  body: StatelessExtractionRequest;
  path?: never;
  query?: never;
  url: "/api/v1/extraction/run";
};

/** Error payloads of the stateless extraction endpoint, keyed by HTTP status. */
export type ExtractStatelessApiV1ExtractionRunPostErrors = {
  /**
   * Validation Error
   */
  422: HttpValidationError;
};

/** Union of every error payload listed in `ExtractStatelessApiV1ExtractionRunPostErrors`. */
export type ExtractStatelessApiV1ExtractionRunPostError =
  ExtractStatelessApiV1ExtractionRunPostErrors[keyof ExtractStatelessApiV1ExtractionRunPostErrors];

/** Success payloads of the stateless extraction endpoint, keyed by HTTP status. */
export type ExtractStatelessApiV1ExtractionRunPostResponses = {
  /**
   * Successful Response
   */
  200: ExtractJob;
};

/** Union of every success payload listed in `ExtractStatelessApiV1ExtractionRunPostResponses`. */
export type ExtractStatelessApiV1ExtractionRunPostResponse =
  ExtractStatelessApiV1ExtractionRunPostResponses[keyof ExtractStatelessApiV1ExtractionRunPostResponses];
|
||||
|
||||
export type ListJobsApiV1ExtractionJobsGetData = {
|
||||
body?: never;
|
||||
path?: never;
|
||||
|
||||
@@ -3472,6 +3472,12 @@ export const zUserOrganizationRoleCreate = z.object({
|
||||
role_id: z.string().uuid(),
|
||||
});
|
||||
|
||||
/**
 * Zod validator for the stateless extraction request body:
 * a mandatory `data_schema` (object, string or null) plus optional
 * `config` and UUID-formatted `file_id`.
 */
export const zStatelessExtractionRequest = z.object({
  data_schema: z.union([z.object({}), z.string(), z.null()]),
  config: zExtractConfig.optional(),
  file_id: z.string().uuid().optional(),
});
|
||||
|
||||
export const zListKeysApiV1ApiKeysGetResponse = z.array(zApiKey);
|
||||
|
||||
export const zGenerateKeyApiV1ApiKeysPostResponse = zApiKey;
|
||||
@@ -3829,6 +3835,8 @@ export const zGetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentId
|
||||
export const zUpdateExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdPutResponse =
|
||||
zExtractAgent;
|
||||
|
||||
export const zExtractStatelessApiV1ExtractionRunPostResponse = zExtractJob;
|
||||
|
||||
export const zListJobsApiV1ExtractionJobsGetResponse = z.array(zExtractJob);
|
||||
|
||||
export const zRunJobApiV1ExtractionJobsPostResponse = zExtractJob;
|
||||
|
||||
@@ -0,0 +1,651 @@
|
||||
import { emitWarning } from "process";
|
||||
import fs from "fs/promises";
|
||||
import { Blob } from "buffer";
|
||||
import * as path from "path";
|
||||
import type { ExtractResult } from "./type";
|
||||
import { randomUUID } from "@llamaindex/env";
|
||||
import { File } from "buffer";
|
||||
import {
|
||||
type Options,
|
||||
type ExtractAgentCreate,
|
||||
type ExtractConfig,
|
||||
type ExtractJobCreate,
|
||||
type ExtractAgent,
|
||||
type ExtractJob,
|
||||
type CreateExtractionAgentApiV1ExtractionExtractionAgentsPostData,
|
||||
type GetExtractionAgentByNameApiV1ExtractionExtractionAgentsByNameNameGetData,
|
||||
type GetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdGetData,
|
||||
type RunJobApiV1ExtractionJobsPostData,
|
||||
type GetJobApiV1ExtractionJobsJobIdGetData,
|
||||
type GetJobResultApiV1ExtractionJobsJobIdResultGetData,
|
||||
StatusEnum,
|
||||
type UploadFileApiV1FilesPostData,
|
||||
type StatelessExtractionRequest,
|
||||
type ExtractStatelessApiV1ExtractionRunPostData,
|
||||
type DeleteExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdDeleteData,
|
||||
createExtractionAgentApiV1ExtractionExtractionAgentsPost,
|
||||
getExtractionAgentByNameApiV1ExtractionExtractionAgentsByNameNameGet,
|
||||
getExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdGet,
|
||||
runJobApiV1ExtractionJobsPost,
|
||||
getJobApiV1ExtractionJobsJobIdGet,
|
||||
getJobResultApiV1ExtractionJobsJobIdResultGet,
|
||||
uploadFileApiV1FilesPost,
|
||||
extractStatelessApiV1ExtractionRunPost,
|
||||
deleteExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdDelete,
|
||||
} from "./api";
|
||||
import type { Client } from "@hey-api/client-fetch";
|
||||
import { sleep } from "./utils";
|
||||
import { fileTypeFromBuffer } from "file-type";
|
||||
|
||||
/** Upload body used by `uploadFile`: a single `upload_file` field holding the file data. */
type BodyUploadFileApiV1FilesPost = Record<"upload_file", Blob | File>;
|
||||
|
||||
/**
 * Create an extraction agent, retrying while the API answers with an error.
 *
 * @param name - Name of the agent to create.
 * @param dataSchema - Schema of the data to extract, as an object or a string.
 * @param config - Extraction configuration sent along with the schema.
 * @param project_id - Sent as the `project_id` query parameter.
 * @param organization_id - Sent as the `organization_id` query parameter.
 * @param client - API client to use; the generated default is used when omitted.
 * @param maxRetriesOnError - Number of failed attempts tolerated before giving up.
 * @param retryInterval - Pause after a failed attempt; multiplied by 1000 before
 *   being handed to `sleep` (so presumably expressed in seconds).
 * @returns The created agent as returned by the API.
 * @throws Error once more than `maxRetriesOnError` attempts have failed.
 */
export async function createAgent(
  name: string,
  dataSchema:
    | {
        [key: string]:
          | { [key: string]: unknown }
          | Array<unknown>
          | string
          | number
          | number
          | boolean
          | null;
      }
    | string,
  config: ExtractConfig = {} as ExtractConfig,
  project_id: string | null = null,
  organization_id: string | null = null,
  client: Client | undefined = undefined,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<ExtractAgent | undefined> {
  const payload = {
    name,
    data_schema: dataSchema,
    config,
  } as ExtractAgentCreate;
  const requestOptions = ({
    body: payload,
    query: { project_id, organization_id },
  } as CreateExtractionAgentApiV1ExtractionExtractionAgentsPostData) as Options<CreateExtractionAgentApiV1ExtractionExtractionAgentsPostData>;
  if (client !== undefined) {
    requestOptions.client = client;
  }

  for (let failures = 0; ; failures++) {
    if (failures > maxRetriesOnError) {
      throw new Error(
        "Error while creating the agent: Exceeded maximum number of retries, the API keeps returning errors.",
      );
    }
    const result =
      await createExtractionAgentApiV1ExtractionExtractionAgentsPost(
        requestOptions,
      );
    if (result.response.ok) {
      return result.data as ExtractAgent;
    }
    // Failed attempt: report the API error (when present), wait, then retry.
    if ("error" in result) {
      console.log(
        `An error occurred while creating the extraction agent.\nDetails:\n\n${JSON.stringify(
          result.error,
        )}\n\nRetrying...`,
      );
    }
    await sleep(retryInterval * 1000);
  }
}
|
||||
|
||||
/**
 * Fetch an extraction agent by id or by name, retrying while the API answers
 * with an error.
 *
 * When both `id` and `name` are given a warning is emitted and only the id is
 * used. `project_id` and `organization_id` are only sent for lookups by name.
 *
 * @param id - Id of the agent to fetch.
 * @param name - Name of the agent to fetch (used only when `id` is omitted).
 * @param project_id - Query parameter for the lookup by name.
 * @param organization_id - Query parameter for the lookup by name.
 * @param client - API client to use; the generated default is used when omitted.
 * @param maxRetriesOnError - Number of failed attempts tolerated before giving up.
 * @param retryInterval - Pause after a failed attempt; multiplied by 1000 before
 *   being handed to `sleep` (so presumably expressed in seconds).
 * @returns The agent as returned by the API.
 * @throws Error when neither `id` nor `name` is given, or once more than
 *   `maxRetriesOnError` attempts have failed.
 */
export async function getAgent(
  id: string | undefined = undefined,
  name: string | undefined = undefined,
  project_id: string | null = null,
  organization_id: string | null = null,
  client: Client | undefined = undefined,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<ExtractAgent | undefined> {
  // The part of the API result the shared retry loop below relies on.
  type LookupResult = { data?: unknown; error?: unknown; response: Response };

  let lookupKind: "ID" | "name";
  let fetchOnce: () => Promise<LookupResult>;

  if (typeof id !== "undefined") {
    if (typeof name !== "undefined") {
      emitWarning("You passed both `id` and `name`, using only id...");
    }
    const data = {
      path: { extraction_agent_id: id },
    } as GetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdGetData;
    const options =
      data as Options<GetExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdGetData>;
    if (typeof client !== "undefined") {
      options.client = client;
    }
    lookupKind = "ID";
    fetchOnce = () =>
      getExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdGet(
        options,
      );
  } else if (typeof name !== "undefined") {
    const data = {
      path: { name: name },
      query: { organization_id: organization_id, project_id: project_id },
    } as GetExtractionAgentByNameApiV1ExtractionExtractionAgentsByNameNameGetData;
    const options =
      data as Options<GetExtractionAgentByNameApiV1ExtractionExtractionAgentsByNameNameGetData>;
    if (typeof client !== "undefined") {
      options.client = client;
    }
    lookupKind = "name";
    fetchOnce = () =>
      getExtractionAgentByNameApiV1ExtractionExtractionAgentsByNameNameGet(
        options,
      );
  } else {
    throw new Error("One of `id` and `name` must be passed.");
  }

  // Single retry loop shared by both lookup flavours.
  for (let retries = 0; ; retries++) {
    if (retries > maxRetriesOnError) {
      throw new Error(
        "Error while getting the agent: Exceeded maximum number of retries, the API keeps returning errors.",
      );
    }
    const response = await fetchOnce();
    if (response.response.ok) {
      return response.data as ExtractAgent;
    }
    if ("error" in response) {
      console.log(
        `An error occurred while getting the extraction agent by ${lookupKind}.\nDetails:\n\n${JSON.stringify(
          response.error,
        )}\n\nRetrying...`,
      );
    }
    await sleep(retryInterval * 1000);
  }
}
|
||||
|
||||
/**
 * Wrap a piece of text in a `File`.
 *
 * @param text - Content of the file.
 * @param fileName - Name of the file; when `null` or omitted a name of the form
 *   `uploadedFile_<uuid with underscores>.txt` is generated.
 * @returns The new `File`.
 */
function textToFile(text: string, fileName: string | null = null) {
  const resolvedName =
    fileName ?? `uploadedFile_${randomUUID().replace(/-/g, "_")}.txt`;
  return new File([text], resolvedName);
}
|
||||
|
||||
/**
 * Upload a file to the API and return the id the API assigned to it.
 *
 * The file comes from `filePath` when given (read from disk), otherwise from
 * `fileContent`. Binary content without a `fileName` gets a generated name
 * whose extension is detected with `fileTypeFromBuffer`, defaulting to "pdf";
 * string content is wrapped by `textToFile`.
 *
 * @param filePath - Path of the file to upload; takes precedence over `fileContent`.
 * @param fileContent - In-memory content: a `File`, binary data, or text.
 * @param fileName - Name to give the uploaded file.
 * @param project_id - Sent as the `project_id` query parameter.
 * @param organization_id - Sent as the `organization_id` query parameter.
 * @param client - API client to use; the generated default is used when omitted.
 * @param maxRetriesOnError - Number of failed attempts tolerated before giving up.
 * @param retryInterval - Pause after a failed attempt; multiplied by 1000 before
 *   being handed to `sleep` (so presumably expressed in seconds).
 * @returns The id of the uploaded file.
 * @throws Error when neither `filePath` nor `fileContent` is given, when
 *   `fileContent` has an unsupported type, or once the retry budget is spent.
 */
async function uploadFile(
  filePath: string | undefined = undefined,
  fileContent:
    | Buffer<ArrayBufferLike>
    | File
    | Uint8Array<ArrayBuffer>
    | string
    | undefined = undefined,
  fileName: string | undefined = undefined,
  project_id: string | null = null,
  organization_id: string | null = null,
  client: Client | undefined = undefined,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<string | undefined> {
  if (filePath === undefined && fileContent === undefined) {
    throw new Error(
      "One between filePath and fileContent needs to be provided",
    );
  }

  let file: File | undefined = undefined;
  if (filePath !== undefined) {
    const bytes = new Uint8Array(await fs.readFile(filePath));
    file = new File([bytes], fileName ?? path.basename(filePath));
  } else if (fileContent instanceof File) {
    file = fileContent;
  } else if (fileContent instanceof Uint8Array) {
    // Covers `Buffer` too, since it is a `Uint8Array` subclass.
    const detected = await fileTypeFromBuffer(fileContent);
    const ext = detected?.ext ?? "pdf";
    file = new File(
      [new Uint8Array(fileContent)],
      fileName ?? `uploadedFile_${randomUUID().replace(/-/g, "_")}.${ext}`,
    );
  } else if (typeof fileContent === "string") {
    file = textToFile(fileContent, fileName);
  } else {
    throw new Error("Unsupported fileContent type");
  }

  const uploadOptions = ({
    body: { upload_file: file } as BodyUploadFileApiV1FilesPost,
    query: { organization_id, project_id },
  } as UploadFileApiV1FilesPostData) as Options<UploadFileApiV1FilesPostData>;
  if (client !== undefined) {
    uploadOptions.client = client;
  }

  for (let failures = 0; ; ) {
    if (failures > maxRetriesOnError) {
      throw new Error(
        "Error while processing your file: Exceeded maximum number of retries, the API keeps returning errors.",
      );
    }
    const uploadResponse = await uploadFileApiV1FilesPost(uploadOptions);
    if (!uploadResponse.response.ok) {
      failures++;
      await sleep(retryInterval * 1000);
    }
    // A response body, when present, carries the id of the stored file.
    if (uploadResponse.data !== undefined) {
      return uploadResponse.data.id as string;
    }
  }
}
|
||||
|
||||
/**
 * Start an extraction job and return its id.
 *
 * An attempt counts as failed when the HTTP response is not OK, or when the
 * returned job is already in the "CANCELLED" or "ERROR" state; failed attempts
 * are retried after a pause.
 *
 * @param options - Request options for the endpoint selected by `stateless`.
 * @param stateless - `true` to call the stateless extraction endpoint, `false`
 *   to call the agent-based job endpoint.
 * @param maxRetriesOnError - Number of failed attempts tolerated before giving up.
 * @param retryInterval - Pause after a failed attempt; multiplied by 1000 before
 *   being handed to `sleep` (so presumably expressed in seconds).
 * @returns The id of the created job.
 * @throws Error once more than `maxRetriesOnError` attempts have failed.
 */
async function createExtractJob(
  options:
    | Options<RunJobApiV1ExtractionJobsPostData>
    | Options<ExtractStatelessApiV1ExtractionRunPostData>,
  stateless: boolean = false,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<string | undefined> {
  type JobResponse = {
    data: ExtractJob | undefined;
    request: Request;
    response: Response;
  };

  let failures = 0;
  for (;;) {
    if (failures > maxRetriesOnError) {
      throw new Error(
        "Error while creating the extraction job: Exceeded maximum number of retries, the API keeps returning errors.",
      );
    }

    const response = (
      stateless
        ? await extractStatelessApiV1ExtractionRunPost(
            options as Options<ExtractStatelessApiV1ExtractionRunPostData>,
          )
        : await runJobApiV1ExtractionJobsPost(
            options as Options<RunJobApiV1ExtractionJobsPostData>,
          )
    ) as JobResponse;

    if (!response.response.ok) {
      if ("error" in response) {
        console.log(
          "An error occurred: ",
          JSON.stringify(response.error),
          "\nRetrying...",
        );
      }
      failures++;
      await sleep(retryInterval * 1000);
    }

    const job = response.data;
    if (typeof job == "undefined") {
      continue;
    }
    const jobStatus = job.status as StatusEnum;
    if (jobStatus == "CANCELLED" || jobStatus == "ERROR") {
      // The job was created but is already dead: try to create a new one.
      failures++;
      await sleep(retryInterval * 1000);
      continue;
    }
    return job.id as string;
  }
}
|
||||
|
||||
/**
 * Poll an extraction job until it finishes or the polling budget is spent.
 *
 * Every poll that does not reach a terminal state counts as one iteration and
 * is followed by a pause. This includes polls whose HTTP request failed or
 * returned no body, so a misbehaving API can neither be hammered in a tight
 * loop nor keep the function spinning forever.
 *
 * @param jobId - Id of the job to watch.
 * @param interval - Pause between polls; multiplied by 1000 before being handed
 *   to `sleep` (so presumably expressed in seconds).
 * @param maxIterations - Number of non-terminal polls tolerated before giving up.
 * @param client - API client to use; the generated default is used when omitted.
 * @returns `true` when the job reached `SUCCESS`, `false` when the polling
 *   budget ran out first.
 * @throws Error when the job ends up `CANCELLED` or in `ERROR`.
 */
async function pollForJobCompletion(
  jobId: string,
  interval: number = 1,
  maxIterations: number = 1800,
  client: Client | undefined = undefined,
): Promise<boolean> {
  const jobData = {
    path: { job_id: jobId },
  } as GetJobApiV1ExtractionJobsJobIdGetData;
  const jobOptions = jobData as Options<GetJobApiV1ExtractionJobsJobIdGetData>;
  if (typeof client != "undefined") {
    jobOptions.client = client;
  }

  for (let numIterations = 0; ; numIterations++) {
    if (numIterations > maxIterations) {
      return false;
    }
    const response = await getJobApiV1ExtractionJobsJobIdGet(jobOptions);
    if (typeof response.data != "undefined") {
      const status = response.data.status as StatusEnum;
      if (status == StatusEnum.CANCELLED || status == StatusEnum.ERROR) {
        throw new Error("There was an error extracting data from your file.");
      }
      if (status == StatusEnum.SUCCESS) {
        return true;
      }
    }
    // Job still running, or the poll itself failed: wait before asking again.
    await sleep(interval * 1000);
  }
}
|
||||
|
||||
/**
 * Fetch the result of an extraction job, retrying while the API answers with
 * an error.
 *
 * @param jobId - Id of the job whose result is wanted.
 * @param client - API client to use; the generated default is used when omitted.
 * @param project_id - Sent as the `project_id` query parameter.
 * @param organization_id - Sent as the `organization_id` query parameter.
 * @param maxRetriesOnError - Number of failed attempts tolerated before giving up.
 * @param retryInterval - Pause after a failed attempt; multiplied by 1000 before
 *   being handed to `sleep` (so presumably expressed in seconds).
 * @returns The job's `data` and `extraction_metadata`, the latter exposed as
 *   `extractionMetadata`.
 * @throws Error once more than `maxRetriesOnError` attempts have failed.
 */
async function getJobResult(
  jobId: string,
  client: Client | undefined = undefined,
  project_id: string | null = null,
  organization_id: string | null = null,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<ExtractResult | undefined> {
  const resultOptions = ({
    path: { job_id: jobId },
    query: { organization_id, project_id },
  } as GetJobResultApiV1ExtractionJobsJobIdResultGetData) as Options<GetJobResultApiV1ExtractionJobsJobIdResultGetData>;
  if (client !== undefined) {
    resultOptions.client = client;
  }

  for (let failures = 0; ; ) {
    if (failures > maxRetriesOnError) {
      throw new Error(
        "Error while getting the result of the extraction job: Exceeded maximum number of retries, the API keeps returning errors.",
      );
    }
    const response =
      await getJobResultApiV1ExtractionJobsJobIdResultGet(resultOptions);
    if (!response.response.ok) {
      if ("error" in response) {
        console.log(
          "An error occurred: ",
          JSON.stringify(response.error),
          "\nRetrying...",
        );
      }
      failures++;
      await sleep(retryInterval * 1000);
    }
    const payload = response.data;
    if (payload !== undefined) {
      return {
        data: payload.data,
        extractionMetadata: payload.extraction_metadata,
      } as ExtractResult;
    }
  }
}
|
||||
|
||||
/**
 * Run an extraction with an existing agent: upload the file, start a job,
 * poll until it completes, then fetch its result.
 *
 * @param agentId - Id of the extraction agent to run.
 * @param filePath - Path of the file to extract from.
 * @param fileContent - In-memory content, used when no `filePath` is given.
 * @param fileName - Name to give the uploaded file.
 * @param project_id - Forwarded to the upload and to the result request.
 * @param organization_id - Forwarded to the upload and to the result request.
 * @param client - API client to use; the generated default is used when omitted.
 * @param fromUi - Sent as the `from_ui` query parameter when creating the job.
 * @param pollingInterval - Pause between job status polls (presumably seconds).
 * @param maxPollingIterations - Maximum number of status polls.
 * @param maxRetriesOnError - Retry budget applied to each individual API call.
 * @param retryInterval - Pause between retries (presumably seconds).
 * @returns The extraction result.
 * @throws Error when the job does not complete within the polling budget, or
 *   when any of the underlying steps throws.
 */
export async function extract(
  agentId: string,
  filePath: string | undefined = undefined,
  fileContent:
    | Buffer<ArrayBufferLike>
    | File
    | Uint8Array<ArrayBuffer>
    | string
    | undefined = undefined,
  fileName: string | undefined = undefined,
  project_id: string | null = null,
  organization_id: string | null = null,
  client: Client | undefined = undefined,
  fromUi: boolean | undefined = undefined,
  pollingInterval: number = 1,
  maxPollingIterations: number = 1800,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<ExtractResult | undefined> {
  const fileId = (await uploadFile(
    filePath,
    fileContent,
    fileName,
    project_id,
    organization_id,
    client,
    maxRetriesOnError,
    retryInterval,
  )) as string;

  const extractJobCreate = {
    extraction_agent_id: agentId,
    file_id: fileId,
  } as ExtractJobCreate;
  const extractData = {
    body: extractJobCreate,
    query: { from_ui: fromUi },
  } as RunJobApiV1ExtractionJobsPostData;
  const extractOptions =
    extractData as Options<RunJobApiV1ExtractionJobsPostData>;
  if (typeof client != "undefined") {
    extractOptions.client = client;
  }

  const jobId = (await createExtractJob(
    extractOptions,
    false,
    maxRetriesOnError,
    retryInterval,
  )) as string;

  const success = await pollForJobCompletion(
    jobId,
    pollingInterval,
    maxPollingIterations,
    client,
  );
  if (!success) {
    // The budget is configurable, so report the actual values instead of a
    // fixed duration.
    throw new Error(
      `Your job did not complete within ${maxPollingIterations} polling iterations (polling interval: ${pollingInterval}s), timing out...`,
    );
  }

  return (await getJobResult(
    jobId,
    client,
    project_id,
    organization_id,
    maxRetriesOnError,
    retryInterval,
  )) as ExtractResult;
}
|
||||
|
||||
/**
 * Run a one-off ("stateless") extraction that needs no pre-created agent:
 * upload the file, start a stateless job with the given schema and config,
 * poll until it completes, then fetch its result.
 *
 * @param dataSchema - Schema of the data to extract, as an object or a string.
 * @param config - Extraction configuration sent along with the schema.
 * @param filePath - Path of the file to extract from.
 * @param fileContent - In-memory content, used when no `filePath` is given.
 * @param fileName - Name to give the uploaded file.
 * @param project_id - Forwarded to the upload and to the result request.
 * @param organization_id - Forwarded to the upload and to the result request.
 * @param client - API client to use; the generated default is used when omitted.
 * @param pollingInterval - Pause between job status polls (presumably seconds).
 * @param maxPollingIterations - Maximum number of status polls.
 * @param maxRetriesOnError - Retry budget applied to each individual API call.
 * @param retryInterval - Pause between retries (presumably seconds).
 * @returns The extraction result.
 * @throws Error when the job does not complete within the polling budget, or
 *   when any of the underlying steps throws.
 */
export async function extractStateless(
  dataSchema:
    | {
        [key: string]:
          | { [key: string]: unknown }
          | Array<unknown>
          | string
          | number
          | number
          | boolean
          | null;
      }
    | string,
  config: ExtractConfig = {} as ExtractConfig,
  filePath: string | undefined = undefined,
  fileContent:
    | Buffer<ArrayBufferLike>
    | File
    | Uint8Array<ArrayBuffer>
    | string
    | undefined = undefined,
  fileName: string | undefined = undefined,
  project_id: string | null = null,
  organization_id: string | null = null,
  client: Client | undefined = undefined,
  pollingInterval: number = 1,
  maxPollingIterations: number = 1800,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<ExtractResult | undefined> {
  const fileId = (await uploadFile(
    filePath,
    fileContent,
    fileName,
    project_id,
    organization_id,
    client,
    maxRetriesOnError,
    retryInterval,
  )) as string;

  const statelessRequest = {
    data_schema: dataSchema,
    file_id: fileId,
    config: config,
  } as StatelessExtractionRequest;
  const statelessData = {
    body: statelessRequest,
  } as ExtractStatelessApiV1ExtractionRunPostData;
  const extractOptions =
    statelessData as Options<ExtractStatelessApiV1ExtractionRunPostData>;
  if (typeof client != "undefined") {
    extractOptions.client = client;
  }

  const jobId = (await createExtractJob(
    extractOptions,
    true,
    maxRetriesOnError,
    retryInterval,
  )) as string;

  const success = await pollForJobCompletion(
    jobId,
    pollingInterval,
    maxPollingIterations,
    client,
  );
  if (!success) {
    // The budget is configurable, so report the actual values instead of a
    // fixed duration.
    throw new Error(
      `Your job did not complete within ${maxPollingIterations} polling iterations (polling interval: ${pollingInterval}s), timing out...`,
    );
  }

  return (await getJobResult(
    jobId,
    client,
    project_id,
    organization_id,
    maxRetriesOnError,
    retryInterval,
  )) as ExtractResult;
}
|
||||
|
||||
/**
 * Deletes the extraction agent identified by `id`, retrying while the API
 * responds with a non-OK status.
 *
 * @param id - Identifier of the extraction agent to delete.
 * @param client - Optional API client; attached to the request options when
 *   provided.
 * @param maxRetriesOnError - Number of failed attempts tolerated before
 *   giving up.
 * @param retryInterval - Delay between attempts; it is multiplied by 1000
 *   before being handed to `sleep`.
 * @returns `true` once the API responds with an OK status.
 * @throws Error when the number of failed attempts exceeds
 *   `maxRetriesOnError`.
 */
export async function deleteAgent(
  id: string,
  client: Client | undefined = undefined,
  maxRetriesOnError: number = 10,
  retryInterval: number = 0.5,
): Promise<boolean | undefined> {
  const requestData = {
    path: { extraction_agent_id: id },
  } as DeleteExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdDeleteData;
  const requestOptions =
    requestData as Options<DeleteExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdDeleteData>;
  if (client !== undefined) {
    requestOptions.client = client;
  }

  // `failedAttempts` only advances after an unsuccessful call; a successful
  // call leaves the loop through the early return below.
  for (let failedAttempts = 0; ; failedAttempts++) {
    if (failedAttempts > maxRetriesOnError) {
      throw new Error(
        "Maximum number of attempts for deleting agent " +
          id +
          " reached, but the API continues to return errors.",
      );
    }

    const response =
      await deleteExtractionAgentApiV1ExtractionExtractionAgentsExtractionAgentIdDelete(
        requestOptions,
      );
    if (response.response.ok) {
      return true;
    }

    if ("error" in response) {
      console.log(
        `An error occurred while deleting the agent: ${JSON.stringify(
          response.error,
        )}\nRetrying...`,
      );
    }
    // Back off before the next attempt.
    await sleep(retryInterval * 1000);
  }
}
|
||||
|
||||
export { type ExtractAgent, type ExtractConfig };
|
||||
@@ -6,3 +6,5 @@ export {
|
||||
} from "./LlamaCloudRetriever.js";
|
||||
export type { CloudConstructorParams } from "./type.js";
|
||||
export { LlamaParseReader } from "./reader.js";
|
||||
export { LlamaExtract, LlamaExtractAgent } from "./LlamaExtract.js";
|
||||
export type { ExtractConfig } from "./extract.js";
|
||||
|
||||
@@ -8,3 +8,44 @@ export type CloudConstructorParams = {
|
||||
projectName: string;
|
||||
organizationId?: string | undefined;
|
||||
} & ClientParams;
|
||||
|
||||
/**
 * JSON-like object whose values are nested objects, arrays, primitives or
 * `null`. Shared by every field of {@link ExtractResult} so the shape is
 * declared once.
 */
type ExtractJsonObject = {
  [key: string]:
    | { [key: string]: unknown }
    | Array<unknown>
    | string
    | number
    | boolean
    | null;
};

/**
 * Result of an extraction run.
 *
 * - `data`: the extracted payload — a single JSON-like object, an array of
 *   such objects, or `null`.
 * - `extractionMetadata`: JSON-like object with metadata about the run.
 */
export type ExtractResult = {
  data: ExtractJsonObject | Array<ExtractJsonObject> | null;
  extractionMetadata: ExtractJsonObject;
};
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import { describe, it, expect, beforeEach, beforeAll } from "vitest";
|
||||
import { LlamaParseReader } from "../src/reader.js";
|
||||
import { LlamaCloudIndex } from "../src/LlamaCloudIndex.js";
|
||||
import { LlamaExtract, LlamaExtractAgent } from "../src/LlamaExtract.js";
|
||||
import { Document } from "@llamaindex/core/schema";
|
||||
import { fs } from "@llamaindex/env";
|
||||
import { ExtractConfig } from "../src/api.js";
|
||||
|
||||
// Integration tests that require actual API keys and files
|
||||
describe("Integration Tests", () => {
|
||||
@@ -414,6 +416,121 @@ describe("Integration Tests", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Integration tests for LlamaExtract (agent-based and stateless extraction).
// NOTE(review): the agent tests are order-dependent — "should create agents
// correctly" must run before the tests that fetch "ExtractTestAgent", and the
// third test deletes that agent.
describe("LlamaExtract Integration", () => {
  const agentName = "ExtractTestAgent";

  // Minimal schema shared by all tests: a single required string field.
  const dataSchema = {
    properties: {
      text: {
        description: "Text from the file",
        type: "string",
      },
    },
    required: ["text"],
    type: "object",
  };

  const testContent =
    "**Text to extract**: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.";

  // Built lazily so the API key is only read inside tests that actually run.
  const createExtractClient = () =>
    new LlamaExtract(
      process.env.LLAMA_CLOUD_API_KEY!,
      "https://api.cloud.llamaindex.ai",
    );

  // Asserts that an extraction call produced a result with both expected keys.
  const expectExtractResult = (result: unknown) => {
    expect(result).toBeDefined();
    expect(result).toHaveProperty("data");
    expect(result).toHaveProperty("extractionMetadata");
  };

  it.skipIf(skipIfNoApiKey)(
    "should create agents correctly",
    async () => {
      const extractClient = createExtractClient();
      const agent = await extractClient.createAgent(agentName, dataSchema);
      expect(agent).instanceOf(LlamaExtractAgent);
    },
    60000,
  );

  it.skipIf(skipIfNoApiKey)(
    "should fetch agents correctly",
    async () => {
      const extractClient = createExtractClient();
      const agent = await extractClient.getAgent(agentName);
      expect(agent).instanceOf(LlamaExtractAgent);
    },
    60000,
  );

  it.skipIf(skipIfNoApiKey)(
    "should extract data correctly (file paths and file contents) with an agent and delete that agent",
    async () => {
      const extractClient = createExtractClient();
      const agent = await extractClient.getAgent(agentName);
      // Fail with a clear message instead of a TypeError on a missing agent.
      if (!agent) {
        throw new Error(`Agent ${agentName} was not found`);
      }

      // NOTE(review): the fixture file is left in the working directory;
      // consider removing it once the fs helper's delete API is confirmed.
      const testFilePath = "test-extract-agent.md";
      await fs.writeFile(testFilePath, new TextEncoder().encode(testContent));

      // Extraction from a file path.
      const result = await agent.extract(testFilePath);
      expectExtractResult(result);

      // Extraction from in-memory content with an explicit file name.
      const buffer = await fs.readFile(testFilePath);
      const resultBuffer = await agent.extract(undefined, buffer, testFilePath);
      expectExtractResult(resultBuffer);

      const success = await extractClient.deleteAgent(agent.id);
      expect(success).toBeTruthy();
    },
    60000,
  );

  it.skipIf(skipIfNoApiKey)(
    "should extract statelessly file paths and file contents",
    async () => {
      // NOTE(review): the fixture file is left in the working directory;
      // consider removing it once the fs helper's delete API is confirmed.
      const testFilePath = "test-extract.md";
      await fs.writeFile(testFilePath, new TextEncoder().encode(testContent));

      const extractClient = createExtractClient();

      // Extraction from a file path.
      const result = await extractClient.extract(
        dataSchema,
        {} as ExtractConfig,
        testFilePath,
      );
      expectExtractResult(result);

      // Extraction from in-memory content, deliberately without a file name.
      const buffer = await fs.readFile(testFilePath);
      const resultBuffer = await extractClient.extract(
        dataSchema,
        {} as ExtractConfig,
        undefined,
        buffer,
      );
      expectExtractResult(resultBuffer);
    },
    60000,
  );
});
|
||||
|
||||
describe("Error Handling Integration", () => {
|
||||
it.skipIf(skipIfNoApiKey)(
|
||||
"should handle malformed files gracefully",
|
||||
|
||||
Reference in New Issue
Block a user