Compare commits

..

8 Commits

Author SHA1 Message Date
github-actions[bot] c147d8a271 Release 0.4.14 (#1021)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-07-05 15:26:31 -07:00
Alex Yang 1c444d58b6 feat(cloud): update openapi.json (#1020) 2024-07-05 15:01:22 -07:00
github-actions[bot] 1f910f7566 Release 0.4.13 (#1016)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-07-05 11:44:37 -07:00
Thuc Pham 99826cff43 fix: missing dispatch retrieve event on llamacloud retriever (#1018) 2024-07-05 20:43:26 +07:00
Fabian Wimmer e8f8bea969 feat: add boundingBox and targetPages to LlamaParseReader (#1017) 2024-07-05 14:32:26 +07:00
Fabian Wimmer 304484b77a feat: add ignoreErrors flag to LlamaParse (#959)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2024-07-04 20:51:05 +07:00
abgita 29fed77d58 Fixed a typo in the retriever description (#1009) 2024-07-04 20:15:20 +07:00
Alex Yang db070588c8 ci: fix setup pnpm (#1014) 2024-07-03 12:11:48 -07:00
34 changed files with 2493 additions and 94 deletions
+1 -1
View File
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
+1 -1
View File
@@ -10,7 +10,7 @@ jobs:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
+1 -1
View File
@@ -12,7 +12,7 @@ jobs:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
+1 -1
View File
@@ -15,7 +15,7 @@ jobs:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
+5 -5
View File
@@ -23,7 +23,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
@@ -45,7 +45,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
@@ -60,7 +60,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
@@ -97,7 +97,7 @@ jobs:
name: Build LlamaIndex Example (${{ matrix.packages }})
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
@@ -116,7 +116,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
+14
View File
@@ -1,5 +1,19 @@
# docs
## 0.0.40
### Patch Changes
- llamaindex@0.4.14
## 0.0.39
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.38
### Patch Changes
+1 -1
View File
@@ -62,7 +62,7 @@ These building blocks can be customized to reflect ranking preferences, as well
[**Retrievers**](../modules/retriever.md):
A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
The specific retrieval logic differs for difference indices, the most popular being dense retrieval against a vector index.
The specific retrieval logic differs for different indices, the most popular being dense retrieval against a vector index.
[**Response Synthesizers**](../modules/response_synthesizer.md):
A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.
@@ -44,6 +44,8 @@ They can be divided into two groups.
- `pageSeperator?` Optional. The page separator to use. Default is `\\n---\\n`.
- `gpt4oMode` set to true to use GPT-4o to extract content. Default is `false`.
- `gpt4oApiKey?` Optional. Set the GPT-4o API key. Lowers the cost of parsing by using your own API key. Your OpenAI account will be charged. Can also be set in the environment variable `LLAMA_CLOUD_GPT4O_API_KEY`.
- `boundingBox?` Optional. Specify an area of the document to parse. Expects the bounding box margins as a string in clockwise order, e.g. `boundingBox = "0.1,0,0,0"` to not parse the top 10% of the document.
- `targetPages?` Optional. Specify which pages to parse by specifying them as a comma-separated list. First page is `0`.
- `numWorkers` as in the python version, is set in `SimpleDirectoryReader`. Default is 1.
### LlamaParse with SimpleDirectoryReader
@@ -8,7 +8,7 @@ In JSON mode, LlamaParse will return a data structure representing the parsed ob
## Usage
For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method. Currently it can't be used with `SimpleDirectoryReader`.
For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method.
More information about indexing the results on the next page.
```ts
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "docs",
"version": "0.0.38",
"version": "0.0.40",
"private": true,
"scripts": {
"docusaurus": "docusaurus",
@@ -1,5 +1,21 @@
# @llamaindex/autotool-02-next-example
## 0.1.24
### Patch Changes
- llamaindex@0.4.14
- @llamaindex/autotool@1.0.0
## 0.1.23
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
- @llamaindex/autotool@1.0.0
## 0.1.22
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool-02-next-example",
"private": true,
"version": "0.1.22",
"version": "0.1.24",
"scripts": {
"dev": "next dev",
"build": "next build",
+1 -1
View File
@@ -51,7 +51,7 @@
"unplugin": "^1.10.1"
},
"peerDependencies": {
"llamaindex": "^0.4.12",
"llamaindex": "^0.4.14",
"openai": "^4",
"typescript": "^4"
},
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/cloud
## 0.1.3
### Patch Changes
- 1c444d5: feat(cloud): update openapi.json
## 0.1.2
### Patch Changes
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "0.1.2",
"version": "0.1.3",
"type": "module",
"license": "MIT",
"scripts": {
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/community
## 0.0.18
### Patch Changes
- llamaindex@0.4.14
## 0.0.17
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.16
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/community",
"description": "Community package for LlamaIndexTS",
"version": "0.0.16",
"version": "0.0.18",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/experimental
## 0.0.49
### Patch Changes
- llamaindex@0.4.14
## 0.0.48
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.47
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.47",
"version": "0.0.49",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+14
View File
@@ -1,5 +1,19 @@
# llamaindex
## 0.4.14
### Patch Changes
- Updated dependencies [1c444d5]
- @llamaindex/cloud@0.1.3
## 0.4.13
### Patch Changes
- e8f8bea: feat: add boundingBox and targetPages to LlamaParseReader
- 304484b: feat: add ignoreErrors flag to LlamaParseReader
## 0.4.12
### Patch Changes
@@ -1,5 +1,19 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.33
### Patch Changes
- llamaindex@0.4.14
## 0.0.32
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.31
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.31",
"version": "0.0.33",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,19 @@
# @llamaindex/next-agent-test
## 0.1.33
### Patch Changes
- llamaindex@0.4.14
## 0.1.32
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.1.31
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.31",
"version": "0.1.33",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# test-edge-runtime
## 0.1.32
### Patch Changes
- llamaindex@0.4.14
## 0.1.31
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.1.30
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.30",
"version": "0.1.32",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# @llamaindex/next-node-runtime
## 0.0.14
### Patch Changes
- llamaindex@0.4.14
## 0.0.13
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.12
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.0.12",
"version": "0.0.14",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# @llamaindex/waku-query-engine-test
## 0.0.33
### Patch Changes
- llamaindex@0.4.14
## 0.0.32
### Patch Changes
- Updated dependencies [e8f8bea]
- Updated dependencies [304484b]
- llamaindex@0.4.13
## 0.0.31
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.31",
"version": "0.0.33",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.4.12",
"version": "0.4.14",
"license": "MIT",
"type": "module",
"keywords": [
@@ -8,6 +8,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema";
import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
import { wrapEventCaller } from "../internal/context/EventCaller.js";
import { getCallbackManager } from "../internal/settings/CallbackManager.js";
import { extractText } from "../llm/utils.js";
import type { ClientParams, CloudConstructorParams } from "./constants.js";
import { DEFAULT_PROJECT_NAME } from "./constants.js";
@@ -28,9 +29,14 @@ export class LlamaCloudRetriever implements BaseRetriever {
nodes: TextNodeWithScore[],
): NodeWithScore[] {
return nodes.map((node: TextNodeWithScore) => {
const textNode = jsonToNode(node.node, ObjectType.TEXT);
textNode.metadata = {
...textNode.metadata,
...node.node.extra_info, // append LlamaCloud extra_info to node metadata (file_name, pipeline_id, etc.)
};
return {
// Currently LlamaCloud only supports text nodes
node: jsonToNode(node.node, ObjectType.TEXT),
node: textNode,
score: node.score,
};
});
@@ -83,6 +89,15 @@ export class LlamaCloudRetriever implements BaseRetriever {
},
});
return this.resultNodesToNodeWithScore(results.retrieval_nodes);
const nodesWithScores = this.resultNodesToNodeWithScore(
results.retrieval_nodes,
);
getCallbackManager().dispatchEvent("retrieve-end", {
payload: {
query,
nodes: nodesWithScores,
},
});
return nodesWithScores;
}
}
@@ -133,6 +133,12 @@ export class LlamaParseReader extends FileReader {
gpt4oMode: boolean = false;
// The API key for the GPT-4o API. Optional, lowers the cost of parsing. Can be set as an env variable: LLAMA_CLOUD_GPT4O_API_KEY.
gpt4oApiKey?: string;
// The bounding box to use to extract text from documents. Describe as a string containing the bounding box margins.
boundingBox?: string;
// The target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0
targetPages?: string;
// Whether or not to ignore and skip errors raised during parsing.
ignoreErrors: boolean = true;
// numWorkers is implemented in SimpleDirectoryReader
constructor(params: Partial<LlamaParseReader> = {}) {
@@ -181,6 +187,8 @@ export class LlamaParseReader extends FileReader {
page_seperator: this.pageSeperator,
gpt4o_mode: this.gpt4oMode?.toString(),
gpt4o_api_key: this.gpt4oApiKey,
bounding_box: this.boundingBox,
target_pages: this.targetPages,
};
// Appends body with any defined LlamaParseBodyParams
@@ -278,19 +286,29 @@ export class LlamaParseReader extends FileReader {
fileContent: Uint8Array,
fileName?: string,
): Promise<Document[]> {
// Creates a job for the file
const jobId = await this.createJob(fileContent, fileName);
if (this.verbose) {
console.log(`Started parsing the file under job id ${jobId}`);
}
let jobId;
try {
// Creates a job for the file
jobId = await this.createJob(fileContent, fileName);
if (this.verbose) {
console.log(`Started parsing the file under job id ${jobId}`);
}
// Return results as Document objects
const resultJson = await this.getJobResult(jobId, this.resultType);
return [
new Document({
text: resultJson[this.resultType],
}),
];
// Return results as Document objects
const resultJson = await this.getJobResult(jobId, this.resultType);
return [
new Document({
text: resultJson[this.resultType],
}),
];
} catch (e) {
console.error(`Error while parsing file under job id ${jobId}`, e);
if (this.ignoreErrors) {
return [];
} else {
throw e;
}
}
}
/**
* Loads data from a file and returns an array of JSON objects.
@@ -300,18 +318,28 @@ export class LlamaParseReader extends FileReader {
* @return {Promise<Record<string, any>[]>} A Promise that resolves to an array of JSON objects.
*/
async loadJson(file: string): Promise<Record<string, any>[]> {
const data = await fs.readFile(file);
// Creates a job for the file
const jobId = await this.createJob(data);
if (this.verbose) {
console.log(`Started parsing the file under job id ${jobId}`);
}
let jobId;
try {
const data = await fs.readFile(file);
// Creates a job for the file
jobId = await this.createJob(data);
if (this.verbose) {
console.log(`Started parsing the file under job id ${jobId}`);
}
// Return results as an array of JSON objects (same format as Python version of the reader)
const resultJson = await this.getJobResult(jobId, "json");
resultJson.job_id = jobId;
resultJson.file_path = file;
return [resultJson];
// Return results as an array of JSON objects (same format as Python version of the reader)
const resultJson = await this.getJobResult(jobId, "json");
resultJson.job_id = jobId;
resultJson.file_path = file;
return [resultJson];
} catch (e) {
console.error(`Error while parsing the file under job id ${jobId}`, e);
if (this.ignoreErrors) {
return [];
} else {
throw e;
}
}
}
/**
@@ -326,51 +354,81 @@ export class LlamaParseReader extends FileReader {
jsonResult: Record<string, any>[],
downloadPath: string,
): Promise<Record<string, any>[]> {
const headers = { Authorization: `Bearer ${this.apiKey}` };
try {
// Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
try {
await fs.access(downloadPath);
} catch {
await fs.mkdir(downloadPath, { recursive: true });
}
// Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
if (!fs.access(downloadPath)) {
await fs.mkdir(downloadPath, { recursive: true });
}
const images: Record<string, any>[] = [];
for (const result of jsonResult) {
const jobId = result.job_id;
for (const page of result.pages) {
if (this.verbose) {
console.log(`> Image for page ${page.page}: ${page.images}`);
}
for (const image of page.images) {
const imageName = image.name;
// Get the full path
let imagePath = `${downloadPath}/${jobId}-${imageName}`;
if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
imagePath += ".png";
const images: Record<string, any>[] = [];
for (const result of jsonResult) {
const jobId = result.job_id;
for (const page of result.pages) {
if (this.verbose) {
console.log(`> Image for page ${page.page}: ${page.images}`);
}
// Get a valid image path
image.path = imagePath;
image.job_id = jobId;
image.original_pdf_path = result.file_path;
image.page_number = page.page;
const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
const response = await fetch(imageUrl, { headers });
if (!response.ok) {
throw new Error(
`Failed to download image: ${await response.text()}`,
for (const image of page.images) {
const imageName = image.name;
const imagePath = await this.getImagePath(
downloadPath,
jobId,
imageName,
);
await this.fetchAndSaveImage(imageName, imagePath, jobId);
// Assign metadata to the image
image.path = imagePath;
image.job_id = jobId;
image.original_pdf_path = result.file_path;
image.page_number = page.page;
images.push(image);
}
const arrayBuffer = await response.arrayBuffer();
const buffer = new Uint8Array(arrayBuffer);
await fs.writeFile(imagePath, buffer);
images.push(image);
}
}
return images;
} catch (e) {
console.error(`Error while downloading images from the parsed result`, e);
if (this.ignoreErrors) {
return [];
} else {
throw e;
}
}
return images;
}
/**
 * Builds the local file path under which a downloaded image is stored.
 *
 * The path has the form `<downloadPath>/<jobId>-<imageName>`. When it does not
 * already end with ".png" or ".jpg" (case-sensitive check), ".png" is appended.
 *
 * @param downloadPath - directory the image is saved into.
 * @param jobId - id of the parse job, used as a file-name prefix.
 * @param imageName - image name, used as the file-name suffix.
 * @returns the resulting image path.
 */
private async getImagePath(
  downloadPath: string,
  jobId: string,
  imageName: string,
): Promise<string> {
  const candidate = `${downloadPath}/${jobId}-${imageName}`;
  // Keep the path as-is only when it already carries one of the accepted extensions
  const hasKnownExtension = [".png", ".jpg"].some((ext) =>
    candidate.endsWith(ext),
  );
  return hasKnownExtension ? candidate : candidate + ".png";
}
/**
 * Downloads a single image of a parse job and writes it to disk.
 *
 * @param imageName - image name; forms the last segment of the request URL.
 * @param imagePath - file path the downloaded bytes are written to.
 * @param jobId - id of the parse job the image belongs to.
 * @throws Error when the HTTP response is not OK; the message includes the response body text.
 */
private async fetchAndSaveImage(
  imageName: string,
  imagePath: string,
  jobId: string,
): Promise<void> {
  // Image endpoint of the job result, requested with the reader's API key as a Bearer token
  const url = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
  const res = await fetch(url, {
    headers: { Authorization: `Bearer ${this.apiKey}` },
  });

  if (!res.ok) {
    const reason = await res.text();
    throw new Error(`Failed to download image: ${reason}`);
  }

  // Read the body as raw bytes and persist them at imagePath
  const bytes = new Uint8Array(await res.arrayBuffer());
  await fs.writeFile(imagePath, bytes);
}
static async getMimeType(