Release 0.4.14 (#1021 )

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
feat(cloud): update openapi.json (#1020 )
2026-07-03 19:19:08 -04:00 · 2024-07-05 15:26:31 -07:00 · 2024-07-05 15:01:22 -07:00 · 2024-07-05 11:44:37 -07:00 · 2024-07-05 20:43:26 +07:00 · 2024-07-05 14:32:26 +07:00
34 changed files with 2493 additions and 94 deletions
@@ -13,7 +13,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -10,7 +10,7 @@ jobs:
      - name: Checkout Repo
        uses: actions/checkout@v4

-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
@@ -12,7 +12,7 @@ jobs:
      - name: Checkout Repo
        uses: actions/checkout@v4

-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
@@ -15,7 +15,7 @@ jobs:
      - name: Checkout Repo
        uses: actions/checkout@v4

-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
@@ -23,7 +23,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
@@ -45,7 +45,7 @@ jobs:

    steps:
      - uses: actions/checkout@v4
-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -60,7 +60,7 @@ jobs:

    steps:
      - uses: actions/checkout@v4
-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -97,7 +97,7 @@ jobs:
    name: Build LlamaIndex Example (${{ matrix.packages }})
    steps:
      - uses: actions/checkout@v4
-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -116,7 +116,7 @@ jobs:

    steps:
      - uses: actions/checkout@v4
-      - uses: pnpm/action-setup@v3
+      - uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -1,5 +1,19 @@
 # docs

+## 0.0.40
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.39
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.38

 ### Patch Changes
@@ -62,7 +62,7 @@ These building blocks can be customized to reflect ranking preferences, as well

 [**Retrievers**](../modules/retriever.md):
 A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
-The specific retrieval logic differs for difference indices, the most popular being dense retrieval against a vector index.
+The specific retrieval logic differs for different indices, the most popular being dense retrieval against a vector index.

 [**Response Synthesizers**](../modules/response_synthesizer.md):
 A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.
@@ -44,6 +44,8 @@ They can be divided into two groups.
 - `pageSeperator?` Optional. The page separator to use. Default is `\\n---\\n`.
 - `gpt4oMode` set to true to use GPT-4o to extract content. Default is `false`.
 - `gpt4oApiKey?` Optional. Set the GPT-4o API key. Lowers the cost of parsing by using your own API key. Your OpenAI account will be charged. Can also be set in the environment variable `LLAMA_CLOUD_GPT4O_API_KEY`.
+- `boundingBox?` Optional. Specify an area of the document to parse. Expects the bounding box margins as a string in clockwise order, e.g. `boundingBox = "0.1,0,0,0"` to not parse the top 10% of the document.
+- `targetPages?` Optional. Specify which pages to parse as a comma-separated list of page numbers. The first page is `0`.
 - `numWorkers` as in the python version, is set in `SimpleDirectoryReader`. Default is 1.

 ### LlamaParse with SimpleDirectoryReader
@@ -8,7 +8,7 @@ In JSON mode, LlamaParse will return a data structure representing the parsed ob

 ## Usage

-For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method. Currently it can't be used with `SimpleDirectoryReader`.
+For JSON mode, you need to use `loadJson`. The `resultType` is automatically set with this method.
 More information about indexing the results is on the next page.

 ```ts
@@ -1,6 +1,6 @@
 {
  "name": "docs",
-  "version": "0.0.38",
+  "version": "0.0.40",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
@@ -1,5 +1,21 @@
 # @llamaindex/autotool-02-next-example

+## 0.1.24
+
+### Patch Changes
+
+- llamaindex@0.4.14
+- @llamaindex/autotool@1.0.0
+
+## 0.1.23
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+  - @llamaindex/autotool@1.0.0
+
 ## 0.1.22

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool-02-next-example",
  "private": true,
-  "version": "0.1.22",
+  "version": "0.1.24",
  "scripts": {
    "dev": "next dev",
    "build": "next build",
@@ -51,7 +51,7 @@
    "unplugin": "^1.10.1"
  },
  "peerDependencies": {
-    "llamaindex": "^0.4.12",
+    "llamaindex": "^0.4.14",
    "openai": "^4",
    "typescript": "^4"
  },
@@ -1,5 +1,11 @@
 # @llamaindex/cloud

+## 0.1.3
+
+### Patch Changes
+
+- 1c444d5: feat(cloud): update openapi.json
+
 ## 0.1.2

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "0.1.2",
+  "version": "0.1.3",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -1,5 +1,19 @@
 # @llamaindex/community

+## 0.0.18
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.16

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/community",
  "description": "Community package for LlamaIndexTS",
-  "version": "0.0.16",
+  "version": "0.0.18",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/experimental

+## 0.0.49
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.48
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.47

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.47",
+  "version": "0.0.49",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,19 @@
 # llamaindex

+## 0.4.14
+
+### Patch Changes
+
+- Updated dependencies [1c444d5]
+  - @llamaindex/cloud@0.1.3
+
+## 0.4.13
+
+### Patch Changes
+
+- e8f8bea: feat: add boundingBox and targetPages to LlamaParseReader
+- 304484b: feat: add ignoreErrors flag to LlamaParseReader
+
 ## 0.4.12

 ### Patch Changes
@@ -1,5 +1,19 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.33
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.32
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.31

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.31",
+  "version": "0.0.33",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,19 @@
 # @llamaindex/next-agent-test

+## 0.1.33
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.1.32
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.1.31

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.31",
+  "version": "0.1.33",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,19 @@
 # test-edge-runtime

+## 0.1.32
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.1.31
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.1.30

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.30",
+  "version": "0.1.32",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,19 @@
 # @llamaindex/next-node-runtime

+## 0.0.14
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.13
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.12

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.0.12",
+  "version": "0.0.14",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,19 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.33
+
+### Patch Changes
+
+- llamaindex@0.4.14
+
+## 0.0.32
+
+### Patch Changes
+
+- Updated dependencies [e8f8bea]
+- Updated dependencies [304484b]
+  - llamaindex@0.4.13
+
 ## 0.0.31

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.31",
+  "version": "0.0.33",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.4.12",
+  "version": "0.4.14",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -8,6 +8,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema";
 import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
 import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
 import { wrapEventCaller } from "../internal/context/EventCaller.js";
+import { getCallbackManager } from "../internal/settings/CallbackManager.js";
 import { extractText } from "../llm/utils.js";
 import type { ClientParams, CloudConstructorParams } from "./constants.js";
 import { DEFAULT_PROJECT_NAME } from "./constants.js";
@@ -28,9 +29,14 @@ export class LlamaCloudRetriever implements BaseRetriever {
    nodes: TextNodeWithScore[],
  ): NodeWithScore[] {
    return nodes.map((node: TextNodeWithScore) => {
+      const textNode = jsonToNode(node.node, ObjectType.TEXT);
+      textNode.metadata = {
+        ...textNode.metadata,
+        ...node.node.extra_info, // append LlamaCloud extra_info to node metadata (file_name, pipeline_id, etc.)
+      };
      return {
        // Currently LlamaCloud only supports text nodes
-        node: jsonToNode(node.node, ObjectType.TEXT),
+        node: textNode,
        score: node.score,
      };
    });
@@ -83,6 +89,15 @@ export class LlamaCloudRetriever implements BaseRetriever {
        },
      });

-    return this.resultNodesToNodeWithScore(results.retrieval_nodes);
+    const nodesWithScores = this.resultNodesToNodeWithScore(
+      results.retrieval_nodes,
+    );
+    getCallbackManager().dispatchEvent("retrieve-end", {
+      payload: {
+        query,
+        nodes: nodesWithScores,
+      },
+    });
+    return nodesWithScores;
  }
 }
@@ -133,6 +133,12 @@ export class LlamaParseReader extends FileReader {
  gpt4oMode: boolean = false;
  // The API key for the GPT-4o API. Optional, lowers the cost of parsing. Can be set as an env variable: LLAMA_CLOUD_GPT4O_API_KEY.
  gpt4oApiKey?: string;
+  // The bounding box to use to extract text from documents. Describe as a string containing the bounding box margins.
+  boundingBox?: string;
+  // The target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0
+  targetPages?: string;
+  // Whether or not to ignore and skip errors raised during parsing.
+  ignoreErrors: boolean = true;
  // numWorkers is implemented in SimpleDirectoryReader

  constructor(params: Partial<LlamaParseReader> = {}) {
@@ -181,6 +187,8 @@ export class LlamaParseReader extends FileReader {
      page_seperator: this.pageSeperator,
      gpt4o_mode: this.gpt4oMode?.toString(),
      gpt4o_api_key: this.gpt4oApiKey,
+      bounding_box: this.boundingBox,
+      target_pages: this.targetPages,
    };

    // Appends body with any defined LlamaParseBodyParams
@@ -278,19 +286,29 @@ export class LlamaParseReader extends FileReader {
    fileContent: Uint8Array,
    fileName?: string,
  ): Promise<Document[]> {
-    // Creates a job for the file
-    const jobId = await this.createJob(fileContent, fileName);
-    if (this.verbose) {
-      console.log(`Started parsing the file under job id ${jobId}`);
-    }
+    let jobId;
+    try {
+      // Creates a job for the file
+      jobId = await this.createJob(fileContent, fileName);
+      if (this.verbose) {
+        console.log(`Started parsing the file under job id ${jobId}`);
+      }

-    // Return results as Document objects
-    const resultJson = await this.getJobResult(jobId, this.resultType);
-    return [
-      new Document({
-        text: resultJson[this.resultType],
-      }),
-    ];
+      // Return results as Document objects
+      const resultJson = await this.getJobResult(jobId, this.resultType);
+      return [
+        new Document({
+          text: resultJson[this.resultType],
+        }),
+      ];
+    } catch (e) {
+      console.error(`Error while parsing file under job id ${jobId}`, e);
+      if (this.ignoreErrors) {
+        return [];
+      } else {
+        throw e;
+      }
+    }
  }
  /**
   * Loads data from a file and returns an array of JSON objects.
@@ -300,18 +318,28 @@ export class LlamaParseReader extends FileReader {
   * @return {Promise<Record<string, any>[]>} A Promise that resolves to an array of JSON objects.
   */
  async loadJson(file: string): Promise<Record<string, any>[]> {
-    const data = await fs.readFile(file);
-    // Creates a job for the file
-    const jobId = await this.createJob(data);
-    if (this.verbose) {
-      console.log(`Started parsing the file under job id ${jobId}`);
-    }
+    let jobId;
+    try {
+      const data = await fs.readFile(file);
+      // Creates a job for the file
+      jobId = await this.createJob(data);
+      if (this.verbose) {
+        console.log(`Started parsing the file under job id ${jobId}`);
+      }

-    // Return results as an array of JSON objects (same format as Python version of the reader)
-    const resultJson = await this.getJobResult(jobId, "json");
-    resultJson.job_id = jobId;
-    resultJson.file_path = file;
-    return [resultJson];
+      // Return results as an array of JSON objects (same format as Python version of the reader)
+      const resultJson = await this.getJobResult(jobId, "json");
+      resultJson.job_id = jobId;
+      resultJson.file_path = file;
+      return [resultJson];
+    } catch (e) {
+      console.error(`Error while parsing the file under job id ${jobId}`, e);
+      if (this.ignoreErrors) {
+        return [];
+      } else {
+        throw e;
+      }
+    }
  }

  /**
@@ -326,51 +354,81 @@ export class LlamaParseReader extends FileReader {
    jsonResult: Record<string, any>[],
    downloadPath: string,
  ): Promise<Record<string, any>[]> {
-    const headers = { Authorization: `Bearer ${this.apiKey}` };
+    try {
+      // Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
+      try {
+        await fs.access(downloadPath);
+      } catch {
+        await fs.mkdir(downloadPath, { recursive: true });
+      }

-    // Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
-    if (!fs.access(downloadPath)) {
-      await fs.mkdir(downloadPath, { recursive: true });
-    }
-
-    const images: Record<string, any>[] = [];
-    for (const result of jsonResult) {
-      const jobId = result.job_id;
-      for (const page of result.pages) {
-        if (this.verbose) {
-          console.log(`> Image for page ${page.page}: ${page.images}`);
-        }
-        for (const image of page.images) {
-          const imageName = image.name;
-          // Get the full path
-          let imagePath = `${downloadPath}/${jobId}-${imageName}`;
-
-          if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
-            imagePath += ".png";
+      const images: Record<string, any>[] = [];
+      for (const result of jsonResult) {
+        const jobId = result.job_id;
+        for (const page of result.pages) {
+          if (this.verbose) {
+            console.log(`> Image for page ${page.page}: ${page.images}`);
          }
-
-          // Get a valid image path
-          image.path = imagePath;
-          image.job_id = jobId;
-          image.original_pdf_path = result.file_path;
-          image.page_number = page.page;
-
-          const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
-          const response = await fetch(imageUrl, { headers });
-          if (!response.ok) {
-            throw new Error(
-              `Failed to download image: ${await response.text()}`,
+          for (const image of page.images) {
+            const imageName = image.name;
+            const imagePath = await this.getImagePath(
+              downloadPath,
+              jobId,
+              imageName,
            );
+            await this.fetchAndSaveImage(imageName, imagePath, jobId);
+            // Assign metadata to the image
+            image.path = imagePath;
+            image.job_id = jobId;
+            image.original_pdf_path = result.file_path;
+            image.page_number = page.page;
+            images.push(image);
          }
-          const arrayBuffer = await response.arrayBuffer();
-          const buffer = new Uint8Array(arrayBuffer);
-          await fs.writeFile(imagePath, buffer);
-
-          images.push(image);
        }
      }
+      return images;
+    } catch (e) {
+      console.error(`Error while downloading images from the parsed result`, e);
+      if (this.ignoreErrors) {
+        return [];
+      } else {
+        throw e;
+      }
    }
-    return images;
+  }
+
+  private async getImagePath(
+    downloadPath: string,
+    jobId: string,
+    imageName: string,
+  ): Promise<string> {
+    // Get the full path
+    let imagePath = `${downloadPath}/${jobId}-${imageName}`;
+    // Get a valid image path
+    if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
+      imagePath += ".png";
+    }
+
+    return imagePath;
+  }
+
+  private async fetchAndSaveImage(
+    imageName: string,
+    imagePath: string,
+    jobId: string,
+  ): Promise<void> {
+    const headers = { Authorization: `Bearer ${this.apiKey}` };
+    // Construct the image URL
+    const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
+    const response = await fetch(imageUrl, { headers });
+    if (!response.ok) {
+      throw new Error(`Failed to download image: ${await response.text()}`);
+    }
+    // Convert the response to an ArrayBuffer and then to a Buffer
+    const arrayBuffer = await response.arrayBuffer();
+    const buffer = new Uint8Array(arrayBuffer);
+    // Write the image buffer to the specified imagePath
+    await fs.writeFile(imagePath, buffer);
  }

  static async getMimeType(
Author	SHA1	Message	Date
github-actions[bot]	c147d8a271	Release 0.4.14 (#1021 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-07-05 15:26:31 -07:00
Alex Yang	1c444d58b6	feat(cloud): update openapi.json (#1020 )	2024-07-05 15:01:22 -07:00
github-actions[bot]	1f910f7566	Release 0.4.13 (#1016 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-07-05 11:44:37 -07:00
Thuc Pham	99826cff43	fix: missing dispatch retrieve event on llamacloud retriever (#1018 )	2024-07-05 20:43:26 +07:00
Fabian Wimmer	e8f8bea969	feat: add boundingBox and targetPages to LlamaParseReader (#1017 )	2024-07-05 14:32:26 +07:00
Fabian Wimmer	304484b77a	feat: add ignoreErrors flag to LlamaParse (#959 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2024-07-04 20:51:05 +07:00
abgita	29fed77d58	Fixed a typo in the retriever description (#1009 )	2024-07-04 20:15:20 +07:00
Alex Yang	db070588c8	ci: fix setup pnpm (#1014 )	2024-07-03 12:11:48 -07:00