mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-03 19:19:08 -04:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c147d8a271 | |||
| 1c444d58b6 | |||
| 1f910f7566 | |||
| 99826cff43 | |||
| e8f8bea969 | |||
| 304484b77a | |||
| 29fed77d58 | |||
| db070588c8 |
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
|
||||
@@ -10,7 +10,7 @@ jobs:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
|
||||
@@ -12,7 +12,7 @@ jobs:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
|
||||
@@ -23,7 +23,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
name: Build LlamaIndex Example (${{ matrix.packages }})
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v3
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# docs
|
||||
|
||||
## 0.0.40
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.39
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.38
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -62,7 +62,7 @@ These building blocks can be customized to reflect ranking preferences, as well
|
||||
|
||||
[**Retrievers**](../modules/retriever.md):
|
||||
A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
|
||||
The specific retrieval logic differs for difference indices, the most popular being dense retrieval against a vector index.
|
||||
The specific retrieval logic differs for different indices, the most popular being dense retrieval against a vector index.
|
||||
|
||||
[**Response Synthesizers**](../modules/response_synthesizer.md):
|
||||
A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.
|
||||
|
||||
@@ -44,6 +44,8 @@ They can be divided into two groups.
|
||||
- `pageSeperator?` Optional. The page separator to use. Default is `\\n---\\n`.
|
||||
- `gpt4oMode` set to true to use GPT-4o to extract content. Default is `false`.
|
||||
- `gpt4oApiKey?` Optional. Set the GPT-4o API key. Lowers the cost of parsing by using your own API key. Your OpenAI account will be charged. Can also be set in the environment variable `LLAMA_CLOUD_GPT4O_API_KEY`.
|
||||
- `boundingBox?` Optional. Specify an area of the document to parse. Expects the bounding box margins as a string in clockwise order, e.g. `boundingBox = "0.1,0,0,0"` to not parse the top 10% of the document.
|
||||
- `targetPages?` Optional. Specify which pages to parse by specifying them as a comma-separated list. First page is `0`.
|
||||
- `numWorkers`, as in the Python version, is set in `SimpleDirectoryReader`. Default is 1.
|
||||
|
||||
### LlamaParse with SimpleDirectoryReader
|
||||
|
||||
@@ -8,7 +8,7 @@ In JSON mode, LlamaParse will return a data structure representing the parsed ob
|
||||
|
||||
## Usage
|
||||
|
||||
For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method. Currently it can't be used with `SimpleDirectoryReader`.
|
||||
For JSON mode, you need to use `loadJson`. The `resultType` is automatically set with this method.
|
||||
More information about indexing the results is on the next page.
|
||||
|
||||
```ts
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docs",
|
||||
"version": "0.0.38",
|
||||
"version": "0.0.40",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
|
||||
@@ -1,5 +1,21 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.24
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
- @llamaindex/autotool@1.0.0
|
||||
|
||||
## 0.1.23
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
- @llamaindex/autotool@1.0.0
|
||||
|
||||
## 0.1.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.22",
|
||||
"version": "0.1.24",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"unplugin": "^1.10.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"llamaindex": "^0.4.12",
|
||||
"llamaindex": "^0.4.14",
|
||||
"openai": "^4",
|
||||
"typescript": "^4"
|
||||
},
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 0.1.3
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 1c444d5: feat(cloud): update openapi.json
|
||||
|
||||
## 0.1.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
+2184
-8
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "0.1.2",
|
||||
"version": "0.1.3",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.18
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.17
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.16
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.16",
|
||||
"version": "0.0.18",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/experimental
|
||||
|
||||
## 0.0.49
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.48
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.47
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/experimental",
|
||||
"description": "Experimental package for LlamaIndexTS",
|
||||
"version": "0.0.47",
|
||||
"version": "0.0.49",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# llamaindex
|
||||
|
||||
## 0.4.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1c444d5]
|
||||
- @llamaindex/cloud@0.1.3
|
||||
|
||||
## 0.4.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- e8f8bea: feat: add boundingBox and targetPages to LlamaParseReader
|
||||
- 304484b: feat: add ignoreErrors flag to LlamaParseReader
|
||||
|
||||
## 0.4.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/cloudflare-worker-agent-test
|
||||
|
||||
## 0.0.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloudflare-worker-agent-test",
|
||||
"version": "0.0.31",
|
||||
"version": "0.0.33",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/next-agent-test
|
||||
|
||||
## 0.1.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.1.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.1.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-agent-test",
|
||||
"version": "0.1.31",
|
||||
"version": "0.1.33",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# test-edge-runtime
|
||||
|
||||
## 0.1.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.1.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.1.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/nextjs-edge-runtime-test",
|
||||
"version": "0.1.30",
|
||||
"version": "0.1.32",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/next-node-runtime
|
||||
|
||||
## 0.0.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-node-runtime-test",
|
||||
"version": "0.0.12",
|
||||
"version": "0.0.14",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# @llamaindex/waku-query-engine-test
|
||||
|
||||
## 0.0.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.4.14
|
||||
|
||||
## 0.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e8f8bea]
|
||||
- Updated dependencies [304484b]
|
||||
- llamaindex@0.4.13
|
||||
|
||||
## 0.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/waku-query-engine-test",
|
||||
"version": "0.0.31",
|
||||
"version": "0.0.33",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llamaindex",
|
||||
"version": "0.4.12",
|
||||
"version": "0.4.14",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
"keywords": [
|
||||
|
||||
@@ -8,6 +8,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema";
|
||||
import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
|
||||
import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
|
||||
import { wrapEventCaller } from "../internal/context/EventCaller.js";
|
||||
import { getCallbackManager } from "../internal/settings/CallbackManager.js";
|
||||
import { extractText } from "../llm/utils.js";
|
||||
import type { ClientParams, CloudConstructorParams } from "./constants.js";
|
||||
import { DEFAULT_PROJECT_NAME } from "./constants.js";
|
||||
@@ -28,9 +29,14 @@ export class LlamaCloudRetriever implements BaseRetriever {
|
||||
nodes: TextNodeWithScore[],
|
||||
): NodeWithScore[] {
|
||||
return nodes.map((node: TextNodeWithScore) => {
|
||||
const textNode = jsonToNode(node.node, ObjectType.TEXT);
|
||||
textNode.metadata = {
|
||||
...textNode.metadata,
|
||||
...node.node.extra_info, // append LlamaCloud extra_info to node metadata (file_name, pipeline_id, etc.)
|
||||
};
|
||||
return {
|
||||
// Currently LlamaCloud only supports text nodes
|
||||
node: jsonToNode(node.node, ObjectType.TEXT),
|
||||
node: textNode,
|
||||
score: node.score,
|
||||
};
|
||||
});
|
||||
@@ -83,6 +89,15 @@ export class LlamaCloudRetriever implements BaseRetriever {
|
||||
},
|
||||
});
|
||||
|
||||
return this.resultNodesToNodeWithScore(results.retrieval_nodes);
|
||||
const nodesWithScores = this.resultNodesToNodeWithScore(
|
||||
results.retrieval_nodes,
|
||||
);
|
||||
getCallbackManager().dispatchEvent("retrieve-end", {
|
||||
payload: {
|
||||
query,
|
||||
nodes: nodesWithScores,
|
||||
},
|
||||
});
|
||||
return nodesWithScores;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,6 +133,12 @@ export class LlamaParseReader extends FileReader {
|
||||
gpt4oMode: boolean = false;
|
||||
// The API key for the GPT-4o API. Optional, lowers the cost of parsing. Can be set as an env variable: LLAMA_CLOUD_GPT4O_API_KEY.
|
||||
gpt4oApiKey?: string;
|
||||
// The bounding box to use to extract text from documents. Describe as a string containing the bounding box margins.
|
||||
boundingBox?: string;
|
||||
// The target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0
|
||||
targetPages?: string;
|
||||
// Whether or not to ignore and skip errors raised during parsing.
|
||||
ignoreErrors: boolean = true;
|
||||
// numWorkers is implemented in SimpleDirectoryReader
|
||||
|
||||
constructor(params: Partial<LlamaParseReader> = {}) {
|
||||
@@ -181,6 +187,8 @@ export class LlamaParseReader extends FileReader {
|
||||
page_seperator: this.pageSeperator,
|
||||
gpt4o_mode: this.gpt4oMode?.toString(),
|
||||
gpt4o_api_key: this.gpt4oApiKey,
|
||||
bounding_box: this.boundingBox,
|
||||
target_pages: this.targetPages,
|
||||
};
|
||||
|
||||
// Appends body with any defined LlamaParseBodyParams
|
||||
@@ -278,19 +286,29 @@ export class LlamaParseReader extends FileReader {
|
||||
fileContent: Uint8Array,
|
||||
fileName?: string,
|
||||
): Promise<Document[]> {
|
||||
// Creates a job for the file
|
||||
const jobId = await this.createJob(fileContent, fileName);
|
||||
if (this.verbose) {
|
||||
console.log(`Started parsing the file under job id ${jobId}`);
|
||||
}
|
||||
let jobId;
|
||||
try {
|
||||
// Creates a job for the file
|
||||
jobId = await this.createJob(fileContent, fileName);
|
||||
if (this.verbose) {
|
||||
console.log(`Started parsing the file under job id ${jobId}`);
|
||||
}
|
||||
|
||||
// Return results as Document objects
|
||||
const resultJson = await this.getJobResult(jobId, this.resultType);
|
||||
return [
|
||||
new Document({
|
||||
text: resultJson[this.resultType],
|
||||
}),
|
||||
];
|
||||
// Return results as Document objects
|
||||
const resultJson = await this.getJobResult(jobId, this.resultType);
|
||||
return [
|
||||
new Document({
|
||||
text: resultJson[this.resultType],
|
||||
}),
|
||||
];
|
||||
} catch (e) {
|
||||
console.error(`Error while parsing file under job id ${jobId}`, e);
|
||||
if (this.ignoreErrors) {
|
||||
return [];
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Loads data from a file and returns an array of JSON objects.
|
||||
@@ -300,18 +318,28 @@ export class LlamaParseReader extends FileReader {
|
||||
* @return {Promise<Record<string, any>[]>} A Promise that resolves to an array of JSON objects.
|
||||
*/
|
||||
async loadJson(file: string): Promise<Record<string, any>[]> {
|
||||
const data = await fs.readFile(file);
|
||||
// Creates a job for the file
|
||||
const jobId = await this.createJob(data);
|
||||
if (this.verbose) {
|
||||
console.log(`Started parsing the file under job id ${jobId}`);
|
||||
}
|
||||
let jobId;
|
||||
try {
|
||||
const data = await fs.readFile(file);
|
||||
// Creates a job for the file
|
||||
jobId = await this.createJob(data);
|
||||
if (this.verbose) {
|
||||
console.log(`Started parsing the file under job id ${jobId}`);
|
||||
}
|
||||
|
||||
// Return results as an array of JSON objects (same format as Python version of the reader)
|
||||
const resultJson = await this.getJobResult(jobId, "json");
|
||||
resultJson.job_id = jobId;
|
||||
resultJson.file_path = file;
|
||||
return [resultJson];
|
||||
// Return results as an array of JSON objects (same format as Python version of the reader)
|
||||
const resultJson = await this.getJobResult(jobId, "json");
|
||||
resultJson.job_id = jobId;
|
||||
resultJson.file_path = file;
|
||||
return [resultJson];
|
||||
} catch (e) {
|
||||
console.error(`Error while parsing the file under job id ${jobId}`, e);
|
||||
if (this.ignoreErrors) {
|
||||
return [];
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -326,51 +354,81 @@ export class LlamaParseReader extends FileReader {
|
||||
jsonResult: Record<string, any>[],
|
||||
downloadPath: string,
|
||||
): Promise<Record<string, any>[]> {
|
||||
const headers = { Authorization: `Bearer ${this.apiKey}` };
|
||||
try {
|
||||
// Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
|
||||
try {
|
||||
await fs.access(downloadPath);
|
||||
} catch {
|
||||
await fs.mkdir(downloadPath, { recursive: true });
|
||||
}
|
||||
|
||||
// Create download directory if it doesn't exist (Actually check for write access, not existence, since fsPromises does not have a `existsSync` method)
|
||||
if (!fs.access(downloadPath)) {
|
||||
await fs.mkdir(downloadPath, { recursive: true });
|
||||
}
|
||||
|
||||
const images: Record<string, any>[] = [];
|
||||
for (const result of jsonResult) {
|
||||
const jobId = result.job_id;
|
||||
for (const page of result.pages) {
|
||||
if (this.verbose) {
|
||||
console.log(`> Image for page ${page.page}: ${page.images}`);
|
||||
}
|
||||
for (const image of page.images) {
|
||||
const imageName = image.name;
|
||||
// Get the full path
|
||||
let imagePath = `${downloadPath}/${jobId}-${imageName}`;
|
||||
|
||||
if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
|
||||
imagePath += ".png";
|
||||
const images: Record<string, any>[] = [];
|
||||
for (const result of jsonResult) {
|
||||
const jobId = result.job_id;
|
||||
for (const page of result.pages) {
|
||||
if (this.verbose) {
|
||||
console.log(`> Image for page ${page.page}: ${page.images}`);
|
||||
}
|
||||
|
||||
// Get a valid image path
|
||||
image.path = imagePath;
|
||||
image.job_id = jobId;
|
||||
image.original_pdf_path = result.file_path;
|
||||
image.page_number = page.page;
|
||||
|
||||
const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
|
||||
const response = await fetch(imageUrl, { headers });
|
||||
if (!response.ok) {
|
||||
throw new Error(
|
||||
`Failed to download image: ${await response.text()}`,
|
||||
for (const image of page.images) {
|
||||
const imageName = image.name;
|
||||
const imagePath = await this.getImagePath(
|
||||
downloadPath,
|
||||
jobId,
|
||||
imageName,
|
||||
);
|
||||
await this.fetchAndSaveImage(imageName, imagePath, jobId);
|
||||
// Assign metadata to the image
|
||||
image.path = imagePath;
|
||||
image.job_id = jobId;
|
||||
image.original_pdf_path = result.file_path;
|
||||
image.page_number = page.page;
|
||||
images.push(image);
|
||||
}
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
const buffer = new Uint8Array(arrayBuffer);
|
||||
await fs.writeFile(imagePath, buffer);
|
||||
|
||||
images.push(image);
|
||||
}
|
||||
}
|
||||
return images;
|
||||
} catch (e) {
|
||||
console.error(`Error while downloading images from the parsed result`, e);
|
||||
if (this.ignoreErrors) {
|
||||
return [];
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
return images;
|
||||
}
|
||||
|
||||
private async getImagePath(
|
||||
downloadPath: string,
|
||||
jobId: string,
|
||||
imageName: string,
|
||||
): Promise<string> {
|
||||
// Get the full path
|
||||
let imagePath = `${downloadPath}/${jobId}-${imageName}`;
|
||||
// Get a valid image path
|
||||
if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
|
||||
imagePath += ".png";
|
||||
}
|
||||
|
||||
return imagePath;
|
||||
}
|
||||
|
||||
private async fetchAndSaveImage(
|
||||
imageName: string,
|
||||
imagePath: string,
|
||||
jobId: string,
|
||||
): Promise<void> {
|
||||
const headers = { Authorization: `Bearer ${this.apiKey}` };
|
||||
// Construct the image URL
|
||||
const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`;
|
||||
const response = await fetch(imageUrl, { headers });
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to download image: ${await response.text()}`);
|
||||
}
|
||||
// Convert the response to an ArrayBuffer and then to a Buffer
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
const buffer = new Uint8Array(arrayBuffer);
|
||||
// Write the image buffer to the specified imagePath
|
||||
await fs.writeFile(imagePath, buffer);
|
||||
}
|
||||
|
||||
static async getMimeType(
|
||||
|
||||
Reference in New Issue
Block a user