Compare commits

..

25 Commits

Author SHA1 Message Date
github-actions[bot] 59c5e5c3d4 Release 0.6.17 (#1305)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-07 14:44:04 +07:00
Thuc Pham ee697fb1b3 fix: generate uuid when inserting to Qdrant (#1301) 2024-10-07 14:17:04 +07:00
Alex Yang cf3320a4ea fix: improve getResponseSynthesizer type (#1304) 2024-10-06 19:15:55 -07:00
github-actions[bot] f2ed69f2f8 Release 0.6.16 (#1300)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-06 18:25:11 -07:00
Alex Yang 3489e7de84 fix: num output incorrect in prompt helper (#1303) 2024-10-06 18:19:05 -07:00
Alex Yang 468bda594e fix: correct warning when chunk size smaller than 0 (#1297) 2024-10-04 12:01:10 -07:00
Thuc Pham 6f3a31caf6 feat: add metadata filters for vector stores (#1289) 2024-10-04 14:25:11 +07:00
Thuc Pham 63e9846e97 fix: preFilters doesnot work with asQueryEngine (#1298) 2024-10-04 14:24:01 +07:00
github-actions[bot] b7382b0d24 Release 0.6.15 (#1295)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-03 19:44:55 -07:00
Alex Yang 2a8241328d fix: lazy load openai (#1294) 2024-10-03 17:12:33 -07:00
Alex Yang 0b20ff9f17 fix(cloud): package.json format (#1291) 2024-10-03 17:07:50 -07:00
github-actions[bot] 1fc26046e3 Release 0.6.14 (#1290)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-03 16:35:54 -07:00
LAWG b17d439d6d fix: ensure id_ is correctly passed during creation (#1282)
Co-authored-by: lawrencegb <lawrence@3api.com>
Co-authored-by: Alex Yang <himself65@outlook.com>
2024-10-03 11:52:26 -07:00
github-actions[bot] 040160c360 Release 0.6.13 (#1288)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-02 16:35:52 -07:00
Alex Yang 981811efd1 fix(cloud): llama parse reader save image incorrectly (#1287) 2024-10-02 14:31:03 -07:00
github-actions[bot] d563b45a27 Release (#1286)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-10-02 09:14:15 -07:00
Parham Saidi 2774e80234 feat: Meta Llama 3.2 via bedrock (#1285) 2024-10-02 08:59:09 -07:00
github-actions[bot] 449274ca5a Release 0.6.12 (#1273)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-09-30 16:34:14 -07:00
Alex Yang 78037a664c chore: update changelog 2024-09-30 16:13:22 -07:00
Alex Yang 1d9e3b1000 fix: export llama reader in non-nodejs runtime (#1279) 2024-09-30 16:13:07 -07:00
Alex Yang df83e32107 fix: bypass service context embed model (#1280) 2024-09-30 16:02:48 -07:00
Thuc Pham f7b4e94231 feat: add filters for pinecone (#1272) 2024-09-30 17:04:43 +07:00
Marcus Schiesser 4c07a2655d text: add cycle test (#1270) 2024-09-29 23:13:59 -07:00
Marcus Schiesser 5c0c8b2ec4 test: add concurrent test for workflows (#1269) 2024-09-29 22:10:21 -07:00
Emmanuel Ferdman e5e18688a6 fix: update reader reference (#1268)
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2024-09-28 14:44:44 -07:00
85 changed files with 1832 additions and 477 deletions
+43
View File
@@ -1,5 +1,48 @@
# docs
## 0.0.86
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.0.85
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.0.84
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.0.83
### Patch Changes
- llamaindex@0.6.14
## 0.0.82
### Patch Changes
- llamaindex@0.6.13
## 0.0.81
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.0.80
### Patch Changes
@@ -13,7 +13,7 @@ Official documentation for LlamaParse can be found [here](https://docs.cloud.lla
## Usage
You can then use the `LlamaParseReader` class to load local files and convert them into a parsed document that can be used by LlamaIndex.
See [LlamaParseReader.ts](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/llamaindex/src/readers/LlamaParseReader.ts) for a list of supported file types:
See [reader.ts](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/cloud/src/reader.ts) for a list of supported file types:
<CodeBlock language="ts">{CodeSource}</CodeBlock>
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "docs",
"version": "0.0.80",
"version": "0.0.86",
"private": true,
"scripts": {
"docusaurus": "docusaurus",
+1 -1
View File
@@ -13,7 +13,7 @@ import { FunctionTool, OpenAI, ToolCallOptions } from "llamaindex";
}
})();
async function callLLM(init: Partial<OpenAI>) {
async function callLLM(init: { model: string }) {
const csvData =
"Country,Average Height (cm)\nNetherlands,156\nDenmark,158\nNorway,160";
+51
View File
@@ -0,0 +1,51 @@
import {
Document,
MetadataFilters,
Settings,
SimpleDocumentStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
/**
 * Returns a vector index backed by the storage context persisted under "./cache".
 *
 * If the document store has no entries yet, the index is built from two
 * sample "dog" documents (tagged with `dogId` metadata); otherwise the index
 * is initialised from the existing storage context.
 */
async function getDataSource() {
  const sampleDocs = [
    new Document({ text: "The dog is brown", metadata: { dogId: "1" } }),
    new Document({ text: "The dog is yellow", metadata: { dogId: "2" } }),
  ];

  const storageContext = await storageContextFromDefaults({
    persistDir: "./cache",
  });

  // Count the entries currently held by the document store.
  const docStore = storageContext.docStore as SimpleDocumentStore;
  const hasStoredDocs = Object.keys(docStore.toDict()).length > 0;

  return hasStoredDocs
    ? await VectorStoreIndex.init({ storageContext })
    : await VectorStoreIndex.fromDocuments(sampleDocs, { storageContext });
}
// After every "retrieve-end" event, log the query text together with the
// number of nodes that were retrieved for it.
Settings.callbackManager.on("retrieve-end", (event) => {
const { nodes, query } = event.detail;
console.log(`${query.query} - Number of retrieved nodes:`, nodes.length);
});
/**
 * Demo entry point: applies the same `dogId == "2"` metadata filter through
 * both a retriever (`filters`) and a query engine (`preFilters`), then runs
 * one request against each. The retrieved-node counts are printed by the
 * "retrieve-end" listener registered on `Settings.callbackManager`.
 */
async function main() {
  const index = await getDataSource();

  const dogTwoOnly: MetadataFilters = {
    filters: [{ key: "dogId", value: "2", operator: "==" }],
  };
  const topK = 3;

  const retriever = index.asRetriever({
    similarityTopK: topK,
    filters: dogTwoOnly,
  });
  const queryEngine = index.asQueryEngine({
    similarityTopK: topK,
    preFilters: dogTwoOnly,
  });

  console.log("Retriever and query engine should only retrieve 1 node:");
  await retriever.retrieve({ query: "Retriever: get dog" });
  await queryEngine.query({ query: "QueryEngine: get dog" });
}

void main();
+42
View File
@@ -39,6 +39,12 @@ async function main() {
dogId: "2",
},
}),
new Document({
text: "The dog is black",
metadata: {
dogId: "3",
},
}),
];
console.log("Creating QdrantDB vector store");
const qdrantVs = new QdrantVectorStore({ url: qdrantUrl, collectionName });
@@ -73,6 +79,42 @@ async function main() {
query: "What is the color of the dog?",
});
console.log("Filter with dogId 2 response:", response.toString());
console.log("Querying index with dogId !=2: Expected output: Not red");
const queryEngineNotDogId2 = index.asQueryEngine({
preFilters: {
filters: [
{
key: "dogId",
value: "2",
operator: "!=",
},
],
},
});
const responseNotDogId2 = await queryEngineNotDogId2.query({
query: "What is the color of the dog?",
});
console.log(responseNotDogId2.toString());
console.log(
"Querying index with dogId 2 or 3: Expected output: Red, Black",
);
const queryEngineIn = index.asQueryEngine({
preFilters: {
filters: [
{
key: "dogId",
value: ["2", "3"],
operator: "in",
},
],
},
});
const responseIn = await queryEngineIn.query({
query: "List all dogs",
});
console.log(responseIn.toString());
} catch (e) {
console.error(e);
}
+3 -6
View File
@@ -25,12 +25,9 @@ async function main() {
similarityCutoff: 0.7,
});
// TODO: cannot pass responseSynthesizer into retriever query engine
const queryEngine = new RetrieverQueryEngine(
retriever,
undefined,
undefined,
[nodePostprocessor],
);
const queryEngine = new RetrieverQueryEngine(retriever, undefined, [
nodePostprocessor,
]);
const response = await queryEngine.query({
query: "What did the author do growing up?",
+1 -3
View File
@@ -165,9 +165,7 @@ async function main() {
});
const responseSynthesizer = getResponseSynthesizer("tree_summarize");
return new RetrieverQueryEngine(retriever, responseSynthesizer, {
filter,
});
return new RetrieverQueryEngine(retriever, responseSynthesizer);
};
// whatever is a key from your metadata
+43
View File
@@ -1,5 +1,48 @@
# @llamaindex/autotool
## 3.0.17
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 3.0.16
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 3.0.15
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 3.0.14
### Patch Changes
- llamaindex@0.6.14
## 3.0.13
### Patch Changes
- llamaindex@0.6.13
## 3.0.12
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 3.0.11
### Patch Changes
@@ -1,5 +1,54 @@
# @llamaindex/autotool-01-node-example
## 0.0.26
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
- @llamaindex/autotool@3.0.17
## 0.0.25
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
- @llamaindex/autotool@3.0.16
## 0.0.24
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
- @llamaindex/autotool@3.0.15
## 0.0.23
### Patch Changes
- llamaindex@0.6.14
- @llamaindex/autotool@3.0.14
## 0.0.22
### Patch Changes
- llamaindex@0.6.13
- @llamaindex/autotool@3.0.13
## 0.0.21
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
- @llamaindex/autotool@3.0.12
## 0.0.20
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.20"
"version": "0.0.26"
}
@@ -1,5 +1,54 @@
# @llamaindex/autotool-02-next-example
## 0.1.70
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
- @llamaindex/autotool@3.0.17
## 0.1.69
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
- @llamaindex/autotool@3.0.16
## 0.1.68
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
- @llamaindex/autotool@3.0.15
## 0.1.67
### Patch Changes
- llamaindex@0.6.14
- @llamaindex/autotool@3.0.14
## 0.1.66
### Patch Changes
- llamaindex@0.6.13
- @llamaindex/autotool@3.0.13
## 0.1.65
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
- @llamaindex/autotool@3.0.12
## 0.1.64
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool-02-next-example",
"private": true,
"version": "0.1.64",
"version": "0.1.70",
"scripts": {
"dev": "next dev",
"build": "next build",
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool",
"type": "module",
"version": "3.0.11",
"version": "3.0.17",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
+27
View File
@@ -1,5 +1,32 @@
# @llamaindex/cloud
## 0.2.13
### Patch Changes
- Updated dependencies [ee697fb]
- @llamaindex/core@0.2.11
## 0.2.12
### Patch Changes
- Updated dependencies [3489e7d]
- Updated dependencies [468bda5]
- @llamaindex/core@0.2.10
## 0.2.11
### Patch Changes
- 0b20ff9: fix: package.json format
## 0.2.10
### Patch Changes
- 981811e: fix(cloud): llama parse reader save image incorrectly
## 0.2.9
### Patch Changes
+5 -5
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "0.2.9",
"version": "0.2.13",
"type": "module",
"license": "MIT",
"scripts": {
@@ -51,13 +51,13 @@
"devDependencies": {
"@hey-api/client-fetch": "^0.2.4",
"@hey-api/openapi-ts": "^0.53.0",
"@llamaindex/core": "workspace:^0.2.8",
"@llamaindex/env": "workspace:^0.1.13",
"@llamaindex/core": "workspace:*",
"@llamaindex/env": "workspace:*",
"bunchee": "5.3.2"
},
"peerDependencies": {
"@llamaindex/core": "workspace:^0.2.8",
"@llamaindex/env": "workspace:^0.1.13"
"@llamaindex/core": "workspace:*",
"@llamaindex/env": "workspace:*"
},
"dependencies": {
"magic-bytes.js": "^1.10.0"
+6 -14
View File
@@ -1,11 +1,11 @@
import { createClient, createConfig, type Client } from "@hey-api/client-fetch";
import { type Client, createClient, createConfig } from "@hey-api/client-fetch";
import { Document, FileReader } from "@llamaindex/core/schema";
import { fs, getEnv } from "@llamaindex/env";
import { fs, getEnv, path } from "@llamaindex/env";
import { filetypeinfo } from "magic-bytes.js";
import {
ParsingService,
type Body_upload_file_api_v1_parsing_upload_post,
type ParserLanguages,
ParsingService,
} from "./api";
import { sleep } from "./utils";
@@ -510,14 +510,7 @@ export class LlamaParseReader extends FileReader {
jobId: string,
imageName: string,
): Promise<string> {
// Get the full path
let imagePath = `${downloadPath}/${jobId}-${imageName}`;
// Get a valid image path
if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
imagePath += ".png";
}
return imagePath;
return path.join(downloadPath, `${jobId}-${imageName}`);
}
private async fetchAndSaveImage(
@@ -538,10 +531,9 @@ export class LlamaParseReader extends FileReader {
if (response.error) {
throw new Error(`Failed to download image: ${response.error.detail}`);
}
const arrayBuffer = (await response.data) as ArrayBuffer;
const buffer = new Uint8Array(arrayBuffer);
const blob = (await response.data) as Blob;
// Write the image buffer to the specified imagePath
await fs.writeFile(imagePath, buffer);
await fs.writeFile(imagePath, new Uint8Array(await blob.arrayBuffer()));
}
// Filters out invalid values (null, undefined, empty string) of specific params.
+28
View File
@@ -1,5 +1,33 @@
# @llamaindex/community
## 0.0.46
### Patch Changes
- Updated dependencies [ee697fb]
- @llamaindex/core@0.2.11
## 0.0.45
### Patch Changes
- Updated dependencies [3489e7d]
- Updated dependencies [468bda5]
- @llamaindex/core@0.2.10
## 0.0.44
### Patch Changes
- Updated dependencies [b17d439]
- @llamaindex/core@0.2.9
## 0.0.43
### Patch Changes
- 2774e80: feat: added meta3.2 support via Bedrock including vision, tool call and inference region support
## 0.0.42
### Patch Changes
+4 -2
View File
@@ -5,9 +5,11 @@
## Current Features:
- Bedrock support for the Anthropic Claude Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
- Bedrock support for the Meta LLama 2, 3 and 3.1 Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
- Meta LLama3.1 405b tool call support
- Bedrock support for the Meta Llama 2, 3, 3.1 and 3.2 models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
- Meta Llama 3.1 405B and Llama 3.2 tool call support
- Meta Llama 3.2 11B and 90B vision support
- Bedrock support for querying Knowledge Base
- Bedrock: [Supported Regions and models for cross-region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html)
## LICENSE
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/community",
"description": "Community package for LlamaIndexTS",
"version": "0.0.42",
"version": "0.0.46",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+2
View File
@@ -2,5 +2,7 @@ export {
BEDROCK_MODELS,
BEDROCK_MODEL_MAX_TOKENS,
Bedrock,
INFERENCE_BEDROCK_MODELS,
INFERENCE_TO_BEDROCK_MAP,
} from "./llm/bedrock/index.js";
export { AmazonKnowledgeBaseRetriever } from "./retrievers/bedrock.js";
@@ -6,7 +6,10 @@ import type {
MessageContentDetail,
ToolCallLLMMessageOptions,
} from "@llamaindex/core/llms";
import { mapMessageContentToMessageContentDetails } from "../utils";
import {
extractDataUrlComponents,
mapMessageContentToMessageContentDetails,
} from "../utils";
import type {
AnthropicContent,
AnthropicImageContent,
@@ -143,27 +146,6 @@ export const mapTextContent = (text: string): AnthropicTextContent => {
return { type: "text", text };
};
/**
 * Splits a base64 data URL into its MIME type and base64 payload.
 *
 * The input is split on the literal ";base64," marker; the text between the
 * leading "data:" and that marker is returned as `mimeType`, the remainder as
 * `base64`.
 *
 * @param dataUrl - A string of the form `data:<mimeType>;base64,<payload>`.
 * @returns The extracted `mimeType` and `base64` strings.
 * @throws Error("Invalid data URL") when the marker does not occur exactly
 *   once or the string does not start with "data:".
 */
export const extractDataUrlComponents = (
dataUrl: string,
): {
mimeType: string;
base64: string;
} => {
const parts = dataUrl.split(";base64,");
if (parts.length !== 2 || !parts[0]!.startsWith("data:")) {
throw new Error("Invalid data URL");
}
// Drop the 5-character "data:" prefix to leave only the MIME type.
const mimeType = parts[0]!.slice(5);
const base64 = parts[1]!;
return {
mimeType,
base64,
};
};
export const mapImageContent = (imageUrl: string): AnthropicImageContent => {
if (!imageUrl.startsWith("data:"))
throw new Error(
+118 -36
View File
@@ -47,35 +47,96 @@ export type BedrockChatParamsNonStreaming = LLMChatParamsNonStreaming<
export type BedrockChatNonStreamResponse =
ChatResponse<ToolCallLLMMessageOptions>;
export enum BEDROCK_MODELS {
AMAZON_TITAN_TG1_LARGE = "amazon.titan-tg1-large",
AMAZON_TITAN_TEXT_EXPRESS_V1 = "amazon.titan-text-express-v1",
AI21_J2_GRANDE_INSTRUCT = "ai21.j2-grande-instruct",
AI21_J2_JUMBO_INSTRUCT = "ai21.j2-jumbo-instruct",
AI21_J2_MID = "ai21.j2-mid",
AI21_J2_MID_V1 = "ai21.j2-mid-v1",
AI21_J2_ULTRA = "ai21.j2-ultra",
AI21_J2_ULTRA_V1 = "ai21.j2-ultra-v1",
COHERE_COMMAND_TEXT_V14 = "cohere.command-text-v14",
ANTHROPIC_CLAUDE_INSTANT_1 = "anthropic.claude-instant-v1",
ANTHROPIC_CLAUDE_1 = "anthropic.claude-v1", // EOF: No longer supported
ANTHROPIC_CLAUDE_2 = "anthropic.claude-v2",
ANTHROPIC_CLAUDE_2_1 = "anthropic.claude-v2:1",
ANTHROPIC_CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0",
ANTHROPIC_CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0",
ANTHROPIC_CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0",
ANTHROPIC_CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0",
META_LLAMA2_13B_CHAT = "meta.llama2-13b-chat-v1",
META_LLAMA2_70B_CHAT = "meta.llama2-70b-chat-v1",
META_LLAMA3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0",
META_LLAMA3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0",
META_LLAMA3_1_8B_INSTRUCT = "meta.llama3-1-8b-instruct-v1:0",
META_LLAMA3_1_70B_INSTRUCT = "meta.llama3-1-70b-instruct-v1:0",
META_LLAMA3_1_405B_INSTRUCT = "meta.llama3-1-405b-instruct-v1:0",
MISTRAL_7B_INSTRUCT = "mistral.mistral-7b-instruct-v0:2",
MISTRAL_MIXTRAL_7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1",
MISTRAL_MIXTRAL_LARGE_2402 = "mistral.mistral-large-2402-v1:0",
}
/**
 * Bedrock foundation-model identifiers, keyed by a constant-style name.
 *
 * NOTE(review): the object literal is not declared `as const`, so its
 * property types widen to `string` and the companion `BEDROCK_MODELS` type
 * below resolves to `string` rather than a union of the literal IDs —
 * confirm whether that looseness is intended.
 */
export const BEDROCK_MODELS = {
AMAZON_TITAN_TG1_LARGE: "amazon.titan-tg1-large",
AMAZON_TITAN_TEXT_EXPRESS_V1: "amazon.titan-text-express-v1",
AI21_J2_GRANDE_INSTRUCT: "ai21.j2-grande-instruct",
AI21_J2_JUMBO_INSTRUCT: "ai21.j2-jumbo-instruct",
AI21_J2_MID: "ai21.j2-mid",
AI21_J2_MID_V1: "ai21.j2-mid-v1",
AI21_J2_ULTRA: "ai21.j2-ultra",
AI21_J2_ULTRA_V1: "ai21.j2-ultra-v1",
COHERE_COMMAND_TEXT_V14: "cohere.command-text-v14",
ANTHROPIC_CLAUDE_INSTANT_1: "anthropic.claude-instant-v1",
ANTHROPIC_CLAUDE_1: "anthropic.claude-v1", // EOL: no longer supported
ANTHROPIC_CLAUDE_2: "anthropic.claude-v2",
ANTHROPIC_CLAUDE_2_1: "anthropic.claude-v2:1",
ANTHROPIC_CLAUDE_3_SONNET: "anthropic.claude-3-sonnet-20240229-v1:0",
ANTHROPIC_CLAUDE_3_HAIKU: "anthropic.claude-3-haiku-20240307-v1:0",
ANTHROPIC_CLAUDE_3_OPUS: "anthropic.claude-3-opus-20240229-v1:0",
ANTHROPIC_CLAUDE_3_5_SONNET: "anthropic.claude-3-5-sonnet-20240620-v1:0",
META_LLAMA2_13B_CHAT: "meta.llama2-13b-chat-v1",
META_LLAMA2_70B_CHAT: "meta.llama2-70b-chat-v1",
META_LLAMA3_8B_INSTRUCT: "meta.llama3-8b-instruct-v1:0",
META_LLAMA3_70B_INSTRUCT: "meta.llama3-70b-instruct-v1:0",
META_LLAMA3_1_8B_INSTRUCT: "meta.llama3-1-8b-instruct-v1:0",
META_LLAMA3_1_70B_INSTRUCT: "meta.llama3-1-70b-instruct-v1:0",
META_LLAMA3_1_405B_INSTRUCT: "meta.llama3-1-405b-instruct-v1:0",
META_LLAMA3_2_1B_INSTRUCT: "meta.llama3-2-1b-instruct-v1:0",
META_LLAMA3_2_3B_INSTRUCT: "meta.llama3-2-3b-instruct-v1:0",
META_LLAMA3_2_11B_INSTRUCT: "meta.llama3-2-11b-instruct-v1:0",
META_LLAMA3_2_90B_INSTRUCT: "meta.llama3-2-90b-instruct-v1:0",
MISTRAL_7B_INSTRUCT: "mistral.mistral-7b-instruct-v0:2",
MISTRAL_MIXTRAL_7B_INSTRUCT: "mistral.mixtral-8x7b-instruct-v0:1",
MISTRAL_MIXTRAL_LARGE_2402: "mistral.mistral-large-2402-v1:0",
};
/** Type of the values stored in the `BEDROCK_MODELS` object. */
export type BEDROCK_MODELS =
(typeof BEDROCK_MODELS)[keyof typeof BEDROCK_MODELS];
/**
 * Model identifiers that carry a `us.` or `eu.` prefix in front of a
 * foundation-model ID.
 *
 * NOTE(review): presumably these are Bedrock cross-region inference
 * identifiers — confirm against the AWS documentation.
 */
export const INFERENCE_BEDROCK_MODELS = {
US_ANTHROPIC_CLAUDE_3_HAIKU: "us.anthropic.claude-3-haiku-20240307-v1:0",
US_ANTHROPIC_CLAUDE_3_OPUS: "us.anthropic.claude-3-opus-20240229-v1:0",
US_ANTHROPIC_CLAUDE_3_SONNET: "us.anthropic.claude-3-sonnet-20240229-v1:0",
US_ANTHROPIC_CLAUDE_3_5_SONNET:
"us.anthropic.claude-3-5-sonnet-20240620-v1:0",
US_META_LLAMA_3_2_1B_INSTRUCT: "us.meta.llama3-2-1b-instruct-v1:0",
US_META_LLAMA_3_2_3B_INSTRUCT: "us.meta.llama3-2-3b-instruct-v1:0",
US_META_LLAMA_3_2_11B_INSTRUCT: "us.meta.llama3-2-11b-instruct-v1:0",
US_META_LLAMA_3_2_90B_INSTRUCT: "us.meta.llama3-2-90b-instruct-v1:0",
EU_ANTHROPIC_CLAUDE_3_HAIKU: "eu.anthropic.claude-3-haiku-20240307-v1:0",
EU_ANTHROPIC_CLAUDE_3_SONNET: "eu.anthropic.claude-3-sonnet-20240229-v1:0",
EU_ANTHROPIC_CLAUDE_3_5_SONNET:
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0",
EU_META_LLAMA_3_2_1B_INSTRUCT: "eu.meta.llama3-2-1b-instruct-v1:0",
EU_META_LLAMA_3_2_3B_INSTRUCT: "eu.meta.llama3-2-3b-instruct-v1:0",
};
/** Type of the values stored in the `INFERENCE_BEDROCK_MODELS` object. */
export type INFERENCE_BEDROCK_MODELS =
(typeof INFERENCE_BEDROCK_MODELS)[keyof typeof INFERENCE_BEDROCK_MODELS];
/**
 * Maps every prefixed identifier in `INFERENCE_BEDROCK_MODELS` to the
 * matching un-prefixed `BEDROCK_MODELS` entry. The `us.` and `eu.` variants
 * of a model map to the same base model.
 */
export const INFERENCE_TO_BEDROCK_MAP: Record<
INFERENCE_BEDROCK_MODELS,
BEDROCK_MODELS
> = {
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_HAIKU]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_OPUS]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_OPUS,
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_5_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_1B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_3B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_11B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_90B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_HAIKU]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_5_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
[INFERENCE_BEDROCK_MODELS.EU_META_LLAMA_3_2_1B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
[INFERENCE_BEDROCK_MODELS.EU_META_LLAMA_3_2_3B_INSTRUCT]:
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
};
/*
* Values taken from https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html#model-parameters-claude
@@ -109,6 +170,10 @@ const CHAT_ONLY_MODELS = {
[BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT]: 128000,
[BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT]: 128000,
[BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT]: 128000,
[BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT]: 131000,
[BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT]: 131000,
[BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT]: 128000,
[BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT]: 128000,
[BEDROCK_MODELS.MISTRAL_7B_INSTRUCT]: 32000,
[BEDROCK_MODELS.MISTRAL_MIXTRAL_7B_INSTRUCT]: 32000,
[BEDROCK_MODELS.MISTRAL_MIXTRAL_LARGE_2402]: 32000,
@@ -139,17 +204,25 @@ export const STREAMING_MODELS = new Set([
BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
BEDROCK_MODELS.MISTRAL_7B_INSTRUCT,
BEDROCK_MODELS.MISTRAL_MIXTRAL_7B_INSTRUCT,
BEDROCK_MODELS.MISTRAL_MIXTRAL_LARGE_2402,
]);
export const TOOL_CALL_MODELS = [
export const TOOL_CALL_MODELS: BEDROCK_MODELS[] = [
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_OPUS,
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
];
const getProvider = (model: string): Provider => {
@@ -166,7 +239,7 @@ const getProvider = (model: string): Provider => {
};
export type BedrockModelParams = {
model: keyof typeof BEDROCK_FOUNDATION_LLMS;
model: BEDROCK_MODELS | INFERENCE_BEDROCK_MODELS;
temperature?: number;
topP?: number;
maxTokens?: number;
@@ -185,6 +258,10 @@ export const BEDROCK_MODEL_MAX_TOKENS: Partial<Record<BEDROCK_MODELS, number>> =
[BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT]: 2048,
[BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT]: 2048,
};
const DEFAULT_BEDROCK_PARAMS = {
@@ -193,14 +270,15 @@ const DEFAULT_BEDROCK_PARAMS = {
maxTokens: 1024, // required by anthropic
};
export type BedrockParams = BedrockModelParams & BedrockRuntimeClientConfig;
export type BedrockParams = BedrockRuntimeClientConfig & BedrockModelParams;
/**
* ToolCallLLM for Bedrock
*/
export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
private client: BedrockRuntimeClient;
model: keyof typeof BEDROCK_FOUNDATION_LLMS;
protected actualModel: BEDROCK_MODELS | INFERENCE_BEDROCK_MODELS;
model: BEDROCK_MODELS;
temperature: number;
topP: number;
maxTokens?: number;
@@ -217,8 +295,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
...params
}: BedrockParams) {
super();
this.model = model;
this.actualModel = model;
this.model = INFERENCE_TO_BEDROCK_MAP[model] ?? model;
this.provider = getProvider(this.model);
this.maxTokens = maxTokens ?? DEFAULT_BEDROCK_PARAMS.maxTokens;
this.temperature = temperature ?? DEFAULT_BEDROCK_PARAMS.temperature;
@@ -241,7 +319,7 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
temperature: this.temperature,
topP: this.topP,
maxTokens: this.maxTokens,
contextWindow: BEDROCK_FOUNDATION_LLMS[this.model],
contextWindow: BEDROCK_FOUNDATION_LLMS[this.model] ?? 128000,
tokenizer: undefined,
};
}
@@ -256,6 +334,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
params.additionalChatOptions,
);
const command = new InvokeModelCommand(input);
command.input.modelId = this.actualModel;
const response = await this.client.send(command);
let options: ToolCallLLMMessageOptions = {};
if (this.supportToolCall) {
@@ -287,6 +367,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
params.additionalChatOptions,
);
const command = new InvokeModelWithResponseStreamCommand(input);
command.input.modelId = this.actualModel;
const response = await this.client.send(command);
if (response.body) yield* this.provider.reduceStream(response.body);
@@ -67,21 +67,26 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
for await (const response of stream) {
const event = this.getStreamingEventResponse(response);
const delta = this.getTextFromStreamResponse(response);
// odd quirk of llama3.1, start token is \n\n
if (
!toolId &&
!event?.generation.trim() &&
event?.generation_token_count === 1 &&
event.prompt_token_count !== null
event?.prompt_token_count !== null
)
continue;
if (delta === TOKENS.TOOL_CALL) {
if (delta.startsWith(TOKENS.TOOL_CALL)) {
toolId = randomUUID();
const parts = delta.split(TOKENS.TOOL_CALL).filter((part) => part);
collecting.push(...parts);
continue;
}
let options: undefined | ToolCallLLMMessageOptions = undefined;
if (toolId && event?.stop_reason === "stop") {
if (delta) collecting.push(delta);
const tool = JSON.parse(collecting.join(""));
options = {
toolCall: [
@@ -110,11 +115,18 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
getRequestBody<T extends ChatMessage>(
metadata: LLMMetadata,
messages: T[],
tools?: BaseTool[],
tools: BaseTool[] = [],
): InvokeModelCommandInput | InvokeModelWithResponseStreamCommandInput {
let prompt: string = "";
let images: string[] = [];
if (metadata.model.startsWith("meta.llama3")) {
prompt = mapChatMessagesToMetaLlama3Messages(messages, tools);
const mapped = mapChatMessagesToMetaLlama3Messages({
messages,
tools,
model: metadata.model,
});
prompt = mapped.prompt;
images = mapped.images;
} else if (metadata.model.startsWith("meta.llama2")) {
prompt = mapChatMessagesToMetaLlama2Messages(messages);
} else {
@@ -127,6 +139,7 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
accept: "application/json",
body: JSON.stringify({
prompt,
images: images.length ? images : undefined,
max_gen_len: metadata.maxTokens,
temperature: metadata.temperature,
top_p: metadata.topP,
@@ -1,9 +1,12 @@
import type {
BaseTool,
ChatMessage,
LLMMetadata,
MessageContentTextDetail,
ToolCallLLMMessageOptions,
} from "@llamaindex/core/llms";
import { extractDataUrlComponents } from "../utils";
import { TOKENS } from "./constants";
import type { MetaMessage } from "./types";
const getToolCallInstructionString = (tool: BaseTool): string => {
@@ -24,7 +27,7 @@ const getToolCallParametersString = (tool: BaseTool): string => {
// ported from https://github.com/meta-llama/llama-agentic-system/blob/main/llama_agentic_system/system_prompt.py
// NOTE: using json instead of the above xml style tool calling works more reliability
export const getToolsPrompt = (tools?: BaseTool[]) => {
export const getToolsPrompt_3_1 = (tools?: BaseTool[]) => {
if (!tools?.length) return "";
const customToolParams = tools.map((tool) => {
@@ -77,6 +80,46 @@ Reminder:
`;
};
/**
 * Builds the tool-calling instruction text for the given tools.
 *
 * The returned text tells the model to answer with a single JSON function
 * call that starts with `TOKENS.TOOL_CALL`, and appends the available tools
 * serialised with `JSON.stringify`.
 *
 * @param tools - Tools the model may invoke.
 * @returns The instruction text, or an empty string when `tools` is
 *   undefined or empty.
 */
export const getToolsPrompt_3_2 = (tools?: BaseTool[]) => {
// No tools means no tool-calling instructions are needed.
if (!tools?.length) return "";
return `
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format of and start with the token: ${TOKENS.TOOL_CALL}:
{
"name": function_name,
"parameters": parameters,
}
where
{
"name": function_name,
"parameters": parameters, => a JSON dict with the function argument name as key and function argument value as value.
}
Here is an example,
{
"name": "example_function_name",
"parameters": {"example_name": "example_value"}
}
Reminder:
- Function calls MUST follow the specified format
- Required parameters MUST be specified
- Only call one function at a time
- You SHOULD NOT include any other text in the response
- Put the entire function call reply on one line
Here is a list of functions in JSON format that you can invoke.
${JSON.stringify(tools)}
`;
};
export const mapChatRoleToMetaRole = (
role: ChatMessage["role"],
): MetaMessage["role"] => {
@@ -125,16 +168,46 @@ export const mapChatMessagesToMetaMessages = <
/**
* Documentation at https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3
*/
export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>(
messages: T[],
tools?: BaseTool[],
): string => {
export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>({
messages,
model,
tools,
}: {
messages: T[];
model: LLMMetadata["model"];
tools?: BaseTool[];
}): { prompt: string; images: string[] } => {
const images: string[] = [];
const textMessages: T[] = [];
messages.forEach((message) => {
if (Array.isArray(message.content)) {
message.content.forEach((content) => {
if (content.type === "image_url") {
const { base64 } = extractDataUrlComponents(content.image_url.url);
images.push(base64);
} else {
textMessages.push(message);
}
});
} else {
textMessages.push(message);
}
});
const parts: string[] = [];
if (tools?.length) {
let toolsPrompt = "";
if (model.startsWith("meta.llama3-2")) {
toolsPrompt = getToolsPrompt_3_2(tools);
} else if (model.startsWith("meta.llama3-1")) {
toolsPrompt = getToolsPrompt_3_1(tools);
}
if (toolsPrompt) {
parts.push(
"<|begin_of_text|>",
"<|start_header_id|>system<|end_header_id|>",
getToolsPrompt(tools),
toolsPrompt,
"<|eot_id|>",
);
}
@@ -154,7 +227,9 @@ export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>(
...mapped,
"<|start_header_id|>assistant<|end_header_id|>",
);
return parts.join("\n");
const prompt = parts.join("\n");
return { prompt, images };
};
/**
@@ -11,3 +11,24 @@ export const mapMessageContentToMessageContentDetails = (
export const toUtf8 = (input: Uint8Array): string =>
new TextDecoder("utf-8").decode(input);
/**
 * Break a base64 data URL of the form `data:<mime>;base64,<payload>` into
 * its MIME type and its base64 payload.
 *
 * @param dataUrl The data URL to parse.
 * @returns The text between `data:` and `;base64,` as `mimeType`, and the
 * text after `;base64,` as `base64`.
 * @throws Error("Invalid data URL") when the input does not start with
 * `data:` or does not contain exactly one `;base64,` marker.
 */
export const extractDataUrlComponents = (
  dataUrl: string,
): {
  mimeType: string;
  base64: string;
} => {
  const DATA_SCHEME = "data:";
  const segments = dataUrl.split(";base64,");
  const [header, payload] = segments;
  if (
    segments.length !== 2 ||
    header === undefined ||
    payload === undefined ||
    !header.startsWith(DATA_SCHEME)
  ) {
    throw new Error("Invalid data URL");
  }
  // Everything after the scheme prefix is reported as the MIME type.
  return {
    mimeType: header.slice(DATA_SCHEME.length),
    base64: payload,
  };
};
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/core
## 0.2.11
### Patch Changes
- ee697fb: fix: generate uuid when inserting to Qdrant
## 0.2.10
### Patch Changes
- 3489e7d: fix: num output incorrect in prompt helper
- 468bda5: fix: correct warning when chunk size smaller than 0
## 0.2.9
### Patch Changes
- b17d439: Fix #1278: resolved issue where the id\_ was not correctly passed as the id when creating a TextNode. As a result, the upsert operation to the vector database was using a generated ID instead of the provided document ID, if available.
## 0.2.8
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.2.8",
"version": "0.2.11",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
+86 -62
View File
@@ -8,18 +8,16 @@ import {
Settings,
} from "../global";
import type { LLMMetadata } from "../llms";
import { SentenceSplitter } from "../node-parser";
import type { PromptTemplate } from "../prompts";
import { TextSplitter, TokenTextSplitter, truncateText } from "../node-parser";
import { BasePromptTemplate, PromptTemplate } from "../prompts";
/**
* Get the empty prompt text given a prompt.
*/
function getEmptyPromptTxt(prompt: PromptTemplate) {
return prompt.format({
...Object.fromEntries(
[...prompt.templateVars.keys()].map((key) => [key, ""]),
),
});
function getEmptyPromptTxt(prompt: PromptTemplate): string {
return prompt.format(
Object.fromEntries([...prompt.templateVars.keys()].map((key) => [key, ""])),
);
}
/**
@@ -35,24 +33,24 @@ export function getBiggestPrompt(prompts: PromptTemplate[]): PromptTemplate {
}
export type PromptHelperOptions = {
contextWindow?: number;
numOutput?: number;
chunkOverlapRatio?: number;
chunkSizeLimit?: number;
tokenizer?: Tokenizer;
separator?: string;
contextWindow?: number | undefined;
numOutput?: number | undefined;
chunkOverlapRatio?: number | undefined;
chunkSizeLimit?: number | undefined;
tokenizer?: Tokenizer | undefined;
separator?: string | undefined;
};
/**
* A collection of helper functions for working with prompts.
*/
export class PromptHelper {
contextWindow = DEFAULT_CONTEXT_WINDOW;
numOutput = DEFAULT_NUM_OUTPUTS;
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
contextWindow: number;
numOutput: number;
chunkOverlapRatio: number;
chunkSizeLimit: number | undefined;
tokenizer: Tokenizer;
separator = " ";
separator: string;
constructor(options: PromptHelperOptions = {}) {
const {
@@ -72,68 +70,93 @@ export class PromptHelper {
}
/**
* Given a prompt, return the maximum size of the inputs to the prompt.
* @param prompt
* @returns
* Calculate the available context size based on the number of prompt tokens.
*/
private getAvailableContextSize(prompt: PromptTemplate) {
const emptyPromptText = getEmptyPromptTxt(prompt);
const promptTokens = this.tokenizer.encode(emptyPromptText);
const numPromptTokens = promptTokens.length;
return this.contextWindow - numPromptTokens - this.numOutput;
}
/**
* Find the maximum size of each chunk given a prompt.
*/
private getAvailableChunkSize(
prompt: PromptTemplate,
numChunks = 1,
padding = 5,
): number {
const availableContextSize = this.getAvailableContextSize(prompt);
const result = Math.floor(availableContextSize / numChunks) - padding;
if (this.chunkSizeLimit) {
return Math.min(this.chunkSizeLimit, result);
} else {
return result;
#getAvailableContextSize(numPromptTokens: number): number {
  // Whatever is left of the context window once the prompt tokens and the
  // reserved output tokens have been subtracted.
  const remainingTokens =
    this.contextWindow - numPromptTokens - this.numOutput;
  const overBudget = remainingTokens < 0;
  if (overBudget) {
    throw new Error(
      `Calculated available context size ${remainingTokens} is not non-negative.`,
    );
  }
  return remainingTokens;
}
/**
* Creates a text splitter with the correct chunk sizes and overlaps given a prompt.
* Calculate the available chunk size based on the prompt and other parameters.
*/
#getAvailableChunkSize<Template extends BasePromptTemplate>(
  prompt: Template,
  numChunks: number = 1,
  padding: number = 5,
): number {
  // Only PromptTemplate instances are measured; any other template kind is
  // counted as contributing zero prompt tokens.
  const promptTokenCount =
    prompt instanceof PromptTemplate
      ? this.tokenizer.encode(getEmptyPromptTxt(prompt)).length
      : 0;
  // Share the remaining window evenly between the chunks, then subtract the
  // per-chunk padding.
  const perChunkBudget =
    Math.floor(this.#getAvailableContextSize(promptTokenCount) / numChunks) -
    padding;
  // An explicit chunkSizeLimit caps the computed budget.
  return this.chunkSizeLimit === undefined
    ? perChunkBudget
    : Math.min(this.chunkSizeLimit, perChunkBudget);
}
/**
* Creates a text splitter configured to maximally pack the available context window.
*/
getTextSplitterGivenPrompt(
prompt: PromptTemplate,
numChunks = 1,
padding = DEFAULT_PADDING,
) {
const chunkSize = this.getAvailableChunkSize(prompt, numChunks, padding);
if (chunkSize === 0) {
throw new Error("Got 0 as available chunk size");
prompt: BasePromptTemplate,
numChunks: number = 1,
padding: number = DEFAULT_PADDING,
): TextSplitter {
const chunkSize = this.#getAvailableChunkSize(prompt, numChunks, padding);
if (chunkSize <= 0) {
throw new TypeError(`Chunk size ${chunkSize} is not positive.`);
}
const chunkOverlap = this.chunkOverlapRatio * chunkSize;
return new SentenceSplitter({
const chunkOverlap = Math.floor(this.chunkOverlapRatio * chunkSize);
return new TokenTextSplitter({
separator: this.separator,
chunkSize,
chunkOverlap,
separator: this.separator,
tokenizer: this.tokenizer,
});
}
/**
* Repack resplits the strings based on the optimal text splitter.
* Truncate text chunks to fit within the available context window.
*/
truncate(
  prompt: BasePromptTemplate,
  textChunks: string[],
  padding: number = DEFAULT_PADDING,
): string[] {
  // The splitter is sized for `textChunks.length` chunks sharing the window.
  const splitter = this.getTextSplitterGivenPrompt(
    prompt,
    textChunks.length,
    padding,
  );
  const shorten = (chunk: string): string => truncateText(chunk, splitter);
  return textChunks.map(shorten);
}
/**
* Repack text chunks to better utilize the available context window.
*/
repack(
prompt: PromptTemplate,
prompt: BasePromptTemplate,
textChunks: string[],
padding = DEFAULT_PADDING,
) {
padding: number = DEFAULT_PADDING,
): string[] {
const textSplitter = this.getTextSplitterGivenPrompt(prompt, 1, padding);
const combinedStr = textChunks.join("\n\n");
const combinedStr = textChunks
.map((c) => c.trim())
.filter((c) => c.length > 0)
.join("\n\n");
return textSplitter.splitText(combinedStr);
}
@@ -154,7 +177,8 @@ export class PromptHelper {
} = options ?? {};
return new PromptHelper({
contextWindow: metadata.contextWindow,
numOutput: metadata.maxTokens ?? DEFAULT_NUM_OUTPUTS,
// fixme: numOutput is not in LLMMetadata
numOutput: DEFAULT_NUM_OUTPUTS,
chunkOverlapRatio,
chunkSizeLimit,
tokenizer,
+2
View File
@@ -13,6 +13,7 @@ export { MetadataAwareTextSplitter, NodeParser, TextSplitter } from "./base";
export { MarkdownNodeParser } from "./markdown";
export { SentenceSplitter } from "./sentence-splitter";
export { SentenceWindowNodeParser } from "./sentence-window";
export { TokenTextSplitter } from "./token-text-splitter";
export type { SplitterParams } from "./type";
export {
splitByChar,
@@ -20,5 +21,6 @@ export {
splitByRegex,
splitBySentenceTokenizer,
splitBySep,
truncateText,
} from "./utils";
export type { TextSplitterFn } from "./utils";
@@ -0,0 +1,206 @@
import type { Tokenizer } from "@llamaindex/env";
import { z } from "zod";
import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, Settings } from "../global";
import { MetadataAwareTextSplitter } from "./base";
import type { SplitterParams } from "./type";
import { splitByChar, splitBySep } from "./utils";
// Extra token allowance added on top of the metadata token count when
// reserving room for metadata in a chunk.
const DEFAULT_METADATA_FORMAT_LEN = 2;
// Validation schema for the splitter options; every field falls back to its
// default when omitted.
const tokenTextSplitterSchema = z.object({
// Maximum chunk size; must be a positive number.
chunkSize: z.number().positive().default(DEFAULT_CHUNK_SIZE),
// Overlap between consecutive chunks; must be zero or greater.
chunkOverlap: z.number().nonnegative().default(DEFAULT_CHUNK_OVERLAP),
// Primary separator tried first when splitting.
separator: z.string().default(" "),
// Separators tried, in order, after the primary one.
backupSeparators: z.array(z.string()).default(["\n"]),
});
/**
 * Text splitter whose chunk sizes are measured in tokenizer tokens.
 *
 * Text is first broken into pieces no larger than the chunk size, trying the
 * primary separator, then each backup separator, and finally single
 * characters. The pieces are then merged back into chunks, and consecutive
 * chunks share up to `chunkOverlap` tokens of trailing content.
 */
export class TokenTextSplitter extends MetadataAwareTextSplitter {
  chunkSize: number = DEFAULT_CHUNK_SIZE;
  chunkOverlap: number = DEFAULT_CHUNK_OVERLAP;
  separator: string = " ";
  backupSeparators: string[] = ["\n"];
  #tokenizer: Tokenizer;
  #splitFns: Array<(text: string) => string[]> = [];

  /**
   * @param params Optional splitter options. When provided they are validated
   * against `tokenTextSplitterSchema`, which also fills in defaults.
   * `params.tokenizer` overrides `Settings.tokenizer`.
   * @throws Error when the chunk overlap is larger than the chunk size.
   */
  constructor(
    params?: SplitterParams & Partial<z.infer<typeof tokenTextSplitterSchema>>,
  ) {
    super();

    if (params) {
      const parsedParams = tokenTextSplitterSchema.parse(params);
      this.chunkSize = parsedParams.chunkSize;
      this.chunkOverlap = parsedParams.chunkOverlap;
      this.separator = parsedParams.separator;
      this.backupSeparators = parsedParams.backupSeparators;
    }

    if (this.chunkOverlap > this.chunkSize) {
      throw new Error(
        `Got a larger chunk overlap (${this.chunkOverlap}) than chunk size (${this.chunkSize}), should be smaller.`,
      );
    }

    this.#tokenizer = params?.tokenizer ?? Settings.tokenizer;

    // Split strategies in priority order: primary separator, backup
    // separators, then character-level splitting as the last resort.
    const allSeparators = [this.separator, ...this.backupSeparators];
    this.#splitFns = allSeparators.map((sep) => splitBySep(sep));
    this.#splitFns.push(splitByChar());
  }

  /**
   * Split text into chunks, reserving space required for metadata string.
   * @param text The text to split.
   * @param metadata The metadata string.
   * @returns An array of text chunks.
   * @throws Error when the metadata leaves no room for text in a chunk.
   */
  splitTextMetadataAware(text: string, metadata: string): string[] {
    const metadataLength =
      this.tokenSize(metadata) + DEFAULT_METADATA_FORMAT_LEN;
    const effectiveChunkSize = this.chunkSize - metadataLength;

    if (effectiveChunkSize <= 0) {
      throw new Error(
        `Metadata length (${metadataLength}) is longer than chunk size (${this.chunkSize}). ` +
          `Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
      );
    } else if (effectiveChunkSize < 50) {
      console.warn(
        `Metadata length (${metadataLength}) is close to chunk size (${this.chunkSize}). ` +
          `Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
      );
    }

    return this._splitText(text, effectiveChunkSize);
  }

  /**
   * Split text into chunks.
   * @param text The text to split.
   * @returns An array of text chunks.
   */
  splitText(text: string): string[] {
    return this._splitText(text, this.chunkSize);
  }

  /**
   * Internal method to split text into chunks up to a specified size.
   * Dispatches the "chunking-start" and "chunking-end" events around the work.
   * @param text The text to split.
   * @param chunkSize The maximum size of each chunk.
   * @returns An array of text chunks; an empty input yields `[""]`.
   */
  private _splitText(text: string, chunkSize: number): string[] {
    if (text === "") return [text];

    // Dispatch chunking start event
    Settings.callbackManager.dispatchEvent("chunking-start", { text: [text] });

    const splits = this._split(text, chunkSize);
    const chunks = this._merge(splits, chunkSize);

    Settings.callbackManager.dispatchEvent("chunking-end", { chunks });

    return chunks;
  }

  /**
   * Break text into splits that are smaller than the chunk size.
   * @param text The text to split.
   * @param chunkSize The maximum size of each split.
   * @returns An array of text splits.
   */
  private _split(text: string, chunkSize: number): string[] {
    if (this.tokenSize(text) <= chunkSize) {
      return [text];
    }

    // Use the first strategy that actually divides the text, recursing into
    // any piece that is still too large.
    for (const splitFn of this.#splitFns) {
      const splits = splitFn(text);
      if (splits.length > 1) {
        const newSplits: string[] = [];
        for (const split of splits) {
          const splitLen = this.tokenSize(split);
          if (splitLen <= chunkSize) {
            newSplits.push(split);
          } else {
            newSplits.push(...this._split(split, chunkSize));
          }
        }
        return newSplits;
      }
    }

    return [text];
  }

  /**
   * Merge splits into chunks with overlap.
   *
   * When adding the next split would exceed `chunkSize`, the current chunk is
   * emitted and the next chunk is seeded with the trailing splits of the
   * emitted one: leading splits are dropped until the retained tail is at
   * most `chunkOverlap` tokens and leaves room for the incoming split.
   *
   * @param splits The array of text splits.
   * @param chunkSize The maximum size of each chunk.
   * @returns An array of merged text chunks.
   */
  private _merge(splits: string[], chunkSize: number): string[] {
    const chunks: string[] = [];
    const currentChunk: string[] = [];
    // Token length of each entry in `currentChunk`, kept in lockstep so that
    // trimming does not need to re-tokenize.
    const currentLengths: number[] = [];
    let currentLength = 0;

    for (const split of splits) {
      const splitLength = this.tokenSize(split);

      if (splitLength > chunkSize) {
        console.warn(
          `Got a split of size ${splitLength}, larger than chunk size ${chunkSize}.`,
        );
      }

      if (currentLength + splitLength > chunkSize) {
        // Close the chunk that has been accumulated so far.
        const chunk = currentChunk.join("").trim();
        if (chunk) {
          chunks.push(chunk);
        }

        // Keep only the tail of the closed chunk as overlap. The overlap must
        // be taken from the closed chunk BEFORE it is discarded; clearing it
        // first would leave nothing to carry over.
        while (
          currentChunk.length > 0 &&
          (currentLength > this.chunkOverlap ||
            currentLength + splitLength > chunkSize)
        ) {
          currentChunk.shift();
          currentLength -= currentLengths.shift() ?? 0;
        }
      }

      currentChunk.push(split);
      currentLengths.push(splitLength);
      currentLength += splitLength;
    }

    const finalChunk = currentChunk.join("").trim();
    if (finalChunk) {
      chunks.push(finalChunk);
    }

    return chunks;
  }

  /**
   * Calculate the number of tokens in the text using the tokenizer.
   * @param text The text to tokenize.
   * @returns The number of tokens.
   */
  private tokenSize(text: string): number {
    return this.#tokenizer.encode(text).length;
  }
}
+4 -1
View File
@@ -3,7 +3,10 @@ import SentenceTokenizer from "./sentence_tokenizer";
export type TextSplitterFn = (text: string) => string[];
const truncateText = (text: string, textSplitter: TextSplitter): string => {
export const truncateText = (
text: string,
textSplitter: TextSplitter,
): string => {
const chunks = textSplitter.splitText(text);
return chunks[0] ?? text;
};
+4 -2
View File
@@ -64,11 +64,13 @@ export const defaultRefinePrompt: RefinePrompt = new PromptTemplate({
templateVars: ["query", "existingAnswer", "context"],
template: `The original query is as follows: {query}
We have provided an existing answer: {existingAnswer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
------------
{context}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:`,
});
@@ -403,27 +403,27 @@ class MultiModal extends BaseSynthesizer {
}
}
export function getResponseSynthesizer(
mode: ResponseMode,
const modeToSynthesizer = {
compact: CompactAndRefine,
refine: Refine,
tree_summarize: TreeSummarize,
multi_modal: MultiModal,
} as const;
export function getResponseSynthesizer<Mode extends ResponseMode>(
mode: Mode,
options: BaseSynthesizerOptions & {
textQATemplate?: TextQAPrompt;
refineTemplate?: RefinePrompt;
summaryTemplate?: TreeSummarizePrompt;
metadataMode?: MetadataMode;
} = {},
) {
switch (mode) {
case "compact": {
return new CompactAndRefine(options);
}
case "refine": {
return new Refine(options);
}
case "tree_summarize": {
return new TreeSummarize(options);
}
case "multi_modal": {
return new MultiModal(options);
}
): InstanceType<(typeof modeToSynthesizer)[Mode]> {
const Synthesizer: (typeof modeToSynthesizer)[Mode] = modeToSynthesizer[mode];
if (!Synthesizer) {
throw new Error(`Invalid response mode: ${mode}`);
}
return new Synthesizer(options) as InstanceType<
(typeof modeToSynthesizer)[Mode]
>;
}
+2 -2
View File
@@ -479,7 +479,7 @@ export function buildNodeFromSplits(
) {
const imageDoc = doc as ImageNode;
const imageNode = new ImageNode({
id_: idGenerator(i, imageDoc),
id_: imageDoc.id_ ?? idGenerator(i, imageDoc),
text: textChunk,
image: imageDoc.image,
embedding: imageDoc.embedding,
@@ -496,7 +496,7 @@ export function buildNodeFromSplits(
) {
const textDoc = doc as TextNode;
const node = new TextNode({
id_: idGenerator(i, textDoc),
id_: textDoc.id_ ?? idGenerator(i, textDoc),
text: textChunk,
embedding: textDoc.embedding,
excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys],
+1
View File
@@ -80,3 +80,4 @@ export {
} from "./llms";
export { objectEntries } from "./object-entries";
export { UUIDFromString } from "./uuid";
+22
View File
@@ -0,0 +1,22 @@
import { createSHA256 } from "@llamaindex/env";
/**
 * Derive a deterministic, UUID-shaped identifier from a string by hashing it
 * with SHA-256 and laying selected hex digits of the hash out in the
 * 8-4-4-4-12 UUID pattern, with the version digit forced to 5.
 *
 * @param input The string to derive the identifier from.
 * @returns A UUID-formatted string.
 */
export function UUIDFromString(input: string) {
  const hasher = createSHA256();
  hasher.update(input);
  // The digest is treated as base64 text and re-encoded as hex.
  const hex = Buffer.from(hasher.digest(), "base64").toString("hex");

  const group1 = hex.slice(0, 8);
  const group2 = hex.slice(8, 12);
  // Third group: literal version digit "5" followed by three hash digits.
  const group3 = `5${hex.slice(12, 15)}`;
  // Fourth group: force the two top bits of the leading byte to binary 10
  // (the variant marker), then append two more hash digits.
  const variantByte = (Number.parseInt(hex.slice(15, 17), 16) & 0x3f) | 0x80;
  const group4 = `${variantByte.toString(16)}${hex.slice(17, 19)}`;
  const group5 = hex.slice(19, 31);

  return `${group1}-${group2}-${group3}-${group4}-${group5}`;
}
+37
View File
@@ -0,0 +1,37 @@
import { UUIDFromString } from "@llamaindex/core/utils";
import { describe, expect, it } from "vitest";
// Matches the 8-4-4-4-12 hex layout with version digit 5 and a variant
// digit of 8, 9, a or b (case-insensitive).
const UUID_REGEX =
/^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
// Unit tests for UUIDFromString: output format, determinism and sensitivity
// to the input value (including letter case).
describe("UUIDFromString", () => {
// The output must be defined and match the UUID layout above.
it("should convert string to UUID", () => {
const string = "document_id_1";
const result = UUIDFromString(string);
expect(result).toBeDefined();
expect(result).toMatch(UUID_REGEX);
});
// Hashing the same input twice must give the same identifier.
it("should return the same UUID for the same input string", () => {
const string = "document_id_1";
const result1 = UUIDFromString(string);
const result2 = UUIDFromString(string);
expect(result1).toEqual(result2);
});
// Distinct inputs must give distinct identifiers.
it("should return the different UUID for different input strings", () => {
const string1 = "document_id_1";
const string2 = "document_id_2";
const result1 = UUIDFromString(string1);
const result2 = UUIDFromString(string2);
expect(result1).not.toEqual(result2);
});
// Inputs that differ only by letter case must give distinct identifiers.
it("should handle case-sensitive input strings", () => {
const string1 = "document_id_1";
const string2 = "Document_Id_1";
const result1 = UUIDFromString(string1);
const result2 = UUIDFromString(string2);
expect(result1).not.toEqual(result2);
});
});
+64
View File
@@ -166,4 +166,68 @@ describe("Workflow", () => {
greeting: "Hello Alice, you are 30 years old!",
});
});
// Two steps are registered for the same StartEvent. The assertions expect
// both to be invoked once and the run to resolve with the faster step's
// StopEvent before the slower step's 200 ms delay has elapsed.
test("workflow with two concurrent steps", async () => {
const concurrentFlow = new Workflow({ verbose: true });
// Slower step: resolves after 200 ms.
const step1 = vi.fn(async (_context, _ev: StartEvent) => {
await new Promise((resolve) => setTimeout(resolve, 200));
return new StopEvent({ result: "Step 1 completed" });
});
// Faster step: resolves after 100 ms.
const step2 = vi.fn(async (_context, _ev: StartEvent) => {
await new Promise((resolve) => setTimeout(resolve, 100));
return new StopEvent({ result: "Step 2 completed" });
});
concurrentFlow.addStep(StartEvent, step1);
concurrentFlow.addStep(StartEvent, step2);
// Measure wall-clock time around the run.
const startTime = new Date();
const result = await concurrentFlow.run("start");
const endTime = new Date();
const duration = endTime.getTime() - startTime.getTime();
expect(step1).toHaveBeenCalledTimes(1);
expect(step2).toHaveBeenCalledTimes(1);
// Finishing under 200 ms means the run did not wait for step1.
expect(duration).toBeLessThan(200);
expect(result.data.result).toBe("Step 2 completed");
});
// Two steps both start from StartEvent and each also handles the event type
// it emits itself. step2 re-triggers itself until its fifth invocation, which
// returns the StopEvent; step1 takes 1000 ms per invocation.
test("workflow with two concurrent cyclic steps", async () => {
const concurrentCyclicFlow = new Workflow({ verbose: true });
class Step1Event extends WorkflowEvent {}
class Step2Event extends WorkflowEvent {}
// Number of times step2 has run so far.
let step2Count = 0;
// Slow step: waits 1000 ms, then emits Step1Event.
const step1 = vi.fn(async (_context, ev: StartEvent | Step1Event) => {
await new Promise((resolve) => setTimeout(resolve, 1000));
return new Step1Event({ result: "Step 1 completed" });
});
// Fast step: waits 100 ms; emits Step2Event until the fifth call, which
// returns the StopEvent instead.
const step2 = vi.fn(async (_context, ev: StartEvent | Step2Event) => {
await new Promise((resolve) => setTimeout(resolve, 100));
step2Count++;
if (step2Count >= 5) {
return new StopEvent({ result: "Step 2 completed 5 times" });
}
return new Step2Event({ result: "Step 2 completed" });
});
concurrentCyclicFlow.addStep([StartEvent, Step1Event], step1);
concurrentCyclicFlow.addStep([StartEvent, Step2Event], step2);
// Measure wall-clock time around the run.
const startTime = new Date();
const result = await concurrentCyclicFlow.run("start");
const endTime = new Date();
const duration = endTime.getTime() - startTime.getTime();
// step1 is expected to have been started once only.
expect(step1).toHaveBeenCalledTimes(1);
expect(step2).toHaveBeenCalledTimes(5);
expect(duration).toBeGreaterThan(500); // At least 5 * 100ms for step2
expect(duration).toBeLessThan(1000); // Less than 1 second
expect(result.data.result).toBe("Step 2 completed 5 times");
});
});
+43
View File
@@ -1,5 +1,48 @@
# @llamaindex/experimental
## 0.0.95
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.0.94
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.0.93
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.0.92
### Patch Changes
- llamaindex@0.6.14
## 0.0.91
### Patch Changes
- llamaindex@0.6.13
## 0.0.90
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.0.89
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.89",
"version": "0.0.95",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+62
View File
@@ -1,5 +1,67 @@
# llamaindex
## 0.6.17
### Patch Changes
- ee697fb: fix: generate uuid when inserting to Qdrant
- Updated dependencies [ee697fb]
- @llamaindex/core@0.2.11
- @llamaindex/cloud@0.2.13
- @llamaindex/ollama@0.0.6
- @llamaindex/openai@0.1.14
- @llamaindex/groq@0.0.13
## 0.6.16
### Patch Changes
- 63e9846: fix: preFilters does not work with asQueryEngine
- 6f3a31c: feat: add metadata filters for Qdrant vector store
- Updated dependencies [3489e7d]
- Updated dependencies [468bda5]
- @llamaindex/core@0.2.10
- @llamaindex/cloud@0.2.12
- @llamaindex/ollama@0.0.5
- @llamaindex/openai@0.1.13
- @llamaindex/groq@0.0.12
## 0.6.15
### Patch Changes
- 2a82413: fix(core): set `Settings.llm` to OpenAI by default and support lazy load openai
- Updated dependencies [2a82413]
- Updated dependencies [0b20ff9]
- @llamaindex/groq@0.0.11
- @llamaindex/openai@0.1.12
- @llamaindex/cloud@0.2.11
## 0.6.14
### Patch Changes
- Updated dependencies [b17d439]
- @llamaindex/core@0.2.9
- @llamaindex/ollama@0.0.4
- @llamaindex/openai@0.1.11
- @llamaindex/groq@0.0.10
## 0.6.13
### Patch Changes
- Updated dependencies [981811e]
- @llamaindex/cloud@0.2.10
## 0.6.12
### Patch Changes
- f7b4e94: feat: add filters for pinecone
- 78037a6: fix: bypass service context embed model
- 1d9e3b1: fix: export llama reader in non-nodejs runtime
## 0.6.11
### Patch Changes
@@ -1,5 +1,48 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.79
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.0.78
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.0.77
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.0.76
### Patch Changes
- llamaindex@0.6.14
## 0.0.75
### Patch Changes
- llamaindex@0.6.13
## 0.0.74
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.0.73
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.73",
"version": "0.0.79",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,31 @@
# @llamaindex/llama-parse-browser-test
## 0.0.9
### Patch Changes
- @llamaindex/cloud@0.2.13
## 0.0.8
### Patch Changes
- @llamaindex/cloud@0.2.12
## 0.0.7
### Patch Changes
- Updated dependencies [0b20ff9]
- @llamaindex/cloud@0.2.11
## 0.0.6
### Patch Changes
- Updated dependencies [981811e]
- @llamaindex/cloud@0.2.10
## 0.0.5
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.5",
"version": "0.0.9",
"type": "module",
"scripts": {
"dev": "vite",
@@ -1,5 +1,48 @@
# @llamaindex/next-agent-test
## 0.1.79
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.1.78
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.1.77
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.1.76
### Patch Changes
- llamaindex@0.6.14
## 0.1.75
### Patch Changes
- llamaindex@0.6.13
## 0.1.74
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.1.73
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.73",
"version": "0.1.79",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,48 @@
# test-edge-runtime
## 0.1.78
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.1.77
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.1.76
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.1.75
### Patch Changes
- llamaindex@0.6.14
## 0.1.74
### Patch Changes
- llamaindex@0.6.13
## 0.1.73
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.1.72
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.72",
"version": "0.1.78",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,48 @@
# @llamaindex/next-node-runtime
## 0.0.60
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.0.59
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.0.58
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.0.57
### Patch Changes
- llamaindex@0.6.14
## 0.0.56
### Patch Changes
- llamaindex@0.6.13
## 0.0.55
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.0.54
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.0.54",
"version": "0.0.60",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,48 @@
# @llamaindex/waku-query-engine-test
## 0.0.79
### Patch Changes
- Updated dependencies [ee697fb]
- llamaindex@0.6.17
## 0.0.78
### Patch Changes
- Updated dependencies [63e9846]
- Updated dependencies [6f3a31c]
- llamaindex@0.6.16
## 0.0.77
### Patch Changes
- Updated dependencies [2a82413]
- llamaindex@0.6.15
## 0.0.76
### Patch Changes
- llamaindex@0.6.14
## 0.0.75
### Patch Changes
- llamaindex@0.6.13
## 0.0.74
### Patch Changes
- Updated dependencies [f7b4e94]
- Updated dependencies [78037a6]
- Updated dependencies [1d9e3b1]
- llamaindex@0.6.12
## 0.0.73
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.73",
"version": "0.0.79",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.6.11",
"version": "0.6.17",
"license": "MIT",
"type": "module",
"keywords": [
-7
View File
@@ -2,7 +2,6 @@ import {
type CallbackManager,
Settings as CoreSettings,
} from "@llamaindex/core/global";
import { OpenAI } from "@llamaindex/openai";
import { PromptHelper } from "@llamaindex/core/indices";
@@ -61,12 +60,6 @@ class GlobalSettings implements Config {
}
get llm(): LLM {
// fixme: we might need check internal error instead of try-catch here
try {
CoreSettings.llm;
} catch (error) {
CoreSettings.llm = new OpenAI();
}
return CoreSettings.llm;
}
@@ -308,7 +308,6 @@ export class LlamaCloudIndex {
return new RetrieverQueryEngine(
retriever,
params?.responseSynthesizer,
params?.preFilters,
params?.nodePostprocessors,
);
}
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
import { OpenAIEmbedding } from "@llamaindex/openai";
export class FireworksEmbedding extends OpenAIEmbedding {
constructor(init?: Partial<OpenAIEmbedding>) {
constructor(init?: Omit<Partial<OpenAIEmbedding>, "session">) {
const {
apiKey = getEnv("FIREWORKS_API_KEY"),
additionalSessionOptions = {},
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
import { OpenAIEmbedding } from "@llamaindex/openai";
export class TogetherEmbedding extends OpenAIEmbedding {
constructor(init?: Partial<OpenAIEmbedding>) {
constructor(init?: Omit<Partial<OpenAIEmbedding>, "session">) {
const {
apiKey = getEnv("TOGETHER_API_KEY"),
additionalSessionOptions = {},
@@ -14,12 +14,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
retriever: BaseRetriever;
responseSynthesizer: BaseSynthesizer;
nodePostprocessors: BaseNodePostprocessor[];
preFilters?: unknown;
constructor(
retriever: BaseRetriever,
responseSynthesizer?: BaseSynthesizer,
preFilters?: unknown,
nodePostprocessors?: BaseNodePostprocessor[],
) {
super(async (strOrQueryBundle, stream) => {
@@ -52,7 +50,6 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
this.retriever = retriever;
this.responseSynthesizer =
responseSynthesizer || getResponseSynthesizer("compact");
this.preFilters = preFilters;
this.nodePostprocessors = nodePostprocessors || [];
}
+19 -2
View File
@@ -1,3 +1,20 @@
//#region initial setup for OpenAI
import { OpenAI } from "@llamaindex/openai";
import { Settings } from "./Settings.js";
// Probe the configured LLM by reading `Settings.llm`; if that read throws,
// install an OpenAI instance as the fallback.
// NOTE(review): presumably the getter throws when no LLM has been set —
// confirm against the Settings implementation.
try {
Settings.llm;
} catch {
Settings.llm = new OpenAI();
}
//#endregion
export {
LlamaParseReader,
type Language,
type ResultType,
} from "@llamaindex/cloud/reader";
export * from "@llamaindex/core/agent";
export * from "@llamaindex/core/chat-engine";
export {
@@ -23,12 +40,12 @@ export type {
JSONArray,
JSONObject,
JSONValue,
LlamaIndexEventMaps,
LLMEndEvent,
LLMStartEvent,
LLMStreamEvent,
LLMToolCallEvent,
LLMToolResultEvent,
LlamaIndexEventMaps,
} from "@llamaindex/core/global";
export * from "@llamaindex/core/indices";
export * from "@llamaindex/core/llms";
@@ -56,7 +73,7 @@ export * from "./postprocessors/index.js";
export * from "./QuestionGenerator.js";
export * from "./selectors/index.js";
export * from "./ServiceContext.js";
export { Settings } from "./Settings.js";
export * from "./storage/StorageContext.js";
export * from "./tools/index.js";
export * from "./types.js";
export { Settings };
@@ -246,7 +246,6 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
return new RetrieverQueryEngine(
retriever ?? this.asRetriever(),
responseSynthesizer,
options?.preFilters,
options?.nodePostprocessors,
);
}
@@ -189,7 +189,6 @@ export class SummaryIndex extends BaseIndex<IndexList> {
return new RetrieverQueryEngine(
retriever,
responseSynthesizer,
options?.preFilters,
options?.nodePostprocessors,
);
}
@@ -203,7 +203,10 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
} = {},
): Promise<VectorStoreIndex> {
args.storageContext =
args.storageContext ?? (await storageContextFromDefaults({}));
args.storageContext ??
(await storageContextFromDefaults({
serviceContext: args.serviceContext,
}));
args.vectorStores = args.vectorStores ?? args.storageContext.vectorStores;
args.docStoreStrategy =
args.docStoreStrategy ??
@@ -295,9 +298,8 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
similarityTopK,
} = options ?? {};
return new RetrieverQueryEngine(
retriever ?? this.asRetriever({ similarityTopK }),
retriever ?? this.asRetriever({ similarityTopK, filters: preFilters }),
responseSynthesizer,
preFilters,
nodePostprocessors,
);
}
@@ -384,7 +386,7 @@ export type VectorIndexRetrieverOptions = {
index: VectorStoreIndex;
similarityTopK?: number | undefined;
topK?: TopKMap | undefined;
filters?: MetadataFilters;
filters?: MetadataFilters | undefined;
};
export class VectorIndexRetriever extends BaseRetriever {
+1 -1
View File
@@ -6,7 +6,7 @@ const DEFAULT_MODEL = "mistralai/Mixtral-8x22B-Instruct-v0.1";
const BASE_URL = "https://api.deepinfra.com/v1/openai";
export class DeepInfra extends OpenAI {
constructor(init?: Partial<OpenAI>) {
constructor(init?: Omit<Partial<OpenAI>, "session">) {
const {
apiKey = getEnv(ENV_VARIABLE_NAME),
additionalSessionOptions = {},
+3 -1
View File
@@ -10,7 +10,9 @@ type DeepSeekModelName = keyof typeof DEEPSEEK_MODELS;
const DEFAULT_MODEL: DeepSeekModelName = "deepseek-coder";
export class DeepSeekLLM extends OpenAI {
constructor(init?: Partial<OpenAI> & { model?: DeepSeekModelName }) {
constructor(
init?: Omit<Partial<OpenAI>, "session"> & { model?: DeepSeekModelName },
) {
const {
apiKey = getEnv("DEEPSEEK_API_KEY"),
additionalSessionOptions = {},
+1 -1
View File
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
import { OpenAI } from "@llamaindex/openai";
export class FireworksLLM extends OpenAI {
constructor(init?: Partial<OpenAI>) {
constructor(init?: Omit<Partial<OpenAI>, "session">) {
const {
apiKey = getEnv("FIREWORKS_API_KEY"),
additionalSessionOptions = {},
+1 -1
View File
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
import { OpenAI } from "@llamaindex/openai";
export class TogetherLLM extends OpenAI {
constructor(init?: Partial<OpenAI>) {
constructor(init?: Omit<Partial<OpenAI>, "session">) {
const {
apiKey = getEnv("TOGETHER_API_KEY"),
additionalSessionOptions = {},
@@ -5,6 +5,7 @@ import {
import { ModalityType, ObjectType } from "@llamaindex/core/schema";
import { path } from "@llamaindex/env";
import { getImageEmbedModel } from "../internal/settings/image-embed-model.js";
import type { ServiceContext } from "../ServiceContext.js";
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
@@ -25,6 +26,10 @@ type BuilderParams = {
vectorStores: VectorStoreByType;
storeImages: boolean;
persistDir: string;
/**
* @deprecated Please use `Settings` instead
*/
serviceContext?: ServiceContext | undefined;
};
export async function storageContextFromDefaults({
@@ -34,6 +39,7 @@ export async function storageContextFromDefaults({
vectorStores,
storeImages,
persistDir,
serviceContext,
}: Partial<BuilderParams>): Promise<StorageContext> {
vectorStores = vectorStores ?? {};
if (!persistDir) {
@@ -48,6 +54,7 @@ export async function storageContextFromDefaults({
});
}
} else {
const embedModel = serviceContext?.embedModel;
docStore =
docStore ||
(await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE));
@@ -55,7 +62,8 @@ export async function storageContextFromDefaults({
indexStore || (await SimpleIndexStore.fromPersistDir(persistDir));
if (!(ObjectType.TEXT in vectorStores)) {
vectorStores[ModalityType.TEXT] =
vectorStore ?? (await SimpleVectorStore.fromPersistDir(persistDir));
vectorStore ??
(await SimpleVectorStore.fromPersistDir(persistDir, embedModel));
}
if (storeImages && !(ObjectType.IMAGE in vectorStores)) {
vectorStores[ModalityType.IMAGE] = await SimpleVectorStore.fromPersistDir(
@@ -1,4 +1,6 @@
import {
FilterCondition,
FilterOperator,
VectorStoreBase,
type IEmbedModel,
type MetadataFilter,
@@ -198,14 +200,60 @@ export class PineconeVectorStore
}
toPineconeFilter(stdFilters?: MetadataFilters) {
return stdFilters?.filters?.reduce((carry: any, item: MetadataFilter) => {
// Use MetadataFilter with EQ operator to replace ExactMatchFilter
// TODO: support filter with other operators
if (item.operator === "==") {
carry[item.key] = item.value;
if (!stdFilters) return undefined;
/**
 * Maps a metadata filter condition to the key used for it in the Pinecone
 * filter object.
 * @param condition - "and" or "or"; defaults to "and" when omitted
 * @returns "$and" or "$or"
 * @throws Error when the condition is neither "and" nor "or"
 */
const transformCondition = (
  condition: `${FilterCondition}` = "and",
): string => {
  switch (condition) {
    case "and":
      return "$and";
    case "or":
      return "$or";
    default:
      throw new Error(`Filter condition ${condition} not supported`);
  }
};
const transformOperator = (operator: `${FilterOperator}`): string => {
switch (operator) {
case "!=":
return "$ne";
case "==":
return "$eq";
case ">":
return "$gt";
case "<":
return "$lt";
case ">=":
return "$gte";
case "<=":
return "$lte";
case "in":
return "$in";
case "nin":
return "$nin";
default:
throw new Error(`Filter operator ${operator} not supported`);
}
return carry;
}, {});
};
/**
 * Converts one metadata filter into a single-key clause shaped as
 * `{ [key]: { [operator]: value } }`, where the operator name comes from
 * `transformOperator`.
 */
const convertFilterItem = (filter: MetadataFilter) => {
  const { key, operator, value } = filter;
  const mappedOperator = transformOperator(operator);
  return { [key]: { [mappedOperator]: value } };
};
/**
 * Converts a group of metadata filters into one filter object.
 * Every entry is converted with `convertFilterItem`; entries that come back
 * without any keys are dropped.
 * @returns undefined when no entries remain, the single entry itself when
 * exactly one remains, otherwise all entries listed under the key produced
 * by `transformCondition(filter.condition)`
 */
const convertFilter = (filter: MetadataFilters) => {
  const clauses: ReturnType<typeof convertFilterItem>[] = [];
  for (const item of filter.filters) {
    const clause = convertFilterItem(item);
    if (Object.keys(clause).length > 0) {
      clauses.push(clause);
    }
  }

  switch (clauses.length) {
    case 0:
      return undefined;
    case 1:
      return clauses[0];
    default: {
      // The condition is only resolved when there are several clauses to combine.
      const groupKey = transformCondition(filter.condition);
      return { [groupKey]: clauses };
    }
  }
};
return convertFilter(stdFilters);
}
textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
@@ -1,16 +1,23 @@
import type { BaseNode } from "@llamaindex/core/schema";
import {
FilterCondition,
FilterOperator,
VectorStoreBase,
type IEmbedModel,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import type { QdrantClientParams } from "@qdrant/js-client-rest";
import { UUIDFromString } from "@llamaindex/core/utils";
import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
import { QdrantClient } from "@qdrant/js-client-rest";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
type QdrantFilter = Schemas["Filter"];
type QdrantMustConditions = QdrantFilter["must"];
type PointStruct = {
id: string;
payload: Record<string, string>;
@@ -164,7 +171,7 @@ export class QdrantVectorStore
for (let k = 0; k < nodeIds.length; k++) {
const point: PointStruct = {
id: nodeIds[k]!.id_,
id: UUIDFromString(nodeIds[k]!.id_),
payload: payloads[k]!,
vector: vectors[k]!,
};
@@ -272,7 +279,7 @@ export class QdrantVectorStore
): Promise<VectorStoreQueryResult> {
const qdrantFilters = options?.qdrant_filters;
let queryFilters;
let queryFilters: QdrantFilter | undefined;
if (!query.queryEmbedding) {
throw new Error("No query embedding provided");
@@ -281,7 +288,7 @@ export class QdrantVectorStore
if (qdrantFilters) {
queryFilters = qdrantFilters;
} else {
queryFilters = await this.buildQueryFilter(query);
queryFilters = buildQueryFilter(query);
}
const result = (await this.db.search(this.collectionName, {
@@ -292,58 +299,118 @@ export class QdrantVectorStore
return this.parseToQueryResult(result);
}
}
/**
* Qdrant filter builder
* @param query The VectorStoreQuery to be used
*/
private async buildQueryFilter(query: VectorStoreQuery) {
if (!query.docIds && !query.queryStr && !query.filters) {
return null;
}
/**
* Qdrant filter builder
* @param query The VectorStoreQuery to be used
*/
function buildQueryFilter(query: VectorStoreQuery): QdrantFilter | undefined {
if (!query.docIds && !query.queryStr && !query.filters) return undefined;
const mustConditions = [];
const mustConditions: QdrantMustConditions = [];
if (query.docIds) {
mustConditions.push({
key: "doc_id",
match: { any: query.docIds },
});
}
if (query.docIds) {
mustConditions.push({
key: "doc_id",
match: {
any: query.docIds,
},
});
}
const metadataFilters = toQdrantMetadataFilters(query.filters);
if (metadataFilters) {
mustConditions.push(metadataFilters);
}
if (!query.filters) {
return {
must: mustConditions,
};
}
return { must: mustConditions };
}
const metadataFilters = query.filters.filters;
/**
* Converts metadata filters to Qdrant-compatible filters
* @param subFilters The metadata filters to be converted
* @returns A QdrantFilter object or undefined if no valid filters are provided
*/
function toQdrantMetadataFilters(
subFilters?: MetadataFilters,
): QdrantFilter | undefined {
if (!subFilters?.filters.length) return undefined;
for (let i = 0; i < metadataFilters.length; i++) {
const filter = metadataFilters[i]!;
const conditions: QdrantMustConditions = [];
if (typeof filter.key === "number") {
mustConditions.push({
key: filter.key,
match: {
gt: filter.value,
lt: filter.value,
for (const subfilter of subFilters.filters) {
if (subfilter.operator === FilterOperator.EQ) {
if (typeof subfilter.value === "number") {
conditions.push({
key: subfilter.key,
range: {
gte: subfilter.value,
lte: subfilter.value,
},
});
} else {
mustConditions.push({
key: filter.key,
match: {
value: filter.value,
},
conditions.push({
key: subfilter.key,
match: { value: subfilter.value },
});
}
} else if (subfilter.operator === FilterOperator.LT) {
conditions.push({
key: subfilter.key,
range: { lt: subfilter.value },
});
} else if (subfilter.operator === FilterOperator.GT) {
conditions.push({
key: subfilter.key,
range: { gt: subfilter.value },
});
} else if (subfilter.operator === FilterOperator.GTE) {
conditions.push({
key: subfilter.key,
range: { gte: subfilter.value },
});
} else if (subfilter.operator === FilterOperator.LTE) {
conditions.push({
key: subfilter.key,
range: { lte: subfilter.value },
});
} else if (subfilter.operator === FilterOperator.TEXT_MATCH) {
conditions.push({
key: subfilter.key,
match: { text: subfilter.value },
});
} else if (subfilter.operator === FilterOperator.NE) {
conditions.push({
key: subfilter.key,
match: { except: [subfilter.value] },
});
} else if (subfilter.operator === FilterOperator.IN) {
const values = Array.isArray(subfilter.value)
? subfilter.value.map(String)
: String(subfilter.value).split(",");
conditions.push({
key: subfilter.key,
match: { any: values },
});
} else if (subfilter.operator === FilterOperator.NIN) {
const values = Array.isArray(subfilter.value)
? subfilter.value.map(String)
: String(subfilter.value).split(",");
conditions.push({
key: subfilter.key,
match: { except: values },
});
} else if (subfilter.operator === FilterOperator.IS_EMPTY) {
conditions.push({
is_empty: { key: subfilter.key },
});
}
return {
must: mustConditions,
};
}
const filter: QdrantFilter = {};
if (subFilters.condition === FilterCondition.OR) {
filter.should = conditions;
} else {
filter.must = conditions;
}
return filter;
}
@@ -27,7 +27,7 @@ describe("VectorStoreIndex", () => {
runs: number = 2,
): Promise<Array<number>> => {
const documents = [new Document({ text: "lorem ipsem", id_: "1" })];
const entries = [];
const entries: number[] = [];
for (let i = 0; i < runs; i++) {
await VectorStoreIndex.fromDocuments(documents, {
serviceContext,
@@ -43,7 +43,7 @@ describe("VectorStoreIndex", () => {
test("fromDocuments stores duplicates without a doc store strategy", async () => {
const entries = await testStrategy(DocStoreStrategy.NONE);
expect(entries[0]! + 1).toBe(entries[1]);
expect(entries[0]).toBe(entries[1]);
});
test("fromDocuments ignores duplicates with upserts doc store strategy", async () => {
+7
View File
@@ -0,0 +1,7 @@
import { expect, test, vi } from "vitest";
// Checks that, with OPENAI_API_KEY stubbed to undefined, importing
// "llamaindex" and reading `Settings.llm` still yields a defined value.
test("init without error", async () => {
// Stub the variable to undefined before the package is loaded.
vi.stubEnv("OPENAI_API_KEY", undefined);
// Dynamic import so the module is evaluated only after the stub is in place.
const { Settings } = await import("llamaindex");
expect(Settings.llm).toBeDefined();
});
+26
View File
@@ -1,5 +1,31 @@
# @llamaindex/groq
## 0.0.13
### Patch Changes
- @llamaindex/openai@0.1.14
## 0.0.12
### Patch Changes
- @llamaindex/openai@0.1.13
## 0.0.11
### Patch Changes
- 2a82413: fix(core): set `Settings.llm` to OpenAI by default and support lazy load openai
- Updated dependencies [2a82413]
- @llamaindex/openai@0.1.12
## 0.0.10
### Patch Changes
- @llamaindex/openai@0.1.11
## 0.0.9
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/groq",
"description": "Groq Adapter for LlamaIndex",
"version": "0.0.9",
"version": "0.0.13",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+6 -5
View File
@@ -4,7 +4,7 @@ import GroqSDK, { type ClientOptions } from "groq-sdk";
export class Groq extends OpenAI {
constructor(
init?: Partial<OpenAI> & {
init?: Omit<Partial<OpenAI>, "session"> & {
additionalSessionOptions?: ClientOptions;
},
) {
@@ -22,9 +22,10 @@ export class Groq extends OpenAI {
...rest,
});
this.session.openai = new GroqSDK({
apiKey,
...init?.additionalSessionOptions,
}) as any;
this.lazySession = async () =>
new GroqSDK({
apiKey,
...init?.additionalSessionOptions,
}) as any;
}
}
+22
View File
@@ -1,5 +1,27 @@
# @llamaindex/ollama
## 0.0.6
### Patch Changes
- Updated dependencies [ee697fb]
- @llamaindex/core@0.2.11
## 0.0.5
### Patch Changes
- Updated dependencies [3489e7d]
- Updated dependencies [468bda5]
- @llamaindex/core@0.2.10
## 0.0.4
### Patch Changes
- Updated dependencies [b17d439]
- @llamaindex/core@0.2.9
## 0.0.3
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/ollama",
"description": "Ollama Adapter for LlamaIndex",
"version": "0.0.3",
"version": "0.0.6",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+28
View File
@@ -1,5 +1,33 @@
# @llamaindex/openai
## 0.1.14
### Patch Changes
- Updated dependencies [ee697fb]
- @llamaindex/core@0.2.11
## 0.1.13
### Patch Changes
- Updated dependencies [3489e7d]
- Updated dependencies [468bda5]
- @llamaindex/core@0.2.10
## 0.1.12
### Patch Changes
- 2a82413: fix(core): set `Settings.llm` to OpenAI by default and support lazy load openai
## 0.1.11
### Patch Changes
- Updated dependencies [b17d439]
- @llamaindex/core@0.2.9
## 0.1.10
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/openai",
"description": "OpenAI Adapter for LlamaIndex",
"version": "0.1.10",
"version": "0.1.14",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+8 -17
View File
@@ -2,11 +2,6 @@ import { getEnv } from "@llamaindex/env";
import type { AzureClientOptions } from "openai";
export interface AzureOpenAIConfig extends AzureClientOptions {
/** @deprecated use "deployment" instead */
deploymentName?: string | undefined;
}
// NOTE we're not supporting the legacy models as they're not available for new deployments
// https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/legacy-models
// If you have a need for them, please open an issue on GitHub
@@ -85,14 +80,15 @@ const DEFAULT_API_VERSION = "2023-05-15";
//^ NOTE: this will change over time, if you want to pin it, use a specific version
export function getAzureConfigFromEnv(
init?: Partial<AzureOpenAIConfig> & { model?: string },
): AzureOpenAIConfig {
init?: Partial<AzureClientOptions> & { model?: string },
): AzureClientOptions {
const deployment =
init?.deploymentName ??
init?.deployment ??
getEnv("AZURE_OPENAI_DEPLOYMENT") ?? // From Azure docs
getEnv("AZURE_OPENAI_API_DEPLOYMENT_NAME") ?? // LCJS compatible
init?.model; // Fall back to model name, Python compatible
init && "deploymentName" in init && typeof init.deploymentName === "string"
? init?.deploymentName
: (init?.deployment ??
getEnv("AZURE_OPENAI_DEPLOYMENT") ?? // From Azure docs
getEnv("AZURE_OPENAI_API_DEPLOYMENT_NAME") ?? // LCJS compatible
init?.model); // Fall back to model name, Python compatible
return {
apiKey:
init?.apiKey ??
@@ -110,15 +106,10 @@ export function getAzureConfigFromEnv(
getEnv("OPENAI_API_VERSION") ?? // Python compatible
getEnv("AZURE_OPENAI_API_VERSION") ?? // LCJS compatible
DEFAULT_API_VERSION,
deploymentName: deployment, // LCJS compatible
deployment, // For Azure OpenAI
};
}
export function getAzureBaseUrl(config: AzureOpenAIConfig): string {
return `${config.endpoint}/openai/deployments/${config.deploymentName}`;
}
export function getAzureModel(openAIModel: string) {
for (const [key, value] of Object.entries(
ALL_AZURE_OPENAI_EMBEDDING_MODELS,
+52 -29
View File
@@ -1,14 +1,16 @@
import { BaseEmbedding } from "@llamaindex/core/embeddings";
import { Tokenizers } from "@llamaindex/env";
import type { ClientOptions as OpenAIClientOptions } from "openai";
import type { AzureOpenAIConfig } from "./azure.js";
import { getEnv, Tokenizers } from "@llamaindex/env";
import type {
AzureClientOptions,
AzureOpenAI as AzureOpenAILLM,
ClientOptions as OpenAIClientOptions,
OpenAI as OpenAILLM,
} from "openai";
import {
getAzureConfigFromEnv,
getAzureModel,
shouldUseAzure,
} from "./azure.js";
import type { OpenAISession } from "./llm.js";
import { getOpenAISession } from "./llm.js";
export const ALL_OPENAI_EMBEDDING_MODELS = {
"text-embedding-ada-002": {
@@ -32,6 +34,8 @@ export const ALL_OPENAI_EMBEDDING_MODELS = {
type ModelKeys = keyof typeof ALL_OPENAI_EMBEDDING_MODELS;
type LLMInstance = Pick<AzureOpenAILLM | OpenAILLM, "embeddings" | "apiKey">;
export class OpenAIEmbedding extends BaseEmbedding {
/** embedding model. defaults to "text-embedding-ada-002" */
model: string;
@@ -51,14 +55,26 @@ export class OpenAIEmbedding extends BaseEmbedding {
| Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">
| undefined;
/** session object */
session: OpenAISession;
// use a lazy session here to avoid checking OPENAI_API_KEY immediately
lazySession: () => Promise<LLMInstance>;
#session: Promise<LLMInstance> | null = null;
/**
 * Client promise, created on demand: the first access calls `lazySession()`
 * and stores the returned promise; later accesses return the stored promise.
 */
get session() {
  return (this.#session ??= this.lazySession());
}
/**
* OpenAI Embedding
* @param init - initial parameters
*/
constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) {
constructor(
init?: Omit<Partial<OpenAIEmbedding>, "lazySession"> & {
session?: LLMInstance | undefined;
azure?: AzureClientOptions;
},
) {
super();
this.model = init?.model ?? "text-embedding-ada-002";
@@ -77,7 +93,6 @@ export class OpenAIEmbedding extends BaseEmbedding {
if (key) {
this.embedInfo = ALL_OPENAI_EMBEDDING_MODELS[key];
}
if (init?.azure || shouldUseAzure()) {
const azureConfig = {
...getAzureConfigFromEnv({
@@ -85,26 +100,32 @@ export class OpenAIEmbedding extends BaseEmbedding {
}),
...init?.azure,
};
this.apiKey = azureConfig.apiKey;
this.session =
init?.session ??
getOpenAISession({
azure: true,
maxRetries: this.maxRetries,
timeout: this.timeout,
...this.additionalSessionOptions,
...azureConfig,
});
this.apiKey =
init?.session?.apiKey ?? azureConfig.apiKey ?? getEnv("OPENAI_API_KEY");
this.lazySession = async () =>
import("openai").then(
async ({ AzureOpenAI }) =>
init?.session ??
new AzureOpenAI({
maxRetries: this.maxRetries,
timeout: this.timeout!,
...this.additionalSessionOptions,
...azureConfig,
}),
);
} else {
this.apiKey = init?.apiKey ?? undefined;
this.session =
init?.session ??
getOpenAISession({
apiKey: this.apiKey,
maxRetries: this.maxRetries,
timeout: this.timeout,
...this.additionalSessionOptions,
this.apiKey = init?.session?.apiKey ?? getEnv("OPENAI_API_KEY");
this.lazySession = async () =>
import("openai").then(({ OpenAI }) => {
return (
init?.session ??
new OpenAI({
apiKey: this.apiKey,
maxRetries: this.maxRetries,
timeout: this.timeout!,
...this.additionalSessionOptions,
})
);
});
}
}
@@ -118,7 +139,9 @@ export class OpenAIEmbedding extends BaseEmbedding {
// TODO: ensure this for every sub class by calling it in the base class
input = this.truncateMaxTokens(input);
const { data } = await this.session.openai.embeddings.create(
const { data } = await (
await this.session
).embeddings.create(
this.dimensions
? {
model: this.model,
-3
View File
@@ -10,9 +10,6 @@ export {
GPT4_MODELS,
O1_MODELS,
OpenAI,
OpenAISession,
type OpenAIAdditionalChatOptions,
type OpenAIAdditionalMetadata,
} from "./llm";
export { type AzureOpenAIConfig } from "./azure";
+45 -80
View File
@@ -1,12 +1,11 @@
import { getEnv } from "@llamaindex/env";
import type OpenAILLM from "openai";
import type {
ClientOptions,
AzureClientOptions,
AzureOpenAI as AzureOpenAILLM,
ClientOptions as OpenAIClientOptions,
OpenAI as OpenAILLM,
} from "openai";
import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai";
import type { ChatModel } from "openai/resources/chat/chat";
import { isDeepEqual } from "remeda";
import { wrapEventCaller, wrapLLMEvent } from "@llamaindex/core/decorator";
import {
@@ -35,64 +34,12 @@ import type {
ChatCompletionUserMessageParam,
} from "openai/resources/chat/completions";
import type { ChatCompletionMessageParam } from "openai/resources/index.js";
import type { AzureOpenAIConfig } from "./azure.js";
import {
getAzureConfigFromEnv,
getAzureModel,
shouldUseAzure,
} from "./azure.js";
export class OpenAISession {
openai: Pick<OrigOpenAI, "chat" | "embeddings">;
constructor(options: ClientOptions & { azure?: boolean } = {}) {
if (options.azure) {
this.openai = new AzureOpenAI(options as AzureOpenAIConfig);
} else {
if (!options.apiKey) {
options.apiKey = getEnv("OPENAI_API_KEY");
}
if (!options.apiKey) {
throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable"); // Overriding OpenAI package's error message
}
this.openai = new OrigOpenAI({
...options,
});
}
}
}
// I'm not 100% sure this is necessary vs. just starting a new session
// every time we make a call. They say they try to reuse connections
// so in theory this is more efficient, but we should test it in the future.
const defaultOpenAISession: {
session: OpenAISession;
options: ClientOptions;
}[] = [];
/**
* Get a session for the OpenAI API. If one already exists with the same options,
* it will be returned. Otherwise, a new session will be created.
* @param options
* @returns
*/
export function getOpenAISession(
options: ClientOptions & { azure?: boolean } = {},
) {
let session = defaultOpenAISession.find((session) => {
return isDeepEqual(session.options, options);
})?.session;
if (!session) {
session = new OpenAISession(options);
defaultOpenAISession.push({ session, options });
}
return session;
}
export const GPT4_MODELS = {
"chatgpt-4o-latest": {
contextWindow: 128000,
@@ -182,6 +129,8 @@ export type OpenAIAdditionalChatOptions = Omit<
| "toolChoice"
>;
type LLMInstance = Pick<AzureOpenAILLM | OpenAILLM, "chat" | "apiKey">;
export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
model:
| ChatModel
@@ -196,14 +145,24 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
apiKey?: string | undefined = undefined;
maxRetries: number;
timeout?: number;
session: OpenAISession;
additionalSessionOptions?:
| undefined
| Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">;
// use a lazy session here to avoid checking OPENAI_API_KEY immediately
lazySession: () => Promise<LLMInstance>;
#session: Promise<LLMInstance> | null = null;
/**
 * Client promise, created on demand: the first access calls `lazySession()`
 * and stores the returned promise; later accesses return the stored promise.
 */
get session() {
  return (this.#session ??= this.lazySession());
}
constructor(
init?: Partial<OpenAI> & {
azure?: AzureOpenAIConfig;
init?: Omit<Partial<OpenAI>, "session"> & {
session?: LLMInstance | undefined;
azure?: AzureClientOptions;
},
) {
super();
@@ -216,6 +175,8 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
this.additionalChatOptions = init?.additionalChatOptions;
this.additionalSessionOptions = init?.additionalSessionOptions;
this.apiKey =
init?.session?.apiKey ?? init?.apiKey ?? getEnv("OPENAI_API_KEY");
if (init?.azure || shouldUseAzure()) {
const azureConfig = {
@@ -225,25 +186,26 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
...init?.azure,
};
this.apiKey = azureConfig.apiKey;
this.session =
this.lazySession = async () =>
init?.session ??
getOpenAISession({
azure: true,
maxRetries: this.maxRetries,
timeout: this.timeout,
...this.additionalSessionOptions,
...azureConfig,
import("openai").then(({ AzureOpenAI }) => {
return new AzureOpenAI({
maxRetries: this.maxRetries,
timeout: this.timeout!,
...this.additionalSessionOptions,
...azureConfig,
});
});
} else {
this.apiKey = init?.apiKey ?? undefined;
this.session =
this.lazySession = async () =>
init?.session ??
getOpenAISession({
apiKey: this.apiKey,
maxRetries: this.maxRetries,
timeout: this.timeout,
...this.additionalSessionOptions,
import("openai").then(({ OpenAI }) => {
return new OpenAI({
apiKey: this.apiKey,
maxRetries: this.maxRetries,
timeout: this.timeout!,
...this.additionalSessionOptions,
});
});
}
}
@@ -382,7 +344,9 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
}
// Non-streaming
const response = await this.session.openai.chat.completions.create({
const response = await (
await this.session
).chat.completions.create({
...baseRequestParams,
stream: false,
});
@@ -414,11 +378,12 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
protected async *streamChat(
baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams,
): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> {
const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> =
await this.session.openai.chat.completions.create({
...baseRequestParams,
stream: true,
});
const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> = await (
await this.session
).chat.completions.create({
...baseRequestParams,
stream: true,
});
// TODO: add callback to streamConverter and use streamConverter here
// this will be used to keep track of the current tool call, make sure input are valid json object.
+4 -62
View File
@@ -360,10 +360,10 @@ importers:
specifier: ^0.53.0
version: 0.53.0(typescript@5.6.2)
'@llamaindex/core':
specifier: workspace:^0.2.8
specifier: workspace:*
version: link:../core
'@llamaindex/env':
specifier: workspace:^0.1.13
specifier: workspace:*
version: link:../env
bunchee:
specifier: 5.3.2
@@ -19518,8 +19518,8 @@ snapshots:
'@typescript-eslint/parser': 7.2.0(eslint@8.57.0)(typescript@5.6.2)
eslint: 8.57.0
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0)
eslint-plugin-import: 2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0)
eslint-import-resolver-typescript: 3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1)(eslint@8.57.0)
eslint-plugin-import: 2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.0)
eslint-plugin-jsx-a11y: 6.9.0(eslint@8.57.0)
eslint-plugin-react: 7.35.0(eslint@8.57.0)
eslint-plugin-react-hooks: 4.6.2(eslint@8.57.0)
@@ -19566,25 +19566,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0):
dependencies:
'@nolyfill/is-core-module': 1.0.39
debug: 4.3.7
enhanced-resolve: 5.17.1
eslint: 8.57.0
eslint-module-utils: 2.8.2(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0)
fast-glob: 3.3.2
get-tsconfig: 4.8.0
is-bun-module: 1.1.0
is-glob: 4.0.3
optionalDependencies:
eslint-plugin-import: 2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0)
transitivePeerDependencies:
- '@typescript-eslint/parser'
- eslint-import-resolver-node
- eslint-import-resolver-webpack
- supports-color
eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1)(eslint@8.57.0):
dependencies:
'@nolyfill/is-core-module': 1.0.39
@@ -19604,17 +19585,6 @@ snapshots:
- eslint-import-resolver-webpack
- supports-color
eslint-module-utils@2.8.2(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0):
dependencies:
debug: 3.2.7
optionalDependencies:
'@typescript-eslint/parser': 7.2.0(eslint@8.57.0)(typescript@5.6.2)
eslint: 8.57.0
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0)
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.8.2(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1)(eslint@8.57.0))(eslint@8.57.0):
dependencies:
debug: 3.2.7
@@ -19626,33 +19596,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0):
dependencies:
array-includes: 3.1.8
array.prototype.findlastindex: 1.2.5
array.prototype.flat: 1.3.2
array.prototype.flatmap: 1.3.2
debug: 3.2.7
doctrine: 2.1.0
eslint: 8.57.0
eslint-import-resolver-node: 0.3.9
eslint-module-utils: 2.8.2(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0))(eslint@8.57.0))(eslint@8.57.0)
hasown: 2.0.2
is-core-module: 2.15.1
is-glob: 4.0.3
minimatch: 3.1.2
object.fromentries: 2.0.8
object.groupby: 1.0.3
object.values: 1.2.0
semver: 6.3.1
tsconfig-paths: 3.15.0
optionalDependencies:
'@typescript-eslint/parser': 7.2.0(eslint@8.57.0)(typescript@5.6.2)
transitivePeerDependencies:
- eslint-import-resolver-typescript
- eslint-import-resolver-webpack
- supports-color
eslint-plugin-import@2.29.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.6.2))(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.0):
dependencies:
array-includes: 3.1.8
@@ -19679,7 +19622,6 @@ snapshots:
- eslint-import-resolver-typescript
- eslint-import-resolver-webpack
- supports-color
optional: true
eslint-plugin-import@2.29.1(@typescript-eslint/parser@8.5.0(eslint@8.57.0)(typescript@5.6.2))(eslint@8.57.0):
dependencies: