mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
Compare commits
48 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f3e0d07f48 | |||
| 1364e8eeed | |||
| 96fc69cc61 | |||
| 3b7736f763 | |||
| a7a7afe66e | |||
| c646ee2eca | |||
| 5729bd92fd | |||
| e0e52cf879 | |||
| 6f75306c17 | |||
| 94cb4ad810 | |||
| 1ea4014746 | |||
| 6a9a7b1458 | |||
| 1c168cd531 | |||
| 62cba5236d | |||
| d265e96420 | |||
| d30bbf799f | |||
| 53fd00a7c3 | |||
| 83f2848d47 | |||
| 313071e9cd | |||
| 5f6782038a | |||
| fe08d0451b | |||
| 59c5e5c3d4 | |||
| ee697fb1b3 | |||
| cf3320a4ea | |||
| f2ed69f2f8 | |||
| 3489e7de84 | |||
| 468bda594e | |||
| 6f3a31caf6 | |||
| 63e9846e97 | |||
| b7382b0d24 | |||
| 2a8241328d | |||
| 0b20ff9f17 | |||
| 1fc26046e3 | |||
| b17d439d6d | |||
| 040160c360 | |||
| 981811efd1 | |||
| d563b45a27 | |||
| 2774e80234 | |||
| 449274ca5a | |||
| 78037a664c | |||
| 1d9e3b1000 | |||
| df83e32107 | |||
| f7b4e94231 | |||
| 4c07a2655d | |||
| 5c0c8b2ec4 | |||
| e5e18688a6 | |||
| b6fb10eba8 | |||
| df441e28f4 |
@@ -1,5 +1,105 @@
|
||||
# docs
|
||||
|
||||
## 0.0.92
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/examples@0.0.9
|
||||
|
||||
## 0.0.91
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.90
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.89
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.88
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.87
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.86
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.0.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.0.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.0.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.0.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,7 +13,7 @@ Official documentation for LlamaParse can be found [here](https://docs.cloud.lla
|
||||
## Usage
|
||||
|
||||
You can then use the `LlamaParseReader` class to load local files and convert them into a parsed document that can be used by LlamaIndex.
|
||||
See [LlamaParseReader.ts](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/llamaindex/src/readers/LlamaParseReader.ts) for a list of supported file types:
|
||||
See [reader.ts](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/cloud/src/reader.ts) for a list of supported file types:
|
||||
|
||||
<CodeBlock language="ts">{CodeSource}</CodeBlock>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docs",
|
||||
"version": "0.0.79",
|
||||
"version": "0.0.92",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
# examples
|
||||
|
||||
## 0.0.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,7 +13,7 @@ import { FunctionTool, OpenAI, ToolCallOptions } from "llamaindex";
|
||||
}
|
||||
})();
|
||||
|
||||
async function callLLM(init: Partial<OpenAI>) {
|
||||
async function callLLM(init: { model: string }) {
|
||||
const csvData =
|
||||
"Country,Average Height (cm)\nNetherlands,156\nDenmark,158\nNorway,160";
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import {
|
||||
AstraDBVectorStore,
|
||||
Document,
|
||||
MetadataFilters,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
@@ -42,8 +43,10 @@ async function main() {
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
|
||||
const queryEngine = index.asQueryEngine();
|
||||
const preFilters: MetadataFilters = {
|
||||
filters: [{ key: "id", operator: "in", value: [123, 789] }],
|
||||
}; // try changing the filters to see the different results
|
||||
const queryEngine = index.asQueryEngine({ preFilters });
|
||||
const response = await queryEngine.query({
|
||||
query: "Describe AstraDB.",
|
||||
});
|
||||
|
||||
@@ -1,57 +1,83 @@
|
||||
import {
|
||||
ChromaVectorStore,
|
||||
Document,
|
||||
MetadataFilters,
|
||||
VectorStoreIndex,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
const collectionName = "dog_colors";
|
||||
const collectionName = "dogs_with_color";
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const docs = [
|
||||
new Document({
|
||||
text: "The dog is brown",
|
||||
metadata: {
|
||||
dogId: "1",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
text: "The dog is red",
|
||||
metadata: {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
console.log("Creating ChromaDB vector store");
|
||||
const chromaVS = new ChromaVectorStore({ collectionName });
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: chromaVS });
|
||||
const index = await VectorStoreIndex.fromVectorStore(chromaVS);
|
||||
|
||||
console.log("Embedding documents and adding to index");
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
const queryFn = async (filters?: MetadataFilters) => {
|
||||
console.log("\nQuerying dogs by filters: ", JSON.stringify(filters));
|
||||
const query = "List all colors of dogs";
|
||||
const queryEngine = index.asQueryEngine({
|
||||
preFilters: filters,
|
||||
similarityTopK: 3,
|
||||
});
|
||||
const response = await queryEngine.query({ query });
|
||||
console.log(response.toString());
|
||||
};
|
||||
|
||||
console.log("Querying index");
|
||||
const queryEngine = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: "2",
|
||||
operator: "==",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const response = await queryEngine.query({
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log(response.toString());
|
||||
await queryFn(); // red, brown, yellow
|
||||
await queryFn({ filters: [{ key: "dogId", value: "1", operator: "==" }] }); // brown
|
||||
await queryFn({ filters: [{ key: "dogId", value: "1", operator: "!=" }] }); // red, yellow
|
||||
await queryFn({
|
||||
filters: [
|
||||
{ key: "dogId", value: "1", operator: "==" },
|
||||
{ key: "dogId", value: "3", operator: "==" },
|
||||
],
|
||||
condition: "or",
|
||||
}); // brown, yellow
|
||||
await queryFn({
|
||||
filters: [{ key: "dogId", value: ["1", "2"], operator: "in" }],
|
||||
}); // red, brown
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
void main();
|
||||
async function generate() {
|
||||
const docs = [
|
||||
new Document({
|
||||
id_: "doc1",
|
||||
text: "The dog is brown",
|
||||
metadata: {
|
||||
dogId: "1",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
id_: "doc2",
|
||||
text: "The dog is red",
|
||||
metadata: {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
id_: "doc3",
|
||||
text: "The dog is yellow",
|
||||
metadata: {
|
||||
dogId: "3",
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
console.log("Creating ChromaDB vector store");
|
||||
const chromaVS = new ChromaVectorStore({ collectionName });
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: chromaVS });
|
||||
|
||||
console.log("Embedding documents and adding to index");
|
||||
await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
}
|
||||
|
||||
(async () => {
|
||||
await generate();
|
||||
await main();
|
||||
})();
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
import {
|
||||
Document,
|
||||
MetadataFilters,
|
||||
Settings,
|
||||
SimpleDocumentStore,
|
||||
VectorStoreIndex,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
async function getDataSource() {
|
||||
const docs = [
|
||||
new Document({ text: "The dog is brown", metadata: { dogId: "1" } }),
|
||||
new Document({ text: "The dog is yellow", metadata: { dogId: "2" } }),
|
||||
];
|
||||
const storageContext = await storageContextFromDefaults({
|
||||
persistDir: "./cache",
|
||||
});
|
||||
const numberOfDocs = Object.keys(
|
||||
(storageContext.docStore as SimpleDocumentStore).toDict(),
|
||||
).length;
|
||||
if (numberOfDocs === 0) {
|
||||
return await VectorStoreIndex.fromDocuments(docs, { storageContext });
|
||||
}
|
||||
return await VectorStoreIndex.init({
|
||||
storageContext,
|
||||
});
|
||||
}
|
||||
|
||||
Settings.callbackManager.on("retrieve-end", (event) => {
|
||||
const { nodes, query } = event.detail;
|
||||
console.log(`${query.query} - Number of retrieved nodes:`, nodes.length);
|
||||
});
|
||||
|
||||
async function main() {
|
||||
const index = await getDataSource();
|
||||
const filters: MetadataFilters = {
|
||||
filters: [{ key: "dogId", value: "2", operator: "==" }],
|
||||
};
|
||||
|
||||
const retriever = index.asRetriever({ similarityTopK: 3, filters });
|
||||
const queryEngine = index.asQueryEngine({
|
||||
similarityTopK: 3,
|
||||
preFilters: filters,
|
||||
});
|
||||
|
||||
console.log("Retriever and query engine should only retrieve 1 node:");
|
||||
await retriever.retrieve({ query: "Retriever: get dog" });
|
||||
await queryEngine.query({ query: "QueryEngine: get dog" });
|
||||
}
|
||||
|
||||
void main();
|
||||
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@llamaindex/examples",
|
||||
"private": true,
|
||||
"version": "0.0.8",
|
||||
"version": "0.0.9",
|
||||
"dependencies": {
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@llamaindex/core": "^0.2.0",
|
||||
"@llamaindex/core": "^0.3.0",
|
||||
"@notionhq/client": "^2.2.15",
|
||||
"@pinecone-database/pinecone": "^3.0.2",
|
||||
"@vercel/postgres": "^0.10.0",
|
||||
@@ -15,7 +15,7 @@
|
||||
"commander": "^12.1.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"llamaindex": "^0.6.0",
|
||||
"llamaindex": "^0.7.0",
|
||||
"mongodb": "^6.7.0",
|
||||
"pathe": "^1.1.2",
|
||||
"postgres": "^3.4.4"
|
||||
|
||||
@@ -39,6 +39,12 @@ async function main() {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
text: "The dog is black",
|
||||
metadata: {
|
||||
dogId: "3",
|
||||
},
|
||||
}),
|
||||
];
|
||||
console.log("Creating QdrantDB vector store");
|
||||
const qdrantVs = new QdrantVectorStore({ url: qdrantUrl, collectionName });
|
||||
@@ -73,6 +79,42 @@ async function main() {
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log("Filter with dogId 2 response:", response.toString());
|
||||
|
||||
console.log("Querying index with dogId !=2: Expected output: Not red");
|
||||
const queryEngineNotDogId2 = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: "2",
|
||||
operator: "!=",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const responseNotDogId2 = await queryEngineNotDogId2.query({
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log(responseNotDogId2.toString());
|
||||
|
||||
console.log(
|
||||
"Querying index with dogId 2 or 3: Expected output: Red, Black",
|
||||
);
|
||||
const queryEngineIn = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: ["2", "3"],
|
||||
operator: "in",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const responseIn = await queryEngineIn.query({
|
||||
query: "List all dogs",
|
||||
});
|
||||
console.log(responseIn.toString());
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
|
||||
@@ -25,12 +25,9 @@ async function main() {
|
||||
similarityCutoff: 0.7,
|
||||
});
|
||||
// TODO: cannot pass responseSynthesizer into retriever query engine
|
||||
const queryEngine = new RetrieverQueryEngine(
|
||||
retriever,
|
||||
undefined,
|
||||
undefined,
|
||||
[nodePostprocessor],
|
||||
);
|
||||
const queryEngine = new RetrieverQueryEngine(retriever, undefined, [
|
||||
nodePostprocessor,
|
||||
]);
|
||||
|
||||
const response = await queryEngine.query({
|
||||
query: "What did the author do growing up?",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
BaseVectorStore,
|
||||
getResponseSynthesizer,
|
||||
OpenAI,
|
||||
OpenAIEmbedding,
|
||||
@@ -6,7 +7,6 @@ import {
|
||||
Settings,
|
||||
TextNode,
|
||||
VectorIndexRetriever,
|
||||
VectorStore,
|
||||
VectorStoreIndex,
|
||||
VectorStoreQuery,
|
||||
VectorStoreQueryResult,
|
||||
@@ -24,7 +24,7 @@ Settings.llm = new OpenAI({
|
||||
* Please do not use this class in production; it's only for demonstration purposes.
|
||||
*/
|
||||
class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
|
||||
implements VectorStore
|
||||
implements BaseVectorStore
|
||||
{
|
||||
storesText = true;
|
||||
isEmbeddingQuery = false;
|
||||
@@ -165,9 +165,7 @@ async function main() {
|
||||
});
|
||||
|
||||
const responseSynthesizer = getResponseSynthesizer("tree_summarize");
|
||||
return new RetrieverQueryEngine(retriever, responseSynthesizer, {
|
||||
filter,
|
||||
});
|
||||
return new RetrieverQueryEngine(retriever, responseSynthesizer);
|
||||
};
|
||||
|
||||
// whatever is a key from your metadata
|
||||
|
||||
@@ -1,5 +1,105 @@
|
||||
# @llamaindex/autotool
|
||||
|
||||
## 4.0.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 3.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 3.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 3.0.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 3.0.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 3.0.18
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 3.0.17
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 3.0.16
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 3.0.15
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 3.0.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 3.0.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 3.0.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 3.0.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 3.0.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,117 @@
|
||||
# @llamaindex/autotool-01-node-example
|
||||
|
||||
## 0.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/autotool@4.0.0
|
||||
|
||||
## 0.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
- @llamaindex/autotool@3.0.22
|
||||
|
||||
## 0.0.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
- @llamaindex/autotool@3.0.21
|
||||
|
||||
## 0.0.29
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
- @llamaindex/autotool@3.0.20
|
||||
|
||||
## 0.0.28
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
- @llamaindex/autotool@3.0.19
|
||||
|
||||
## 0.0.27
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
- @llamaindex/autotool@3.0.18
|
||||
|
||||
## 0.0.26
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
- @llamaindex/autotool@3.0.17
|
||||
|
||||
## 0.0.25
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
- @llamaindex/autotool@3.0.16
|
||||
|
||||
## 0.0.24
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
- @llamaindex/autotool@3.0.15
|
||||
|
||||
## 0.0.23
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
- @llamaindex/autotool@3.0.14
|
||||
|
||||
## 0.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
- @llamaindex/autotool@3.0.13
|
||||
|
||||
## 0.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
- @llamaindex/autotool@3.0.12
|
||||
|
||||
## 0.0.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/autotool@3.0.11
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
"scripts": {
|
||||
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
|
||||
},
|
||||
"version": "0.0.19"
|
||||
"version": "0.0.32"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,117 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/autotool@4.0.0
|
||||
|
||||
## 0.1.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
- @llamaindex/autotool@3.0.22
|
||||
|
||||
## 0.1.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
- @llamaindex/autotool@3.0.21
|
||||
|
||||
## 0.1.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
- @llamaindex/autotool@3.0.20
|
||||
|
||||
## 0.1.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
- @llamaindex/autotool@3.0.19
|
||||
|
||||
## 0.1.71
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
- @llamaindex/autotool@3.0.18
|
||||
|
||||
## 0.1.70
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
- @llamaindex/autotool@3.0.17
|
||||
|
||||
## 0.1.69
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
- @llamaindex/autotool@3.0.16
|
||||
|
||||
## 0.1.68
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
- @llamaindex/autotool@3.0.15
|
||||
|
||||
## 0.1.67
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
- @llamaindex/autotool@3.0.14
|
||||
|
||||
## 0.1.66
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
- @llamaindex/autotool@3.0.13
|
||||
|
||||
## 0.1.65
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
- @llamaindex/autotool@3.0.12
|
||||
|
||||
## 0.1.64
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/autotool@3.0.11
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.1.63
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.63",
|
||||
"version": "0.1.76",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool",
|
||||
"type": "module",
|
||||
"version": "3.0.10",
|
||||
"version": "4.0.0",
|
||||
"description": "auto transpile your JS function to LLM Agent compatible",
|
||||
"files": [
|
||||
"dist",
|
||||
|
||||
@@ -1,5 +1,56 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 1.0.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.2.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
|
||||
## 0.2.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
|
||||
## 0.2.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
|
||||
## 0.2.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 0b20ff9: fix: package.json format
|
||||
|
||||
## 0.2.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 981811e: fix(cloud): llama parse reader save image incorrectly
|
||||
|
||||
## 0.2.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/core@0.2.8
|
||||
- @llamaindex/env@0.1.13
|
||||
|
||||
## 0.2.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "0.2.8",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
@@ -51,13 +51,13 @@
|
||||
"devDependencies": {
|
||||
"@hey-api/client-fetch": "^0.2.4",
|
||||
"@hey-api/openapi-ts": "^0.53.0",
|
||||
"@llamaindex/core": "workspace:^0.2.3",
|
||||
"@llamaindex/env": "workspace:^0.1.11",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"bunchee": "5.3.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@llamaindex/core": "workspace:^0.2.3",
|
||||
"@llamaindex/env": "workspace:^0.1.11"
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*"
|
||||
},
|
||||
"dependencies": {
|
||||
"magic-bytes.js": "^1.10.0"
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { createClient, createConfig, type Client } from "@hey-api/client-fetch";
|
||||
import { type Client, createClient, createConfig } from "@hey-api/client-fetch";
|
||||
import { Document, FileReader } from "@llamaindex/core/schema";
|
||||
import { fs, getEnv } from "@llamaindex/env";
|
||||
import { fs, getEnv, path } from "@llamaindex/env";
|
||||
import { filetypeinfo } from "magic-bytes.js";
|
||||
import {
|
||||
ParsingService,
|
||||
type Body_upload_file_api_v1_parsing_upload_post,
|
||||
type ParserLanguages,
|
||||
ParsingService,
|
||||
} from "./api";
|
||||
import { sleep } from "./utils";
|
||||
|
||||
@@ -510,14 +510,7 @@ export class LlamaParseReader extends FileReader {
|
||||
jobId: string,
|
||||
imageName: string,
|
||||
): Promise<string> {
|
||||
// Get the full path
|
||||
let imagePath = `${downloadPath}/${jobId}-${imageName}`;
|
||||
// Get a valid image path
|
||||
if (!imagePath.endsWith(".png") && !imagePath.endsWith(".jpg")) {
|
||||
imagePath += ".png";
|
||||
}
|
||||
|
||||
return imagePath;
|
||||
return path.join(downloadPath, `${jobId}-${imageName}`);
|
||||
}
|
||||
|
||||
private async fetchAndSaveImage(
|
||||
@@ -538,10 +531,9 @@ export class LlamaParseReader extends FileReader {
|
||||
if (response.error) {
|
||||
throw new Error(`Failed to download image: ${response.error.detail}`);
|
||||
}
|
||||
const arrayBuffer = (await response.data) as ArrayBuffer;
|
||||
const buffer = new Uint8Array(arrayBuffer);
|
||||
const blob = (await response.data) as Blob;
|
||||
// Write the image buffer to the specified imagePath
|
||||
await fs.writeFile(imagePath, buffer);
|
||||
await fs.writeFile(imagePath, new Uint8Array(await blob.arrayBuffer()));
|
||||
}
|
||||
|
||||
// Filters out invalid values (null, undefined, empty string) of specific params.
|
||||
|
||||
@@ -1,5 +1,57 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.48
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.0.47
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
|
||||
## 0.0.46
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
|
||||
## 0.0.45
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
|
||||
## 0.0.44
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [b17d439]
|
||||
- @llamaindex/core@0.2.9
|
||||
|
||||
## 0.0.43
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 2774e80: feat: added meta3.2 support via Bedrock including vision, tool call and inference region support
|
||||
|
||||
## 0.0.42
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/core@0.2.8
|
||||
- @llamaindex/env@0.1.13
|
||||
|
||||
## 0.0.41
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -5,9 +5,11 @@
|
||||
## Current Features:
|
||||
|
||||
- Bedrock support for the Anthropic Claude Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
|
||||
- Bedrock support for the Meta LLama 2, 3 and 3.1 Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
|
||||
- Meta LLama3.1 405b tool call support
|
||||
- Bedrock support for the Meta LLama 2, 3, 3.1 and 3.2 Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
|
||||
- Meta LLama3.1 405b and Llama3.2 tool call support
|
||||
- Meta 3.2 11B and 90B vision support
|
||||
- Bedrock support for querying Knowledge Base
|
||||
- Bedrock: [Supported Regions and models for cross-region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html)
|
||||
|
||||
## LICENSE
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.41",
|
||||
"version": "0.0.48",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -2,5 +2,7 @@ export {
|
||||
BEDROCK_MODELS,
|
||||
BEDROCK_MODEL_MAX_TOKENS,
|
||||
Bedrock,
|
||||
INFERENCE_BEDROCK_MODELS,
|
||||
INFERENCE_TO_BEDROCK_MAP,
|
||||
} from "./llm/bedrock/index.js";
|
||||
export { AmazonKnowledgeBaseRetriever } from "./retrievers/bedrock.js";
|
||||
|
||||
@@ -6,7 +6,10 @@ import type {
|
||||
MessageContentDetail,
|
||||
ToolCallLLMMessageOptions,
|
||||
} from "@llamaindex/core/llms";
|
||||
import { mapMessageContentToMessageContentDetails } from "../utils";
|
||||
import {
|
||||
extractDataUrlComponents,
|
||||
mapMessageContentToMessageContentDetails,
|
||||
} from "../utils";
|
||||
import type {
|
||||
AnthropicContent,
|
||||
AnthropicImageContent,
|
||||
@@ -143,27 +146,6 @@ export const mapTextContent = (text: string): AnthropicTextContent => {
|
||||
return { type: "text", text };
|
||||
};
|
||||
|
||||
export const extractDataUrlComponents = (
|
||||
dataUrl: string,
|
||||
): {
|
||||
mimeType: string;
|
||||
base64: string;
|
||||
} => {
|
||||
const parts = dataUrl.split(";base64,");
|
||||
|
||||
if (parts.length !== 2 || !parts[0]!.startsWith("data:")) {
|
||||
throw new Error("Invalid data URL");
|
||||
}
|
||||
|
||||
const mimeType = parts[0]!.slice(5);
|
||||
const base64 = parts[1]!;
|
||||
|
||||
return {
|
||||
mimeType,
|
||||
base64,
|
||||
};
|
||||
};
|
||||
|
||||
export const mapImageContent = (imageUrl: string): AnthropicImageContent => {
|
||||
if (!imageUrl.startsWith("data:"))
|
||||
throw new Error(
|
||||
|
||||
@@ -47,35 +47,96 @@ export type BedrockChatParamsNonStreaming = LLMChatParamsNonStreaming<
|
||||
export type BedrockChatNonStreamResponse =
|
||||
ChatResponse<ToolCallLLMMessageOptions>;
|
||||
|
||||
export enum BEDROCK_MODELS {
|
||||
AMAZON_TITAN_TG1_LARGE = "amazon.titan-tg1-large",
|
||||
AMAZON_TITAN_TEXT_EXPRESS_V1 = "amazon.titan-text-express-v1",
|
||||
AI21_J2_GRANDE_INSTRUCT = "ai21.j2-grande-instruct",
|
||||
AI21_J2_JUMBO_INSTRUCT = "ai21.j2-jumbo-instruct",
|
||||
AI21_J2_MID = "ai21.j2-mid",
|
||||
AI21_J2_MID_V1 = "ai21.j2-mid-v1",
|
||||
AI21_J2_ULTRA = "ai21.j2-ultra",
|
||||
AI21_J2_ULTRA_V1 = "ai21.j2-ultra-v1",
|
||||
COHERE_COMMAND_TEXT_V14 = "cohere.command-text-v14",
|
||||
ANTHROPIC_CLAUDE_INSTANT_1 = "anthropic.claude-instant-v1",
|
||||
ANTHROPIC_CLAUDE_1 = "anthropic.claude-v1", // EOF: No longer supported
|
||||
ANTHROPIC_CLAUDE_2 = "anthropic.claude-v2",
|
||||
ANTHROPIC_CLAUDE_2_1 = "anthropic.claude-v2:1",
|
||||
ANTHROPIC_CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
META_LLAMA2_13B_CHAT = "meta.llama2-13b-chat-v1",
|
||||
META_LLAMA2_70B_CHAT = "meta.llama2-70b-chat-v1",
|
||||
META_LLAMA3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0",
|
||||
META_LLAMA3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0",
|
||||
META_LLAMA3_1_8B_INSTRUCT = "meta.llama3-1-8b-instruct-v1:0",
|
||||
META_LLAMA3_1_70B_INSTRUCT = "meta.llama3-1-70b-instruct-v1:0",
|
||||
META_LLAMA3_1_405B_INSTRUCT = "meta.llama3-1-405b-instruct-v1:0",
|
||||
MISTRAL_7B_INSTRUCT = "mistral.mistral-7b-instruct-v0:2",
|
||||
MISTRAL_MIXTRAL_7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1",
|
||||
MISTRAL_MIXTRAL_LARGE_2402 = "mistral.mistral-large-2402-v1:0",
|
||||
}
|
||||
export const BEDROCK_MODELS = {
|
||||
AMAZON_TITAN_TG1_LARGE: "amazon.titan-tg1-large",
|
||||
AMAZON_TITAN_TEXT_EXPRESS_V1: "amazon.titan-text-express-v1",
|
||||
AI21_J2_GRANDE_INSTRUCT: "ai21.j2-grande-instruct",
|
||||
AI21_J2_JUMBO_INSTRUCT: "ai21.j2-jumbo-instruct",
|
||||
AI21_J2_MID: "ai21.j2-mid",
|
||||
AI21_J2_MID_V1: "ai21.j2-mid-v1",
|
||||
AI21_J2_ULTRA: "ai21.j2-ultra",
|
||||
AI21_J2_ULTRA_V1: "ai21.j2-ultra-v1",
|
||||
COHERE_COMMAND_TEXT_V14: "cohere.command-text-v14",
|
||||
ANTHROPIC_CLAUDE_INSTANT_1: "anthropic.claude-instant-v1",
|
||||
ANTHROPIC_CLAUDE_1: "anthropic.claude-v1", // EOF: No longer supported
|
||||
ANTHROPIC_CLAUDE_2: "anthropic.claude-v2",
|
||||
ANTHROPIC_CLAUDE_2_1: "anthropic.claude-v2:1",
|
||||
ANTHROPIC_CLAUDE_3_SONNET: "anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_HAIKU: "anthropic.claude-3-haiku-20240307-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_OPUS: "anthropic.claude-3-opus-20240229-v1:0",
|
||||
ANTHROPIC_CLAUDE_3_5_SONNET: "anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
META_LLAMA2_13B_CHAT: "meta.llama2-13b-chat-v1",
|
||||
META_LLAMA2_70B_CHAT: "meta.llama2-70b-chat-v1",
|
||||
META_LLAMA3_8B_INSTRUCT: "meta.llama3-8b-instruct-v1:0",
|
||||
META_LLAMA3_70B_INSTRUCT: "meta.llama3-70b-instruct-v1:0",
|
||||
META_LLAMA3_1_8B_INSTRUCT: "meta.llama3-1-8b-instruct-v1:0",
|
||||
META_LLAMA3_1_70B_INSTRUCT: "meta.llama3-1-70b-instruct-v1:0",
|
||||
META_LLAMA3_1_405B_INSTRUCT: "meta.llama3-1-405b-instruct-v1:0",
|
||||
META_LLAMA3_2_1B_INSTRUCT: "meta.llama3-2-1b-instruct-v1:0",
|
||||
META_LLAMA3_2_3B_INSTRUCT: "meta.llama3-2-3b-instruct-v1:0",
|
||||
META_LLAMA3_2_11B_INSTRUCT: "meta.llama3-2-11b-instruct-v1:0",
|
||||
META_LLAMA3_2_90B_INSTRUCT: "meta.llama3-2-90b-instruct-v1:0",
|
||||
MISTRAL_7B_INSTRUCT: "mistral.mistral-7b-instruct-v0:2",
|
||||
MISTRAL_MIXTRAL_7B_INSTRUCT: "mistral.mixtral-8x7b-instruct-v0:1",
|
||||
MISTRAL_MIXTRAL_LARGE_2402: "mistral.mistral-large-2402-v1:0",
|
||||
};
|
||||
export type BEDROCK_MODELS =
|
||||
(typeof BEDROCK_MODELS)[keyof typeof BEDROCK_MODELS];
|
||||
|
||||
export const INFERENCE_BEDROCK_MODELS = {
|
||||
US_ANTHROPIC_CLAUDE_3_HAIKU: "us.anthropic.claude-3-haiku-20240307-v1:0",
|
||||
US_ANTHROPIC_CLAUDE_3_OPUS: "us.anthropic.claude-3-opus-20240229-v1:0",
|
||||
US_ANTHROPIC_CLAUDE_3_SONNET: "us.anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
US_ANTHROPIC_CLAUDE_3_5_SONNET:
|
||||
"us.anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
US_META_LLAMA_3_2_1B_INSTRUCT: "us.meta.llama3-2-1b-instruct-v1:0",
|
||||
US_META_LLAMA_3_2_3B_INSTRUCT: "us.meta.llama3-2-3b-instruct-v1:0",
|
||||
US_META_LLAMA_3_2_11B_INSTRUCT: "us.meta.llama3-2-11b-instruct-v1:0",
|
||||
US_META_LLAMA_3_2_90B_INSTRUCT: "us.meta.llama3-2-90b-instruct-v1:0",
|
||||
|
||||
EU_ANTHROPIC_CLAUDE_3_HAIKU: "eu.anthropic.claude-3-haiku-20240307-v1:0",
|
||||
EU_ANTHROPIC_CLAUDE_3_SONNET: "eu.anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
EU_ANTHROPIC_CLAUDE_3_5_SONNET:
|
||||
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
EU_META_LLAMA_3_2_1B_INSTRUCT: "eu.meta.llama3-2-1b-instruct-v1:0",
|
||||
EU_META_LLAMA_3_2_3B_INSTRUCT: "eu.meta.llama3-2-3b-instruct-v1:0",
|
||||
};
|
||||
|
||||
export type INFERENCE_BEDROCK_MODELS =
|
||||
(typeof INFERENCE_BEDROCK_MODELS)[keyof typeof INFERENCE_BEDROCK_MODELS];
|
||||
|
||||
export const INFERENCE_TO_BEDROCK_MAP: Record<
|
||||
INFERENCE_BEDROCK_MODELS,
|
||||
BEDROCK_MODELS
|
||||
> = {
|
||||
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_HAIKU]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
|
||||
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_OPUS]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_OPUS,
|
||||
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_SONNET]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
|
||||
[INFERENCE_BEDROCK_MODELS.US_ANTHROPIC_CLAUDE_3_5_SONNET]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
|
||||
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_1B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
|
||||
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_3B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
|
||||
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_11B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
|
||||
[INFERENCE_BEDROCK_MODELS.US_META_LLAMA_3_2_90B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
|
||||
|
||||
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_HAIKU]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
|
||||
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_SONNET]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
|
||||
[INFERENCE_BEDROCK_MODELS.EU_ANTHROPIC_CLAUDE_3_5_SONNET]:
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
|
||||
[INFERENCE_BEDROCK_MODELS.EU_META_LLAMA_3_2_1B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
|
||||
[INFERENCE_BEDROCK_MODELS.EU_META_LLAMA_3_2_3B_INSTRUCT]:
|
||||
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
|
||||
};
|
||||
|
||||
/*
|
||||
* Values taken from https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html#model-parameters-claude
|
||||
@@ -109,6 +170,10 @@ const CHAT_ONLY_MODELS = {
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT]: 128000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT]: 128000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT]: 128000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT]: 131000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT]: 131000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT]: 128000,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT]: 128000,
|
||||
[BEDROCK_MODELS.MISTRAL_7B_INSTRUCT]: 32000,
|
||||
[BEDROCK_MODELS.MISTRAL_MIXTRAL_7B_INSTRUCT]: 32000,
|
||||
[BEDROCK_MODELS.MISTRAL_MIXTRAL_LARGE_2402]: 32000,
|
||||
@@ -139,17 +204,25 @@ export const STREAMING_MODELS = new Set([
|
||||
BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
|
||||
BEDROCK_MODELS.MISTRAL_7B_INSTRUCT,
|
||||
BEDROCK_MODELS.MISTRAL_MIXTRAL_7B_INSTRUCT,
|
||||
BEDROCK_MODELS.MISTRAL_MIXTRAL_LARGE_2402,
|
||||
]);
|
||||
|
||||
export const TOOL_CALL_MODELS = [
|
||||
export const TOOL_CALL_MODELS: BEDROCK_MODELS[] = [
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_OPUS,
|
||||
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
|
||||
BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT,
|
||||
BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT,
|
||||
];
|
||||
|
||||
const getProvider = (model: string): Provider => {
|
||||
@@ -166,7 +239,7 @@ const getProvider = (model: string): Provider => {
|
||||
};
|
||||
|
||||
export type BedrockModelParams = {
|
||||
model: keyof typeof BEDROCK_FOUNDATION_LLMS;
|
||||
model: BEDROCK_MODELS | INFERENCE_BEDROCK_MODELS;
|
||||
temperature?: number;
|
||||
topP?: number;
|
||||
maxTokens?: number;
|
||||
@@ -185,6 +258,10 @@ export const BEDROCK_MODEL_MAX_TOKENS: Partial<Record<BEDROCK_MODELS, number>> =
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_8B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_70B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_1B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_3B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_11B_INSTRUCT]: 2048,
|
||||
[BEDROCK_MODELS.META_LLAMA3_2_90B_INSTRUCT]: 2048,
|
||||
};
|
||||
|
||||
const DEFAULT_BEDROCK_PARAMS = {
|
||||
@@ -193,14 +270,15 @@ const DEFAULT_BEDROCK_PARAMS = {
|
||||
maxTokens: 1024, // required by anthropic
|
||||
};
|
||||
|
||||
export type BedrockParams = BedrockModelParams & BedrockRuntimeClientConfig;
|
||||
export type BedrockParams = BedrockRuntimeClientConfig & BedrockModelParams;
|
||||
|
||||
/**
|
||||
* ToolCallLLM for Bedrock
|
||||
*/
|
||||
export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
|
||||
private client: BedrockRuntimeClient;
|
||||
model: keyof typeof BEDROCK_FOUNDATION_LLMS;
|
||||
protected actualModel: BEDROCK_MODELS | INFERENCE_BEDROCK_MODELS;
|
||||
model: BEDROCK_MODELS;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
@@ -217,8 +295,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
|
||||
...params
|
||||
}: BedrockParams) {
|
||||
super();
|
||||
|
||||
this.model = model;
|
||||
this.actualModel = model;
|
||||
this.model = INFERENCE_TO_BEDROCK_MAP[model] ?? model;
|
||||
this.provider = getProvider(this.model);
|
||||
this.maxTokens = maxTokens ?? DEFAULT_BEDROCK_PARAMS.maxTokens;
|
||||
this.temperature = temperature ?? DEFAULT_BEDROCK_PARAMS.temperature;
|
||||
@@ -241,7 +319,7 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
|
||||
temperature: this.temperature,
|
||||
topP: this.topP,
|
||||
maxTokens: this.maxTokens,
|
||||
contextWindow: BEDROCK_FOUNDATION_LLMS[this.model],
|
||||
contextWindow: BEDROCK_FOUNDATION_LLMS[this.model] ?? 128000,
|
||||
tokenizer: undefined,
|
||||
};
|
||||
}
|
||||
@@ -256,6 +334,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
|
||||
params.additionalChatOptions,
|
||||
);
|
||||
const command = new InvokeModelCommand(input);
|
||||
command.input.modelId = this.actualModel;
|
||||
|
||||
const response = await this.client.send(command);
|
||||
let options: ToolCallLLMMessageOptions = {};
|
||||
if (this.supportToolCall) {
|
||||
@@ -287,6 +367,8 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
|
||||
params.additionalChatOptions,
|
||||
);
|
||||
const command = new InvokeModelWithResponseStreamCommand(input);
|
||||
command.input.modelId = this.actualModel;
|
||||
|
||||
const response = await this.client.send(command);
|
||||
|
||||
if (response.body) yield* this.provider.reduceStream(response.body);
|
||||
|
||||
@@ -67,21 +67,26 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
|
||||
for await (const response of stream) {
|
||||
const event = this.getStreamingEventResponse(response);
|
||||
const delta = this.getTextFromStreamResponse(response);
|
||||
|
||||
// odd quirk of llama3.1, start token is \n\n
|
||||
if (
|
||||
!toolId &&
|
||||
!event?.generation.trim() &&
|
||||
event?.generation_token_count === 1 &&
|
||||
event.prompt_token_count !== null
|
||||
event?.prompt_token_count !== null
|
||||
)
|
||||
continue;
|
||||
|
||||
if (delta === TOKENS.TOOL_CALL) {
|
||||
if (delta.startsWith(TOKENS.TOOL_CALL)) {
|
||||
toolId = randomUUID();
|
||||
const parts = delta.split(TOKENS.TOOL_CALL).filter((part) => part);
|
||||
collecting.push(...parts);
|
||||
continue;
|
||||
}
|
||||
|
||||
let options: undefined | ToolCallLLMMessageOptions = undefined;
|
||||
if (toolId && event?.stop_reason === "stop") {
|
||||
if (delta) collecting.push(delta);
|
||||
const tool = JSON.parse(collecting.join(""));
|
||||
options = {
|
||||
toolCall: [
|
||||
@@ -110,11 +115,18 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
|
||||
getRequestBody<T extends ChatMessage>(
|
||||
metadata: LLMMetadata,
|
||||
messages: T[],
|
||||
tools?: BaseTool[],
|
||||
tools: BaseTool[] = [],
|
||||
): InvokeModelCommandInput | InvokeModelWithResponseStreamCommandInput {
|
||||
let prompt: string = "";
|
||||
let images: string[] = [];
|
||||
if (metadata.model.startsWith("meta.llama3")) {
|
||||
prompt = mapChatMessagesToMetaLlama3Messages(messages, tools);
|
||||
const mapped = mapChatMessagesToMetaLlama3Messages({
|
||||
messages,
|
||||
tools,
|
||||
model: metadata.model,
|
||||
});
|
||||
prompt = mapped.prompt;
|
||||
images = mapped.images;
|
||||
} else if (metadata.model.startsWith("meta.llama2")) {
|
||||
prompt = mapChatMessagesToMetaLlama2Messages(messages);
|
||||
} else {
|
||||
@@ -127,6 +139,7 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
|
||||
accept: "application/json",
|
||||
body: JSON.stringify({
|
||||
prompt,
|
||||
images: images.length ? images : undefined,
|
||||
max_gen_len: metadata.maxTokens,
|
||||
temperature: metadata.temperature,
|
||||
top_p: metadata.topP,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import type {
|
||||
BaseTool,
|
||||
ChatMessage,
|
||||
LLMMetadata,
|
||||
MessageContentTextDetail,
|
||||
ToolCallLLMMessageOptions,
|
||||
} from "@llamaindex/core/llms";
|
||||
import { extractDataUrlComponents } from "../utils";
|
||||
import { TOKENS } from "./constants";
|
||||
import type { MetaMessage } from "./types";
|
||||
|
||||
const getToolCallInstructionString = (tool: BaseTool): string => {
|
||||
@@ -24,7 +27,7 @@ const getToolCallParametersString = (tool: BaseTool): string => {
|
||||
|
||||
// ported from https://github.com/meta-llama/llama-agentic-system/blob/main/llama_agentic_system/system_prompt.py
|
||||
// NOTE: using json instead of the above xml style tool calling works more reliability
|
||||
export const getToolsPrompt = (tools?: BaseTool[]) => {
|
||||
export const getToolsPrompt_3_1 = (tools?: BaseTool[]) => {
|
||||
if (!tools?.length) return "";
|
||||
|
||||
const customToolParams = tools.map((tool) => {
|
||||
@@ -77,6 +80,46 @@ Reminder:
|
||||
`;
|
||||
};
|
||||
|
||||
export const getToolsPrompt_3_2 = (tools?: BaseTool[]) => {
|
||||
if (!tools?.length) return "";
|
||||
return `
|
||||
You are an expert in composing functions. You are given a question and a set of possible functions.
|
||||
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
|
||||
If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
|
||||
also point it out. You should only return the function call in tools call sections.
|
||||
|
||||
If you decide to invoke any of the function(s), you MUST put it in the format of and start with the token: ${TOKENS.TOOL_CALL}:
|
||||
{
|
||||
"name": function_name,
|
||||
"parameters": parameters,
|
||||
}
|
||||
where
|
||||
|
||||
{
|
||||
"name": function_name,
|
||||
"parameters": parameters, => a JSON dict with the function argument name as key and function argument value as value.
|
||||
}
|
||||
|
||||
Here is an example,
|
||||
|
||||
{
|
||||
"name": "example_function_name",
|
||||
"parameters": {"example_name": "example_value"}
|
||||
}
|
||||
|
||||
Reminder:
|
||||
- Function calls MUST follow the specified format
|
||||
- Required parameters MUST be specified
|
||||
- Only call one function at a time
|
||||
- You SHOULD NOT include any other text in the response
|
||||
- Put the entire function call reply on one line
|
||||
|
||||
Here is a list of functions in JSON format that you can invoke.
|
||||
|
||||
${JSON.stringify(tools)}
|
||||
`;
|
||||
};
|
||||
|
||||
export const mapChatRoleToMetaRole = (
|
||||
role: ChatMessage["role"],
|
||||
): MetaMessage["role"] => {
|
||||
@@ -125,16 +168,46 @@ export const mapChatMessagesToMetaMessages = <
|
||||
/**
|
||||
* Documentation at https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3
|
||||
*/
|
||||
export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>(
|
||||
messages: T[],
|
||||
tools?: BaseTool[],
|
||||
): string => {
|
||||
export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>({
|
||||
messages,
|
||||
model,
|
||||
tools,
|
||||
}: {
|
||||
messages: T[];
|
||||
model: LLMMetadata["model"];
|
||||
tools?: BaseTool[];
|
||||
}): { prompt: string; images: string[] } => {
|
||||
const images: string[] = [];
|
||||
const textMessages: T[] = [];
|
||||
|
||||
messages.forEach((message) => {
|
||||
if (Array.isArray(message.content)) {
|
||||
message.content.forEach((content) => {
|
||||
if (content.type === "image_url") {
|
||||
const { base64 } = extractDataUrlComponents(content.image_url.url);
|
||||
images.push(base64);
|
||||
} else {
|
||||
textMessages.push(message);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
textMessages.push(message);
|
||||
}
|
||||
});
|
||||
|
||||
const parts: string[] = [];
|
||||
if (tools?.length) {
|
||||
|
||||
let toolsPrompt = "";
|
||||
if (model.startsWith("meta.llama3-2")) {
|
||||
toolsPrompt = getToolsPrompt_3_2(tools);
|
||||
} else if (model.startsWith("meta.llama3-1")) {
|
||||
toolsPrompt = getToolsPrompt_3_1(tools);
|
||||
}
|
||||
if (toolsPrompt) {
|
||||
parts.push(
|
||||
"<|begin_of_text|>",
|
||||
"<|start_header_id|>system<|end_header_id|>",
|
||||
getToolsPrompt(tools),
|
||||
toolsPrompt,
|
||||
"<|eot_id|>",
|
||||
);
|
||||
}
|
||||
@@ -154,7 +227,9 @@ export const mapChatMessagesToMetaLlama3Messages = <T extends ChatMessage>(
|
||||
...mapped,
|
||||
"<|start_header_id|>assistant<|end_header_id|>",
|
||||
);
|
||||
return parts.join("\n");
|
||||
|
||||
const prompt = parts.join("\n");
|
||||
return { prompt, images };
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -11,3 +11,24 @@ export const mapMessageContentToMessageContentDetails = (
|
||||
|
||||
export const toUtf8 = (input: Uint8Array): string =>
|
||||
new TextDecoder("utf-8").decode(input);
|
||||
|
||||
export const extractDataUrlComponents = (
|
||||
dataUrl: string,
|
||||
): {
|
||||
mimeType: string;
|
||||
base64: string;
|
||||
} => {
|
||||
const parts = dataUrl.split(";base64,");
|
||||
|
||||
if (parts.length !== 2 || !parts[0]!.startsWith("data:")) {
|
||||
throw new Error("Invalid data URL");
|
||||
}
|
||||
|
||||
const mimeType = parts[0]!.slice(5);
|
||||
const base64 = parts[1]!;
|
||||
|
||||
return {
|
||||
mimeType,
|
||||
base64,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,5 +1,45 @@
|
||||
# @llamaindex/core
|
||||
|
||||
## 0.3.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 1364e8e: update metadata extractors to use PromptTemplate
|
||||
- 96fc69c: add defaultQuestionExtractPrompt
|
||||
|
||||
## 0.2.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5f67820: Fix that node parsers generate nodes with UUIDs
|
||||
|
||||
## 0.2.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ee697fb: fix: generate uuid when inserting to Qdrant
|
||||
|
||||
## 0.2.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 3489e7d: fix: num output incorrect in prompt helper
|
||||
- 468bda5: fix: correct warning when chunk size smaller than 0
|
||||
|
||||
## 0.2.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- b17d439: Fix #1278: resolved issue where the id\_ was not correctly passed as the id when creating a TextNode. As a result, the upsert operation to the vector database was using a generated ID instead of the provided document ID, if available.
|
||||
|
||||
## 0.2.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/env@0.1.13
|
||||
|
||||
## 0.2.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/core",
|
||||
"type": "module",
|
||||
"version": "0.2.7",
|
||||
"version": "0.3.0",
|
||||
"description": "LlamaIndex Core Module",
|
||||
"exports": {
|
||||
"./agent": {
|
||||
|
||||
@@ -8,18 +8,16 @@ import {
|
||||
Settings,
|
||||
} from "../global";
|
||||
import type { LLMMetadata } from "../llms";
|
||||
import { SentenceSplitter } from "../node-parser";
|
||||
import type { PromptTemplate } from "../prompts";
|
||||
import { TextSplitter, TokenTextSplitter, truncateText } from "../node-parser";
|
||||
import { BasePromptTemplate, PromptTemplate } from "../prompts";
|
||||
|
||||
/**
|
||||
* Get the empty prompt text given a prompt.
|
||||
*/
|
||||
function getEmptyPromptTxt(prompt: PromptTemplate) {
|
||||
return prompt.format({
|
||||
...Object.fromEntries(
|
||||
[...prompt.templateVars.keys()].map((key) => [key, ""]),
|
||||
),
|
||||
});
|
||||
function getEmptyPromptTxt(prompt: PromptTemplate): string {
|
||||
return prompt.format(
|
||||
Object.fromEntries([...prompt.templateVars.keys()].map((key) => [key, ""])),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,24 +33,24 @@ export function getBiggestPrompt(prompts: PromptTemplate[]): PromptTemplate {
|
||||
}
|
||||
|
||||
export type PromptHelperOptions = {
|
||||
contextWindow?: number;
|
||||
numOutput?: number;
|
||||
chunkOverlapRatio?: number;
|
||||
chunkSizeLimit?: number;
|
||||
tokenizer?: Tokenizer;
|
||||
separator?: string;
|
||||
contextWindow?: number | undefined;
|
||||
numOutput?: number | undefined;
|
||||
chunkOverlapRatio?: number | undefined;
|
||||
chunkSizeLimit?: number | undefined;
|
||||
tokenizer?: Tokenizer | undefined;
|
||||
separator?: string | undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
* A collection of helper functions for working with prompts.
|
||||
*/
|
||||
export class PromptHelper {
|
||||
contextWindow = DEFAULT_CONTEXT_WINDOW;
|
||||
numOutput = DEFAULT_NUM_OUTPUTS;
|
||||
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
|
||||
contextWindow: number;
|
||||
numOutput: number;
|
||||
chunkOverlapRatio: number;
|
||||
chunkSizeLimit: number | undefined;
|
||||
tokenizer: Tokenizer;
|
||||
separator = " ";
|
||||
separator: string;
|
||||
|
||||
constructor(options: PromptHelperOptions = {}) {
|
||||
const {
|
||||
@@ -72,68 +70,93 @@ export class PromptHelper {
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a prompt, return the maximum size of the inputs to the prompt.
|
||||
* @param prompt
|
||||
* @returns
|
||||
* Calculate the available context size based on the number of prompt tokens.
|
||||
*/
|
||||
private getAvailableContextSize(prompt: PromptTemplate) {
|
||||
const emptyPromptText = getEmptyPromptTxt(prompt);
|
||||
const promptTokens = this.tokenizer.encode(emptyPromptText);
|
||||
const numPromptTokens = promptTokens.length;
|
||||
|
||||
return this.contextWindow - numPromptTokens - this.numOutput;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the maximum size of each chunk given a prompt.
|
||||
*/
|
||||
private getAvailableChunkSize(
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = 5,
|
||||
): number {
|
||||
const availableContextSize = this.getAvailableContextSize(prompt);
|
||||
|
||||
const result = Math.floor(availableContextSize / numChunks) - padding;
|
||||
|
||||
if (this.chunkSizeLimit) {
|
||||
return Math.min(this.chunkSizeLimit, result);
|
||||
} else {
|
||||
return result;
|
||||
#getAvailableContextSize(numPromptTokens: number): number {
|
||||
const contextSizeTokens =
|
||||
this.contextWindow - numPromptTokens - this.numOutput;
|
||||
if (contextSizeTokens < 0) {
|
||||
throw new Error(
|
||||
`Calculated available context size ${contextSizeTokens} is not non-negative.`,
|
||||
);
|
||||
}
|
||||
return contextSizeTokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a text splitter with the correct chunk sizes and overlaps given a prompt.
|
||||
* Calculate the available chunk size based on the prompt and other parameters.
|
||||
*/
|
||||
#getAvailableChunkSize<Template extends BasePromptTemplate>(
|
||||
prompt: Template,
|
||||
numChunks: number = 1,
|
||||
padding: number = 5,
|
||||
): number {
|
||||
let numPromptTokens = 0;
|
||||
|
||||
if (prompt instanceof PromptTemplate) {
|
||||
numPromptTokens = this.tokenizer.encode(getEmptyPromptTxt(prompt)).length;
|
||||
}
|
||||
|
||||
const availableContextSize = this.#getAvailableContextSize(numPromptTokens);
|
||||
let result = Math.floor(availableContextSize / numChunks) - padding;
|
||||
|
||||
if (this.chunkSizeLimit !== undefined) {
|
||||
result = Math.min(this.chunkSizeLimit, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a text splitter configured to maximally pack the available context window.
|
||||
*/
|
||||
getTextSplitterGivenPrompt(
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
const chunkSize = this.getAvailableChunkSize(prompt, numChunks, padding);
|
||||
if (chunkSize === 0) {
|
||||
throw new Error("Got 0 as available chunk size");
|
||||
prompt: BasePromptTemplate,
|
||||
numChunks: number = 1,
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): TextSplitter {
|
||||
const chunkSize = this.#getAvailableChunkSize(prompt, numChunks, padding);
|
||||
if (chunkSize <= 0) {
|
||||
throw new TypeError(`Chunk size ${chunkSize} is not positive.`);
|
||||
}
|
||||
const chunkOverlap = this.chunkOverlapRatio * chunkSize;
|
||||
return new SentenceSplitter({
|
||||
const chunkOverlap = Math.floor(this.chunkOverlapRatio * chunkSize);
|
||||
return new TokenTextSplitter({
|
||||
separator: this.separator,
|
||||
chunkSize,
|
||||
chunkOverlap,
|
||||
separator: this.separator,
|
||||
tokenizer: this.tokenizer,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Repack resplits the strings based on the optimal text splitter.
|
||||
* Truncate text chunks to fit within the available context window.
|
||||
*/
|
||||
truncate(
|
||||
prompt: BasePromptTemplate,
|
||||
textChunks: string[],
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): string[] {
|
||||
const textSplitter = this.getTextSplitterGivenPrompt(
|
||||
prompt,
|
||||
textChunks.length,
|
||||
padding,
|
||||
);
|
||||
return textChunks.map((chunk) => truncateText(chunk, textSplitter));
|
||||
}
|
||||
|
||||
/**
|
||||
* Repack text chunks to better utilize the available context window.
|
||||
*/
|
||||
repack(
|
||||
prompt: PromptTemplate,
|
||||
prompt: BasePromptTemplate,
|
||||
textChunks: string[],
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): string[] {
|
||||
const textSplitter = this.getTextSplitterGivenPrompt(prompt, 1, padding);
|
||||
const combinedStr = textChunks.join("\n\n");
|
||||
const combinedStr = textChunks
|
||||
.map((c) => c.trim())
|
||||
.filter((c) => c.length > 0)
|
||||
.join("\n\n");
|
||||
return textSplitter.splitText(combinedStr);
|
||||
}
|
||||
|
||||
@@ -154,7 +177,8 @@ export class PromptHelper {
|
||||
} = options ?? {};
|
||||
return new PromptHelper({
|
||||
contextWindow: metadata.contextWindow,
|
||||
numOutput: metadata.maxTokens ?? DEFAULT_NUM_OUTPUTS,
|
||||
// fixme: numOutput is not in LLMMetadata
|
||||
numOutput: DEFAULT_NUM_OUTPUTS,
|
||||
chunkOverlapRatio,
|
||||
chunkSizeLimit,
|
||||
tokenizer,
|
||||
|
||||
@@ -13,6 +13,7 @@ export { MetadataAwareTextSplitter, NodeParser, TextSplitter } from "./base";
|
||||
export { MarkdownNodeParser } from "./markdown";
|
||||
export { SentenceSplitter } from "./sentence-splitter";
|
||||
export { SentenceWindowNodeParser } from "./sentence-window";
|
||||
export { TokenTextSplitter } from "./token-text-splitter";
|
||||
export type { SplitterParams } from "./type";
|
||||
export {
|
||||
splitByChar,
|
||||
@@ -20,5 +21,6 @@ export {
|
||||
splitByRegex,
|
||||
splitBySentenceTokenizer,
|
||||
splitBySep,
|
||||
truncateText,
|
||||
} from "./utils";
|
||||
export type { TextSplitterFn } from "./utils";
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
import type { Tokenizer } from "@llamaindex/env";
|
||||
import { z } from "zod";
|
||||
import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, Settings } from "../global";
|
||||
import { MetadataAwareTextSplitter } from "./base";
|
||||
import type { SplitterParams } from "./type";
|
||||
import { splitByChar, splitBySep } from "./utils";
|
||||
|
||||
const DEFAULT_METADATA_FORMAT_LEN = 2;
|
||||
|
||||
const tokenTextSplitterSchema = z.object({
|
||||
chunkSize: z.number().positive().default(DEFAULT_CHUNK_SIZE),
|
||||
chunkOverlap: z.number().nonnegative().default(DEFAULT_CHUNK_OVERLAP),
|
||||
separator: z.string().default(" "),
|
||||
backupSeparators: z.array(z.string()).default(["\n"]),
|
||||
});
|
||||
|
||||
export class TokenTextSplitter extends MetadataAwareTextSplitter {
|
||||
chunkSize: number = DEFAULT_CHUNK_SIZE;
|
||||
chunkOverlap: number = DEFAULT_CHUNK_OVERLAP;
|
||||
separator: string = " ";
|
||||
backupSeparators: string[] = ["\n"];
|
||||
#tokenizer: Tokenizer;
|
||||
#splitFns: Array<(text: string) => string[]> = [];
|
||||
|
||||
constructor(
|
||||
params?: SplitterParams & Partial<z.infer<typeof tokenTextSplitterSchema>>,
|
||||
) {
|
||||
super();
|
||||
|
||||
if (params) {
|
||||
const parsedParams = tokenTextSplitterSchema.parse(params);
|
||||
this.chunkSize = parsedParams.chunkSize;
|
||||
this.chunkOverlap = parsedParams.chunkOverlap;
|
||||
this.separator = parsedParams.separator;
|
||||
this.backupSeparators = parsedParams.backupSeparators;
|
||||
}
|
||||
|
||||
if (this.chunkOverlap > this.chunkSize) {
|
||||
throw new Error(
|
||||
`Got a larger chunk overlap (${this.chunkOverlap}) than chunk size (${this.chunkSize}), should be smaller.`,
|
||||
);
|
||||
}
|
||||
|
||||
this.#tokenizer = params?.tokenizer ?? Settings.tokenizer;
|
||||
|
||||
const allSeparators = [this.separator, ...this.backupSeparators];
|
||||
this.#splitFns = allSeparators.map((sep) => splitBySep(sep));
|
||||
this.#splitFns.push(splitByChar());
|
||||
}
|
||||
|
||||
/**
|
||||
* Split text into chunks, reserving space required for metadata string.
|
||||
* @param text The text to split.
|
||||
* @param metadata The metadata string.
|
||||
* @returns An array of text chunks.
|
||||
*/
|
||||
splitTextMetadataAware(text: string, metadata: string): string[] {
|
||||
const metadataLength =
|
||||
this.tokenSize(metadata) + DEFAULT_METADATA_FORMAT_LEN;
|
||||
const effectiveChunkSize = this.chunkSize - metadataLength;
|
||||
|
||||
if (effectiveChunkSize <= 0) {
|
||||
throw new Error(
|
||||
`Metadata length (${metadataLength}) is longer than chunk size (${this.chunkSize}). ` +
|
||||
`Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
|
||||
);
|
||||
} else if (effectiveChunkSize < 50) {
|
||||
console.warn(
|
||||
`Metadata length (${metadataLength}) is close to chunk size (${this.chunkSize}). ` +
|
||||
`Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
|
||||
);
|
||||
}
|
||||
|
||||
return this._splitText(text, effectiveChunkSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split text into chunks.
|
||||
* @param text The text to split.
|
||||
* @returns An array of text chunks.
|
||||
*/
|
||||
splitText(text: string): string[] {
|
||||
return this._splitText(text, this.chunkSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal method to split text into chunks up to a specified size.
|
||||
* @param text The text to split.
|
||||
* @param chunkSize The maximum size of each chunk.
|
||||
* @returns An array of text chunks.
|
||||
*/
|
||||
private _splitText(text: string, chunkSize: number): string[] {
|
||||
if (text === "") return [text];
|
||||
|
||||
// Dispatch chunking start event
|
||||
Settings.callbackManager.dispatchEvent("chunking-start", { text: [text] });
|
||||
|
||||
const splits = this._split(text, chunkSize);
|
||||
const chunks = this._merge(splits, chunkSize);
|
||||
|
||||
Settings.callbackManager.dispatchEvent("chunking-end", { chunks });
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Break text into splits that are smaller than the chunk size.
|
||||
* @param text The text to split.
|
||||
* @param chunkSize The maximum size of each split.
|
||||
* @returns An array of text splits.
|
||||
*/
|
||||
private _split(text: string, chunkSize: number): string[] {
|
||||
if (this.tokenSize(text) <= chunkSize) {
|
||||
return [text];
|
||||
}
|
||||
|
||||
for (const splitFn of this.#splitFns) {
|
||||
const splits = splitFn(text);
|
||||
if (splits.length > 1) {
|
||||
const newSplits: string[] = [];
|
||||
for (const split of splits) {
|
||||
const splitLen = this.tokenSize(split);
|
||||
if (splitLen <= chunkSize) {
|
||||
newSplits.push(split);
|
||||
} else {
|
||||
newSplits.push(...this._split(split, chunkSize));
|
||||
}
|
||||
}
|
||||
return newSplits;
|
||||
}
|
||||
}
|
||||
|
||||
return [text];
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge splits into chunks with overlap.
|
||||
* @param splits The array of text splits.
|
||||
* @param chunkSize The maximum size of each chunk.
|
||||
* @returns An array of merged text chunks.
|
||||
*/
|
||||
private _merge(splits: string[], chunkSize: number): string[] {
|
||||
const chunks: string[] = [];
|
||||
let currentChunk: string[] = [];
|
||||
let currentLength = 0;
|
||||
|
||||
for (const split of splits) {
|
||||
const splitLength = this.tokenSize(split);
|
||||
|
||||
if (splitLength > chunkSize) {
|
||||
console.warn(
|
||||
`Got a split of size ${splitLength}, larger than chunk size ${chunkSize}.`,
|
||||
);
|
||||
}
|
||||
|
||||
if (currentLength + splitLength > chunkSize) {
|
||||
const chunk = currentChunk.join("").trim();
|
||||
if (chunk) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
currentChunk = [];
|
||||
currentLength = 0;
|
||||
|
||||
const overlapTokens = this.chunkOverlap;
|
||||
const overlapSplits: string[] = [];
|
||||
|
||||
let overlapLength = 0;
|
||||
while (
|
||||
overlapSplits.length < splits.length &&
|
||||
overlapLength < overlapTokens
|
||||
) {
|
||||
const overlapSplit = currentChunk.shift();
|
||||
if (!overlapSplit) break;
|
||||
overlapSplits.push(overlapSplit);
|
||||
overlapLength += this.tokenSize(overlapSplit);
|
||||
}
|
||||
|
||||
for (const overlapSplit of overlapSplits.reverse()) {
|
||||
currentChunk.push(overlapSplit);
|
||||
currentLength += this.tokenSize(overlapSplit);
|
||||
if (currentLength >= overlapTokens) break;
|
||||
}
|
||||
}
|
||||
|
||||
currentChunk.push(split);
|
||||
currentLength += splitLength;
|
||||
}
|
||||
|
||||
const finalChunk = currentChunk.join("").trim();
|
||||
if (finalChunk) {
|
||||
chunks.push(finalChunk);
|
||||
}
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the number of tokens in the text using the tokenizer.
|
||||
* @param text The text to tokenize.
|
||||
* @returns The number of tokens.
|
||||
*/
|
||||
private tokenSize(text: string): number {
|
||||
return this.#tokenizer.encode(text).length;
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,10 @@ import SentenceTokenizer from "./sentence_tokenizer";
|
||||
|
||||
export type TextSplitterFn = (text: string) => string[];
|
||||
|
||||
const truncateText = (text: string, textSplitter: TextSplitter): string => {
|
||||
export const truncateText = (
|
||||
text: string,
|
||||
textSplitter: TextSplitter,
|
||||
): string => {
|
||||
const chunks = textSplitter.splitText(text);
|
||||
return chunks[0] ?? text;
|
||||
};
|
||||
|
||||
@@ -12,11 +12,15 @@ export {
|
||||
defaultCondenseQuestionPrompt,
|
||||
defaultContextSystemPrompt,
|
||||
defaultKeywordExtractPrompt,
|
||||
defaultNodeTextTemplate,
|
||||
defaultQueryKeywordExtractPrompt,
|
||||
defaultQuestionExtractPrompt,
|
||||
defaultRefinePrompt,
|
||||
defaultSubQuestionPrompt,
|
||||
defaultSummaryPrompt,
|
||||
defaultTextQAPrompt,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
defaultTreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
export type {
|
||||
@@ -25,9 +29,12 @@ export type {
|
||||
ContextSystemPrompt,
|
||||
KeywordExtractPrompt,
|
||||
QueryKeywordExtractPrompt,
|
||||
QuestionExtractPrompt,
|
||||
RefinePrompt,
|
||||
SubQuestionPrompt,
|
||||
SummaryPrompt,
|
||||
TextQAPrompt,
|
||||
TitleCombinePrompt,
|
||||
TitleExtractorPrompt,
|
||||
TreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
|
||||
@@ -13,8 +13,12 @@ export type CondenseQuestionPrompt = PromptTemplate<
|
||||
["chatHistory", "question"]
|
||||
>;
|
||||
export type ContextSystemPrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractPrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractPrompt = PromptTemplate<["context", "maxKeywords"]>;
|
||||
export type QueryKeywordExtractPrompt = PromptTemplate<["question"]>;
|
||||
export type QuestionExtractPrompt = PromptTemplate<["context", "numQuestions"]>;
|
||||
export type TitleExtractorPrompt = PromptTemplate<["context"]>;
|
||||
export type TitleCombinePrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractorPrompt = PromptTemplate<["context", "numKeywords"]>;
|
||||
|
||||
export const defaultTextQAPrompt: TextQAPrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
@@ -64,11 +68,13 @@ export const defaultRefinePrompt: RefinePrompt = new PromptTemplate({
|
||||
templateVars: ["query", "existingAnswer", "context"],
|
||||
template: `The original query is as follows: {query}
|
||||
We have provided an existing answer: {existingAnswer}
|
||||
We have the opportunity to refine the existing answer (only if needed) with some more context below.
|
||||
We have the opportunity to refine the existing answer
|
||||
(only if needed) with some more context below.
|
||||
------------
|
||||
{context}
|
||||
------------
|
||||
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
|
||||
Given the new context, refine the original answer to better answer the query.
|
||||
If the context isn't useful, return the original answer.
|
||||
Refined Answer:`,
|
||||
});
|
||||
|
||||
@@ -251,3 +257,55 @@ export const defaultQueryKeywordExtractPrompt = new PromptTemplate({
|
||||
}).partialFormat({
|
||||
maxKeywords: "10",
|
||||
});
|
||||
|
||||
export const defaultQuestionExtractPrompt = new PromptTemplate({
|
||||
templateVars: ["numQuestions", "context"],
|
||||
template: `(
|
||||
"Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
|
||||
"Try using these summaries to generate better questions that this context can answer."
|
||||
"---------------------"
|
||||
"{context}"
|
||||
"---------------------"
|
||||
"Provide questions in the following format: 'QUESTIONS: <questions>'"
|
||||
)`,
|
||||
}).partialFormat({
|
||||
numQuestions: "5",
|
||||
});
|
||||
|
||||
export const defaultTitleExtractorPromptTemplate = new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: `{context}
|
||||
Give a title that summarizes all of the unique entities, titles or themes found in the context.
|
||||
Title: `,
|
||||
});
|
||||
|
||||
export const defaultTitleCombinePromptTemplate = new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: `{context}
|
||||
Based on the above candidate titles and contents, what is the comprehensive title for this document?
|
||||
Title: `,
|
||||
});
|
||||
|
||||
export const defaultKeywordExtractorPromptTemplate = new PromptTemplate({
|
||||
templateVars: ["context", "numKeywords"],
|
||||
template: `{context}
|
||||
Give {numKeywords} unique keywords for this document.
|
||||
Format as comma separated.
|
||||
Keywords: `,
|
||||
}).partialFormat({
|
||||
keywordCount: "5",
|
||||
});
|
||||
|
||||
export const defaultNodeTextTemplate = new PromptTemplate({
|
||||
templateVars: ["metadataStr", "content"],
|
||||
template: `[Excerpt from document]
|
||||
{metadataStr}
|
||||
Excerpt:
|
||||
-----
|
||||
{content}
|
||||
-----
|
||||
`,
|
||||
}).partialFormat({
|
||||
metadataStr: "",
|
||||
content: "",
|
||||
});
|
||||
|
||||
@@ -403,27 +403,27 @@ class MultiModal extends BaseSynthesizer {
|
||||
}
|
||||
}
|
||||
|
||||
export function getResponseSynthesizer(
|
||||
mode: ResponseMode,
|
||||
const modeToSynthesizer = {
|
||||
compact: CompactAndRefine,
|
||||
refine: Refine,
|
||||
tree_summarize: TreeSummarize,
|
||||
multi_modal: MultiModal,
|
||||
} as const;
|
||||
|
||||
export function getResponseSynthesizer<Mode extends ResponseMode>(
|
||||
mode: Mode,
|
||||
options: BaseSynthesizerOptions & {
|
||||
textQATemplate?: TextQAPrompt;
|
||||
refineTemplate?: RefinePrompt;
|
||||
summaryTemplate?: TreeSummarizePrompt;
|
||||
metadataMode?: MetadataMode;
|
||||
} = {},
|
||||
) {
|
||||
switch (mode) {
|
||||
case "compact": {
|
||||
return new CompactAndRefine(options);
|
||||
}
|
||||
case "refine": {
|
||||
return new Refine(options);
|
||||
}
|
||||
case "tree_summarize": {
|
||||
return new TreeSummarize(options);
|
||||
}
|
||||
case "multi_modal": {
|
||||
return new MultiModal(options);
|
||||
}
|
||||
): InstanceType<(typeof modeToSynthesizer)[Mode]> {
|
||||
const Synthesizer: (typeof modeToSynthesizer)[Mode] = modeToSynthesizer[mode];
|
||||
if (!Synthesizer) {
|
||||
throw new Error(`Invalid response mode: ${mode}`);
|
||||
}
|
||||
return new Synthesizer(options) as InstanceType<
|
||||
(typeof modeToSynthesizer)[Mode]
|
||||
>;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import {
|
||||
SentenceSplitter,
|
||||
splitBySentenceTokenizer,
|
||||
} from "@llamaindex/core/node-parser";
|
||||
import { Document } from "@llamaindex/core/schema";
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
describe("sentence splitter", () => {
|
||||
@@ -115,4 +116,26 @@ describe("sentence splitter", () => {
|
||||
const split = splitBySentenceTokenizer();
|
||||
expect(split(text)).toEqual([text]);
|
||||
});
|
||||
|
||||
test("split nodes with UUID IDs and correct relationships", () => {
|
||||
const UUID_REGEX =
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
|
||||
const sentenceSplitter = new SentenceSplitter();
|
||||
const docId = "test-doc-id";
|
||||
const doc = new Document({
|
||||
id_: docId,
|
||||
text: "This is a test sentence. This is another test sentence.",
|
||||
});
|
||||
const nodes = sentenceSplitter.getNodesFromDocuments([doc]);
|
||||
nodes.forEach((node) => {
|
||||
// test node id should match uuid regex
|
||||
expect(node.id_).toMatch(UUID_REGEX);
|
||||
|
||||
// test source reference to the doc ID
|
||||
const source = node.relationships?.SOURCE;
|
||||
expect(source).toBeDefined();
|
||||
expect(source).toHaveProperty("nodeId");
|
||||
expect((source as { nodeId: string }).nodeId).toEqual(docId);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -166,4 +166,68 @@ describe("Workflow", () => {
|
||||
greeting: "Hello Alice, you are 30 years old!",
|
||||
});
|
||||
});
|
||||
|
||||
test("workflow with two concurrent steps", async () => {
|
||||
const concurrentFlow = new Workflow({ verbose: true });
|
||||
|
||||
const step1 = vi.fn(async (_context, _ev: StartEvent) => {
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
return new StopEvent({ result: "Step 1 completed" });
|
||||
});
|
||||
|
||||
const step2 = vi.fn(async (_context, _ev: StartEvent) => {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
return new StopEvent({ result: "Step 2 completed" });
|
||||
});
|
||||
|
||||
concurrentFlow.addStep(StartEvent, step1);
|
||||
concurrentFlow.addStep(StartEvent, step2);
|
||||
|
||||
const startTime = new Date();
|
||||
const result = await concurrentFlow.run("start");
|
||||
const endTime = new Date();
|
||||
const duration = endTime.getTime() - startTime.getTime();
|
||||
|
||||
expect(step1).toHaveBeenCalledTimes(1);
|
||||
expect(step2).toHaveBeenCalledTimes(1);
|
||||
expect(duration).toBeLessThan(200);
|
||||
expect(result.data.result).toBe("Step 2 completed");
|
||||
});
|
||||
|
||||
test("workflow with two concurrent cyclic steps", async () => {
|
||||
const concurrentCyclicFlow = new Workflow({ verbose: true });
|
||||
|
||||
class Step1Event extends WorkflowEvent {}
|
||||
class Step2Event extends WorkflowEvent {}
|
||||
|
||||
let step2Count = 0;
|
||||
|
||||
const step1 = vi.fn(async (_context, ev: StartEvent | Step1Event) => {
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
return new Step1Event({ result: "Step 1 completed" });
|
||||
});
|
||||
|
||||
const step2 = vi.fn(async (_context, ev: StartEvent | Step2Event) => {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
step2Count++;
|
||||
if (step2Count >= 5) {
|
||||
return new StopEvent({ result: "Step 2 completed 5 times" });
|
||||
}
|
||||
return new Step2Event({ result: "Step 2 completed" });
|
||||
});
|
||||
|
||||
concurrentCyclicFlow.addStep([StartEvent, Step1Event], step1);
|
||||
concurrentCyclicFlow.addStep([StartEvent, Step2Event], step2);
|
||||
|
||||
const startTime = new Date();
|
||||
const result = await concurrentCyclicFlow.run("start");
|
||||
const endTime = new Date();
|
||||
const duration = endTime.getTime() - startTime.getTime();
|
||||
|
||||
expect(step1).toHaveBeenCalledTimes(1);
|
||||
expect(step2).toHaveBeenCalledTimes(5);
|
||||
expect(duration).toBeGreaterThan(500); // At least 5 * 100ms for step2
|
||||
expect(duration).toBeLessThan(1000); // Less than 1 second
|
||||
expect(result.data.result).toBe("Step 2 completed 5 times");
|
||||
});
|
||||
});
|
||||
|
||||
Vendored
+6
@@ -1,5 +1,11 @@
|
||||
# @llamaindex/env
|
||||
|
||||
## 0.1.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
|
||||
## 0.1.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
Vendored
+1
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/env",
|
||||
"description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
|
||||
"version": "0.1.12",
|
||||
"version": "0.1.13",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,105 @@
|
||||
# @llamaindex/experimental
|
||||
|
||||
## 0.0.101
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.100
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.99
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.98
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.97
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.96
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.95
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.94
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.93
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.0.92
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.0.91
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.0.90
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.0.89
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.88
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/experimental",
|
||||
"description": "Experimental package for LlamaIndexTS",
|
||||
"version": "0.0.88",
|
||||
"version": "0.0.101",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,142 @@
|
||||
# llamaindex
|
||||
|
||||
## 0.7.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 1364e8e: update metadata extractors to use PromptTemplate
|
||||
- 96fc69c: Correct initialization of QuestionsAnsweredExtractor so that it uses the promptTemplate arg when passed in
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 3b7736f: feat: added gemini 002 support
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
- @llamaindex/cloud@1.0.0
|
||||
- @llamaindex/ollama@0.0.8
|
||||
- @llamaindex/openai@0.1.17
|
||||
- @llamaindex/groq@0.0.16
|
||||
|
||||
## 0.6.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5729bd9: Fix LlamaCloud API calls for ensuring an index and for file uploads
|
||||
|
||||
## 0.6.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 6f75306: feat: support metadata filters for AstraDB
|
||||
- 94cb4ad: feat: Add metadata filters to ChromaDb and update to 1.9.2
|
||||
|
||||
## 0.6.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 6a9a7b1: fix: take init api key into account
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- @llamaindex/openai@0.1.16
|
||||
- @llamaindex/groq@0.0.15
|
||||
|
||||
## 0.6.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 62cba52: Add ensureIndex function to LlamaCloudIndex
|
||||
- d265e96: fix: ignore resolving unpdf for nextjs
|
||||
- d30bbf7: Convert undefined values to null in LlamaCloud filters
|
||||
- 53fd00a: Fix getPipelineId in LlamaCloudIndex
|
||||
|
||||
## 0.6.18
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5f67820: Fix that node parsers generate nodes with UUIDs
|
||||
- fe08d04: Fix LlamaCloud retrieval with multiple pipelines
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
- @llamaindex/cloud@0.2.14
|
||||
- @llamaindex/ollama@0.0.7
|
||||
- @llamaindex/openai@0.1.15
|
||||
- @llamaindex/groq@0.0.14
|
||||
|
||||
## 0.6.17
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ee697fb: fix: generate uuid when inserting to Qdrant
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
- @llamaindex/cloud@0.2.13
|
||||
- @llamaindex/ollama@0.0.6
|
||||
- @llamaindex/openai@0.1.14
|
||||
- @llamaindex/groq@0.0.13
|
||||
|
||||
## 0.6.16
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 63e9846: fix: preFilters does not work with asQueryEngine
|
||||
- 6f3a31c: feat: add metadata filters for Qdrant vector store
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
- @llamaindex/cloud@0.2.12
|
||||
- @llamaindex/ollama@0.0.5
|
||||
- @llamaindex/openai@0.1.13
|
||||
- @llamaindex/groq@0.0.12
|
||||
|
||||
## 0.6.15
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 2a82413: fix(core): set `Settings.llm` to OpenAI by default and support lazy load openai
|
||||
- Updated dependencies [2a82413]
|
||||
- Updated dependencies [0b20ff9]
|
||||
- @llamaindex/groq@0.0.11
|
||||
- @llamaindex/openai@0.1.12
|
||||
- @llamaindex/cloud@0.2.11
|
||||
|
||||
## 0.6.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [b17d439]
|
||||
- @llamaindex/core@0.2.9
|
||||
- @llamaindex/ollama@0.0.4
|
||||
- @llamaindex/openai@0.1.11
|
||||
- @llamaindex/groq@0.0.10
|
||||
|
||||
## 0.6.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [981811e]
|
||||
- @llamaindex/cloud@0.2.10
|
||||
|
||||
## 0.6.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- f7b4e94: feat: add filters for pinecone
|
||||
- 78037a6: fix: bypass service context embed model
|
||||
- 1d9e3b1: fix: export llama reader in non-nodejs runtime
|
||||
|
||||
## 0.6.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- df441e2: fix: consoleLogger is missing from `@llamaindex/env`
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/cloud@0.2.9
|
||||
- @llamaindex/core@0.2.8
|
||||
- @llamaindex/env@0.1.13
|
||||
- @llamaindex/ollama@0.0.3
|
||||
- @llamaindex/openai@0.1.10
|
||||
- @llamaindex/groq@0.0.9
|
||||
|
||||
## 0.6.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,104 @@
|
||||
# @llamaindex/cloudflare-worker-agent-test
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.0.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.0.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.0.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.0.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloudflare-worker-agent-test",
|
||||
"version": "0.0.72",
|
||||
"version": "0.0.85",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,50 @@
|
||||
# @llamaindex/llama-parse-browser-test
|
||||
|
||||
## 0.0.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@1.0.0
|
||||
|
||||
## 0.0.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.14
|
||||
|
||||
## 0.0.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.13
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.12
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [0b20ff9]
|
||||
- @llamaindex/cloud@0.2.11
|
||||
|
||||
## 0.0.6
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [981811e]
|
||||
- @llamaindex/cloud@0.2.10
|
||||
|
||||
## 0.0.5
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- @llamaindex/cloud@0.2.9
|
||||
|
||||
## 0.0.4
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/llama-parse-browser-test",
|
||||
"private": true,
|
||||
"version": "0.0.4",
|
||||
"version": "0.0.11",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
|
||||
@@ -1,5 +1,104 @@
|
||||
# @llamaindex/next-agent-test
|
||||
|
||||
## 0.1.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.1.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.1.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.1.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.1.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.1.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.1.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.1.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.1.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.1.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.1.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.1.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.1.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.1.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-agent-test",
|
||||
"version": "0.1.72",
|
||||
"version": "0.1.85",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,104 @@
|
||||
# test-edge-runtime
|
||||
|
||||
## 0.1.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.1.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.1.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.1.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.1.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.1.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.1.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.1.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.1.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.1.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.1.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.1.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.1.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.1.71
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/nextjs-edge-runtime-test",
|
||||
"version": "0.1.71",
|
||||
"version": "0.1.84",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,104 @@
|
||||
# @llamaindex/next-node-runtime
|
||||
|
||||
## 0.0.66
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.65
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.64
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.63
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.62
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.61
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.60
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.59
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.58
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.0.57
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.0.56
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.0.55
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.0.54
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.53
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-node-runtime-test",
|
||||
"version": "0.0.53",
|
||||
"version": "0.0.66",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,104 @@
|
||||
# @llamaindex/waku-query-engine-test
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2a82413]
|
||||
- llamaindex@0.6.15
|
||||
|
||||
## 0.0.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.14
|
||||
|
||||
## 0.0.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.6.13
|
||||
|
||||
## 0.0.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [f7b4e94]
|
||||
- Updated dependencies [78037a6]
|
||||
- Updated dependencies [1d9e3b1]
|
||||
- llamaindex@0.6.12
|
||||
|
||||
## 0.0.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [df441e2]
|
||||
- llamaindex@0.6.11
|
||||
|
||||
## 0.0.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/waku-query-engine-test",
|
||||
"version": "0.0.72",
|
||||
"version": "0.0.85",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llamaindex",
|
||||
"version": "0.6.10",
|
||||
"version": "0.7.0",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
"keywords": [
|
||||
@@ -48,7 +48,7 @@
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.6",
|
||||
"ajv": "^8.17.1",
|
||||
"assemblyai": "^4.7.0",
|
||||
"chromadb": "1.8.1",
|
||||
"chromadb": "1.9.2",
|
||||
"cohere-ai": "7.13.0",
|
||||
"discord-api-types": "^0.37.98",
|
||||
"groq-sdk": "^0.6.1",
|
||||
|
||||
@@ -2,7 +2,6 @@ import {
|
||||
type CallbackManager,
|
||||
Settings as CoreSettings,
|
||||
} from "@llamaindex/core/global";
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
|
||||
import { PromptHelper } from "@llamaindex/core/indices";
|
||||
|
||||
@@ -61,12 +60,6 @@ class GlobalSettings implements Config {
|
||||
}
|
||||
|
||||
get llm(): LLM {
|
||||
// fixme: we might need check internal error instead of try-catch here
|
||||
try {
|
||||
CoreSettings.llm;
|
||||
} catch (error) {
|
||||
CoreSettings.llm = new OpenAI();
|
||||
}
|
||||
return CoreSettings.llm;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ export class LLamaCloudFileService {
|
||||
) {
|
||||
initService();
|
||||
const { data: file } = await FilesService.uploadFileApiV1FilesPost({
|
||||
path: { project_id: projectId },
|
||||
query: { project_id: projectId },
|
||||
body: {
|
||||
upload_file: uploadFile,
|
||||
},
|
||||
@@ -85,7 +85,7 @@ export class LLamaCloudFileService {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100)); // Sleep for 100ms
|
||||
}
|
||||
throw new Error(
|
||||
`File processing did not complete after ${maxAttempts} attempts.`,
|
||||
`File processing did not complete after ${maxAttempts} attempts. Check your LlamaCloud index at https://cloud.llamaindex.ai/project/${projectId}/deploy/${pipelineId} for more details.`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
|
||||
import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
|
||||
import type { Document, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { Document } from "@llamaindex/core/schema";
|
||||
import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
|
||||
import type { BaseNodePostprocessor } from "../postprocessors/types.js";
|
||||
import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
|
||||
import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
|
||||
import { getPipelineCreate } from "./config.js";
|
||||
import type { CloudConstructorParams } from "./type.js";
|
||||
import { getAppBaseUrl, getProjectId, initService } from "./utils.js";
|
||||
import {
|
||||
getAppBaseUrl,
|
||||
getPipelineId,
|
||||
getProjectId,
|
||||
initService,
|
||||
} from "./utils.js";
|
||||
|
||||
import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api";
|
||||
import { SentenceSplitter } from "@llamaindex/core/node-parser";
|
||||
import { PipelinesService, type PipelineCreate } from "@llamaindex/cloud/api";
|
||||
import type { BaseRetriever } from "@llamaindex/core/retriever";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
import { Settings } from "../Settings.js";
|
||||
|
||||
export class LlamaCloudIndex {
|
||||
@@ -28,10 +30,7 @@ export class LlamaCloudIndex {
|
||||
verbose = Settings.debug,
|
||||
raiseOnError = false,
|
||||
): Promise<void> {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
if (verbose) {
|
||||
console.log("Waiting for pipeline ingestion: ");
|
||||
@@ -78,10 +77,7 @@ export class LlamaCloudIndex {
|
||||
verbose = Settings.debug,
|
||||
raiseOnError = false,
|
||||
): Promise<void> {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
if (verbose) {
|
||||
console.log("Loading data: ");
|
||||
@@ -143,17 +139,13 @@ export class LlamaCloudIndex {
|
||||
public async getPipelineId(
|
||||
name?: string,
|
||||
projectName?: string,
|
||||
organizationId?: string,
|
||||
): Promise<string> {
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
path: {
|
||||
project_id: await this.getProjectId(projectName),
|
||||
project_name: name ?? this.params.name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
return pipelines[0]!.id;
|
||||
return await getPipelineId(
|
||||
name ?? this.params.name,
|
||||
projectName ?? this.params.projectName,
|
||||
organizationId ?? this.params.organizationId,
|
||||
);
|
||||
}
|
||||
|
||||
public async getProjectId(
|
||||
@@ -166,75 +158,42 @@ export class LlamaCloudIndex {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds documents to the given index parameters. If the index does not exist, it will be created.
|
||||
*
|
||||
* @param params - An object containing the following properties:
|
||||
* - documents: An array of Document objects to be added to the index.
|
||||
* - verbose: Optional boolean to enable verbose logging.
|
||||
* - Additional properties from CloudConstructorParams.
|
||||
* @returns A Promise that resolves to a new LlamaCloudIndex instance.
|
||||
*/
|
||||
static async fromDocuments(
|
||||
params: {
|
||||
documents: Document[];
|
||||
transformations?: TransformComponent[];
|
||||
verbose?: boolean;
|
||||
} & CloudConstructorParams,
|
||||
config?: {
|
||||
embedding: PipelineCreate["embedding_config"];
|
||||
transform: PipelineCreate["transform_config"];
|
||||
},
|
||||
): Promise<LlamaCloudIndex> {
|
||||
initService(params);
|
||||
const defaultTransformations: TransformComponent[] = [
|
||||
new SentenceSplitter(),
|
||||
new OpenAIEmbedding({
|
||||
apiKey: getEnv("OPENAI_API_KEY"),
|
||||
}),
|
||||
];
|
||||
const index = new LlamaCloudIndex({ ...params });
|
||||
await index.ensureIndex({ ...config, verbose: params.verbose ?? false });
|
||||
await index.addDocuments(params.documents, params.verbose);
|
||||
return index;
|
||||
}
|
||||
|
||||
async addDocuments(documents: Document[], verbose?: boolean): Promise<void> {
|
||||
const apiUrl = getAppBaseUrl();
|
||||
|
||||
const pipelineCreateParams = await getPipelineCreate({
|
||||
pipelineName: params.name,
|
||||
pipelineType: "MANAGED",
|
||||
inputNodes: params.documents,
|
||||
transformations: params.transformations ?? defaultTransformations,
|
||||
});
|
||||
|
||||
const { data: project } =
|
||||
await ProjectsService.upsertProjectApiV1ProjectsPut({
|
||||
path: {
|
||||
organization_id: params.organizationId,
|
||||
},
|
||||
body: {
|
||||
name: params.projectName ?? "default",
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!project.id) {
|
||||
throw new Error("Project ID should be defined");
|
||||
}
|
||||
|
||||
const { data: pipeline } =
|
||||
await PipelinesService.upsertPipelineApiV1PipelinesPut({
|
||||
path: {
|
||||
project_id: project.id,
|
||||
},
|
||||
body: pipelineCreateParams.configured_transformations
|
||||
? {
|
||||
name: params.name,
|
||||
configured_transformations:
|
||||
pipelineCreateParams.configured_transformations,
|
||||
}
|
||||
: {
|
||||
name: params.name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!pipeline.id) {
|
||||
throw new Error("Pipeline ID must be defined");
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
console.log(`Created pipeline ${pipeline.id} with name ${params.name}`);
|
||||
}
|
||||
const projectId = await this.getProjectId();
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut(
|
||||
{
|
||||
path: {
|
||||
pipeline_id: pipeline.id,
|
||||
pipeline_id: pipelineId,
|
||||
},
|
||||
body: params.documents.map((doc) => ({
|
||||
body: documents.map((doc) => ({
|
||||
metadata: doc.metadata,
|
||||
text: doc.text,
|
||||
excluded_embed_metadata_keys: doc.excludedEmbedMetadataKeys,
|
||||
@@ -248,7 +207,7 @@ export class LlamaCloudIndex {
|
||||
const { data: pipelineStatus } =
|
||||
await PipelinesService.getPipelineStatusApiV1PipelinesPipelineIdStatusGet(
|
||||
{
|
||||
path: { pipeline_id: pipeline.id },
|
||||
path: { pipeline_id: pipelineId },
|
||||
throwOnError: true,
|
||||
},
|
||||
);
|
||||
@@ -262,32 +221,30 @@ export class LlamaCloudIndex {
|
||||
|
||||
if (pipelineStatus.status === "ERROR") {
|
||||
console.error(
|
||||
`Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
throw new Error("Some documents failed to ingest");
|
||||
}
|
||||
|
||||
if (pipelineStatus.status === "PARTIAL_SUCCESS") {
|
||||
console.info(
|
||||
`Documents ingestion partially succeeded, to check a more complete status check your pipeline at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Documents ingestion partially succeeded, to check a more complete status check your pipeline at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
if (verbose) {
|
||||
process.stdout.write(".");
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
if (verbose) {
|
||||
console.info(
|
||||
`Ingestion completed, find your index at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Ingestion completed, find your index at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
}
|
||||
|
||||
return new LlamaCloudIndex({ ...params });
|
||||
}
|
||||
|
||||
asRetriever(params: CloudRetrieveParams = {}): BaseRetriever {
|
||||
@@ -308,20 +265,12 @@ export class LlamaCloudIndex {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever,
|
||||
params?.responseSynthesizer,
|
||||
params?.preFilters,
|
||||
params?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
async insert(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost(
|
||||
{
|
||||
@@ -344,14 +293,7 @@ export class LlamaCloudIndex {
|
||||
}
|
||||
|
||||
async delete(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete(
|
||||
{
|
||||
@@ -366,14 +308,7 @@ export class LlamaCloudIndex {
|
||||
}
|
||||
|
||||
async refreshDoc(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut(
|
||||
{
|
||||
@@ -394,4 +329,71 @@ export class LlamaCloudIndex {
|
||||
|
||||
await this.waitForDocumentIngestion([document.id_]);
|
||||
}
|
||||
|
||||
public async ensureIndex(config?: {
|
||||
embedding?: PipelineCreate["embedding_config"];
|
||||
transform?: PipelineCreate["transform_config"];
|
||||
verbose?: boolean;
|
||||
}): Promise<void> {
|
||||
const projectId = await this.getProjectId();
|
||||
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
query: {
|
||||
project_id: projectId,
|
||||
pipeline_name: this.params.name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (pipelines.length === 0) {
|
||||
// no pipeline found, create a new one
|
||||
let embeddingConfig = config?.embedding;
|
||||
if (!embeddingConfig) {
|
||||
// no embedding config provided, use OpenAI as default
|
||||
const openAIApiKey = getEnv("OPENAI_API_KEY");
|
||||
const embeddingModel = getEnv("EMBEDDING_MODEL");
|
||||
if (!openAIApiKey || !embeddingModel) {
|
||||
throw new Error(
|
||||
"No embedding configuration provided. Fallback to OpenAI embedding model. OPENAI_API_KEY and EMBEDDING_MODEL environment variables must be set.",
|
||||
);
|
||||
}
|
||||
embeddingConfig = {
|
||||
type: "OPENAI_EMBEDDING",
|
||||
component: {
|
||||
api_key: openAIApiKey,
|
||||
model_name: embeddingModel,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
let transformConfig = config?.transform;
|
||||
if (!transformConfig) {
|
||||
transformConfig = {
|
||||
mode: "auto",
|
||||
chunk_size: 1024,
|
||||
chunk_overlap: 200,
|
||||
};
|
||||
}
|
||||
|
||||
const { data: pipeline } =
|
||||
await PipelinesService.upsertPipelineApiV1PipelinesPut({
|
||||
query: {
|
||||
project_id: projectId,
|
||||
},
|
||||
body: {
|
||||
name: this.params.name,
|
||||
embedding_config: embeddingConfig,
|
||||
transform_config: transformConfig,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (config?.verbose) {
|
||||
console.log(
|
||||
`Created pipeline ${pipeline.id} with name ${pipeline.name}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
PipelinesService,
|
||||
type RetrievalParams,
|
||||
@@ -11,7 +12,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema";
|
||||
import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
|
||||
import { extractText } from "@llamaindex/core/utils";
|
||||
import type { ClientParams, CloudConstructorParams } from "./type.js";
|
||||
import { getProjectId, initService } from "./utils.js";
|
||||
import { getPipelineId, initService } from "./utils.js";
|
||||
|
||||
export type CloudRetrieveParams = Omit<
|
||||
RetrievalParams,
|
||||
@@ -42,6 +43,24 @@ export class LlamaCloudRetriever extends BaseRetriever {
|
||||
});
|
||||
}
|
||||
|
||||
// LlamaCloud expects null values for filters, but LlamaIndexTS uses undefined for empty values
|
||||
// This function converts the undefined values to null
|
||||
private convertFilter(filters?: MetadataFilters): MetadataFilters | null {
|
||||
if (!filters) return null;
|
||||
|
||||
const processFilter = (
|
||||
filter: MetadataFilter | MetadataFilters,
|
||||
): MetadataFilter | MetadataFilters => {
|
||||
if ("filters" in filter) {
|
||||
// type MetadataFilters
|
||||
return { ...filter, filters: filter.filters.map(processFilter) };
|
||||
}
|
||||
return { ...filter, value: filter.value ?? null };
|
||||
};
|
||||
|
||||
return { ...filters, filters: filters.filters.map(processFilter) };
|
||||
}
|
||||
|
||||
constructor(params: CloudConstructorParams & CloudRetrieveParams) {
|
||||
super();
|
||||
this.clientParams = { apiKey: params.apiKey, baseUrl: params.baseUrl };
|
||||
@@ -57,45 +76,24 @@ export class LlamaCloudRetriever extends BaseRetriever {
|
||||
}
|
||||
|
||||
async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
query: {
|
||||
project_id: await getProjectId(this.projectName, this.organizationId),
|
||||
project_name: this.pipelineName,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
const pipelineId = await getPipelineId(
|
||||
this.pipelineName,
|
||||
this.projectName,
|
||||
this.organizationId,
|
||||
);
|
||||
|
||||
if (pipelines.length === 0 || !pipelines[0]!.id) {
|
||||
throw new Error(
|
||||
`No pipeline found with name ${this.pipelineName} in project ${this.projectName}`,
|
||||
);
|
||||
}
|
||||
|
||||
const { data: pipeline } =
|
||||
await PipelinesService.getPipelineApiV1PipelinesPipelineIdGet({
|
||||
path: {
|
||||
pipeline_id: pipelines[0]!.id,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!pipeline) {
|
||||
throw new Error(
|
||||
`No pipeline found with name ${this.pipelineName} in project ${this.projectName}`,
|
||||
);
|
||||
}
|
||||
const filters = this.convertFilter(this.retrieveParams.filters);
|
||||
|
||||
const { data: results } =
|
||||
await PipelinesService.runSearchApiV1PipelinesPipelineIdRetrievePost({
|
||||
throwOnError: true,
|
||||
path: {
|
||||
pipeline_id: pipeline.id,
|
||||
pipeline_id: pipelineId,
|
||||
},
|
||||
body: {
|
||||
...this.retrieveParams,
|
||||
query: extractText(query),
|
||||
search_filters: this.retrieveParams.filters as MetadataFilters,
|
||||
search_filters: filters,
|
||||
dense_similarity_top_k: this.retrieveParams.similarityTopK!,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
import type {
|
||||
ConfiguredTransformationItem,
|
||||
PipelineCreate,
|
||||
PipelineType,
|
||||
} from "@llamaindex/cloud/api";
|
||||
import { SentenceSplitter } from "@llamaindex/core/node-parser";
|
||||
import { BaseNode, type TransformComponent } from "@llamaindex/core/schema";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
export type GetPipelineCreateParams = {
|
||||
pipelineName: string;
|
||||
pipelineType: PipelineType;
|
||||
transformations?: TransformComponent[];
|
||||
inputNodes?: BaseNode[];
|
||||
};
|
||||
|
||||
function getTransformationConfig(
|
||||
transformation: TransformComponent,
|
||||
): ConfiguredTransformationItem {
|
||||
if (transformation instanceof SentenceSplitter) {
|
||||
return {
|
||||
configurable_transformation_type: "SENTENCE_AWARE_NODE_PARSER",
|
||||
component: {
|
||||
chunk_size: transformation.chunkSize, // TODO: set to public in SentenceSplitter
|
||||
chunk_overlap: transformation.chunkOverlap, // TODO: set to public in SentenceSplitter
|
||||
include_metadata: transformation.includeMetadata,
|
||||
include_prev_next_rel: transformation.includePrevNextRel,
|
||||
},
|
||||
};
|
||||
}
|
||||
if (transformation instanceof OpenAIEmbedding) {
|
||||
return {
|
||||
configurable_transformation_type: "OPENAI_EMBEDDING",
|
||||
component: {
|
||||
model: transformation.model,
|
||||
api_key: transformation.apiKey,
|
||||
embed_batch_size: transformation.embedBatchSize,
|
||||
dimensions: transformation.dimensions,
|
||||
},
|
||||
};
|
||||
}
|
||||
throw new Error(`Unsupported transformation: ${typeof transformation}`);
|
||||
}
|
||||
|
||||
export async function getPipelineCreate(
|
||||
params: GetPipelineCreateParams,
|
||||
): Promise<PipelineCreate> {
|
||||
const { pipelineName, pipelineType, transformations = [] } = params;
|
||||
|
||||
return {
|
||||
name: pipelineName,
|
||||
configured_transformations: transformations.map(getTransformationConfig),
|
||||
pipeline_type: pipelineType,
|
||||
};
|
||||
}
|
||||
@@ -1,4 +1,8 @@
|
||||
import { client, ProjectsService } from "@llamaindex/cloud/api";
|
||||
import {
|
||||
client,
|
||||
PipelinesService,
|
||||
ProjectsService,
|
||||
} from "@llamaindex/cloud/api";
|
||||
import { DEFAULT_BASE_URL } from "@llamaindex/core/global";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { ClientParams } from "./type.js";
|
||||
@@ -40,9 +44,9 @@ export async function getProjectId(
|
||||
): Promise<string> {
|
||||
const { data: projects } = await ProjectsService.listProjectsApiV1ProjectsGet(
|
||||
{
|
||||
path: {
|
||||
query: {
|
||||
project_name: projectName,
|
||||
organization_id: organizationId,
|
||||
organization_id: organizationId ?? null,
|
||||
},
|
||||
throwOnError: true,
|
||||
},
|
||||
@@ -66,3 +70,26 @@ export async function getProjectId(
|
||||
|
||||
return project.id;
|
||||
}
|
||||
|
||||
export async function getPipelineId(
|
||||
name: string,
|
||||
projectName: string,
|
||||
organizationId?: string,
|
||||
): Promise<string> {
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
query: {
|
||||
project_id: await getProjectId(projectName, organizationId),
|
||||
pipeline_name: name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (pipelines.length === 0 || !pipelines[0]!.id) {
|
||||
throw new Error(
|
||||
`No pipeline found with name ${name} in project ${projectName}`,
|
||||
);
|
||||
}
|
||||
|
||||
return pipelines[0]!.id;
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
export class FireworksEmbedding extends OpenAIEmbedding {
|
||||
constructor(init?: Partial<OpenAIEmbedding>) {
|
||||
constructor(init?: Omit<Partial<OpenAIEmbedding>, "session">) {
|
||||
const {
|
||||
apiKey = getEnv("FIREWORKS_API_KEY"),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
export class TogetherEmbedding extends OpenAIEmbedding {
|
||||
constructor(init?: Partial<OpenAIEmbedding>) {
|
||||
constructor(init?: Omit<Partial<OpenAIEmbedding>, "session">) {
|
||||
const {
|
||||
apiKey = getEnv("TOGETHER_API_KEY"),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -14,12 +14,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
|
||||
retriever: BaseRetriever;
|
||||
responseSynthesizer: BaseSynthesizer;
|
||||
nodePostprocessors: BaseNodePostprocessor[];
|
||||
preFilters?: unknown;
|
||||
|
||||
constructor(
|
||||
retriever: BaseRetriever,
|
||||
responseSynthesizer?: BaseSynthesizer,
|
||||
preFilters?: unknown,
|
||||
nodePostprocessors?: BaseNodePostprocessor[],
|
||||
) {
|
||||
super(async (strOrQueryBundle, stream) => {
|
||||
@@ -52,7 +50,6 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
|
||||
this.retriever = retriever;
|
||||
this.responseSynthesizer =
|
||||
responseSynthesizer || getResponseSynthesizer("compact");
|
||||
this.preFilters = preFilters;
|
||||
this.nodePostprocessors = nodePostprocessors || [];
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
import type { LLM } from "@llamaindex/core/llms";
|
||||
import {
|
||||
PromptTemplate,
|
||||
defaultKeywordExtractPrompt,
|
||||
defaultQuestionExtractPrompt,
|
||||
defaultSummaryPrompt,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
type KeywordExtractPrompt,
|
||||
type QuestionExtractPrompt,
|
||||
type SummaryPrompt,
|
||||
type TitleCombinePrompt,
|
||||
type TitleExtractorPrompt,
|
||||
} from "@llamaindex/core/prompts";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode, TextNode } from "@llamaindex/core/schema";
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
import {
|
||||
defaultKeywordExtractorPromptTemplate,
|
||||
defaultQuestionAnswerPromptTemplate,
|
||||
defaultSummaryExtractorPromptTemplate,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
} from "./prompts.js";
|
||||
import { BaseExtractor } from "./types.js";
|
||||
|
||||
const STRIP_REGEX = /(\r\n|\n|\r)/gm;
|
||||
@@ -16,6 +22,7 @@ const STRIP_REGEX = /(\r\n|\n|\r)/gm;
|
||||
type KeywordExtractArgs = {
|
||||
llm?: LLM;
|
||||
keywords?: number;
|
||||
promptTemplate?: KeywordExtractPrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractKeyword = {
|
||||
@@ -39,6 +46,12 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
*/
|
||||
keywords: number = 5;
|
||||
|
||||
/**
|
||||
* The prompt template to use for the question extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: KeywordExtractPrompt;
|
||||
|
||||
/**
|
||||
* Constructor for the KeywordExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
@@ -53,6 +66,12 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.keywords = options?.keywords ?? 5;
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context", "maxKeywords"],
|
||||
template: options.promptTemplate,
|
||||
})
|
||||
: defaultKeywordExtractPrompt;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -66,9 +85,9 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
}
|
||||
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultKeywordExtractorPromptTemplate({
|
||||
contextStr: node.getContent(MetadataMode.ALL),
|
||||
keywords: this.keywords,
|
||||
prompt: this.promptTemplate.format({
|
||||
context: node.getContent(MetadataMode.ALL),
|
||||
maxKeywords: this.keywords.toString(),
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -93,8 +112,8 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
type TitleExtractorsArgs = {
|
||||
llm?: LLM;
|
||||
nodes?: number;
|
||||
nodeTemplate?: string;
|
||||
combineTemplate?: string;
|
||||
nodeTemplate?: TitleExtractorPrompt["template"];
|
||||
combineTemplate?: TitleCombinePrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractTitle = {
|
||||
@@ -129,19 +148,19 @@ export class TitleExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the title extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
nodeTemplate: string;
|
||||
nodeTemplate: TitleExtractorPrompt;
|
||||
|
||||
/**
|
||||
* The prompt template to merge title with..
|
||||
* @type {string}
|
||||
*/
|
||||
combineTemplate: string;
|
||||
combineTemplate: TitleCombinePrompt;
|
||||
|
||||
/**
|
||||
* Constructor for the TitleExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
* @param {number} nodes Number of nodes to extract titles from.
|
||||
* @param {string} nodeTemplate The prompt template to use for the title extractor.
|
||||
* @param {TitleExtractorPrompt} nodeTemplate The prompt template to use for the title extractor.
|
||||
* @param {string} combineTemplate The prompt template to merge title with..
|
||||
*/
|
||||
constructor(options?: TitleExtractorsArgs) {
|
||||
@@ -150,10 +169,19 @@ export class TitleExtractor extends BaseExtractor {
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.nodes = options?.nodes ?? 5;
|
||||
|
||||
this.nodeTemplate =
|
||||
options?.nodeTemplate ?? defaultTitleExtractorPromptTemplate();
|
||||
this.combineTemplate =
|
||||
options?.combineTemplate ?? defaultTitleCombinePromptTemplate();
|
||||
this.nodeTemplate = options?.nodeTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.nodeTemplate,
|
||||
})
|
||||
: defaultTitleExtractorPromptTemplate;
|
||||
|
||||
this.combineTemplate = options?.combineTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.combineTemplate,
|
||||
})
|
||||
: defaultTitleCombinePromptTemplate;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -218,8 +246,8 @@ export class TitleExtractor extends BaseExtractor {
|
||||
const titleCandidates = await this.getTitlesCandidates(nodes);
|
||||
const combinedTitles = titleCandidates.join(", ");
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultTitleCombinePromptTemplate({
|
||||
contextStr: combinedTitles,
|
||||
prompt: this.combineTemplate.format({
|
||||
context: combinedTitles,
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -232,8 +260,8 @@ export class TitleExtractor extends BaseExtractor {
|
||||
private async getTitlesCandidates(nodes: BaseNode[]): Promise<string[]> {
|
||||
const titleJobs = nodes.map(async (node) => {
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultTitleExtractorPromptTemplate({
|
||||
contextStr: node.getContent(MetadataMode.ALL),
|
||||
prompt: this.nodeTemplate.format({
|
||||
context: node.getContent(MetadataMode.ALL),
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -247,7 +275,7 @@ export class TitleExtractor extends BaseExtractor {
|
||||
type QuestionAnswerExtractArgs = {
|
||||
llm?: LLM;
|
||||
questions?: number;
|
||||
promptTemplate?: string;
|
||||
promptTemplate?: QuestionExtractPrompt["template"];
|
||||
embeddingOnly?: boolean;
|
||||
};
|
||||
|
||||
@@ -276,7 +304,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the question extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: string;
|
||||
promptTemplate: QuestionExtractPrompt;
|
||||
|
||||
/**
|
||||
* Wheter to use metadata for embeddings only
|
||||
@@ -289,7 +317,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
* Constructor for the QuestionsAnsweredExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
* @param {number} questions Number of questions to generate.
|
||||
* @param {string} promptTemplate The prompt template to use for the question extractor.
|
||||
* @param {TextQAPrompt} promptTemplate The prompt template to use for the question extractor.
|
||||
* @param {boolean} embeddingOnly Wheter to use metadata for embeddings only.
|
||||
*/
|
||||
constructor(options?: QuestionAnswerExtractArgs) {
|
||||
@@ -300,12 +328,14 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.questions = options?.questions ?? 5;
|
||||
this.promptTemplate =
|
||||
options?.promptTemplate ??
|
||||
defaultQuestionAnswerPromptTemplate({
|
||||
numQuestions: this.questions,
|
||||
contextStr: "",
|
||||
});
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["numQuestions", "context"],
|
||||
template: options.promptTemplate,
|
||||
}).partialFormat({
|
||||
numQuestions: "5",
|
||||
})
|
||||
: defaultQuestionExtractPrompt;
|
||||
this.embeddingOnly = options?.embeddingOnly ?? false;
|
||||
}
|
||||
|
||||
@@ -323,9 +353,9 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
|
||||
const contextStr = node.getContent(this.metadataMode);
|
||||
|
||||
const prompt = defaultQuestionAnswerPromptTemplate({
|
||||
contextStr,
|
||||
numQuestions: this.questions,
|
||||
const prompt = this.promptTemplate.format({
|
||||
context: contextStr,
|
||||
numQuestions: this.questions.toString(),
|
||||
});
|
||||
|
||||
const questions = await this.llm.complete({
|
||||
@@ -356,7 +386,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
type SummaryExtractArgs = {
|
||||
llm?: LLM;
|
||||
summaries?: string[];
|
||||
promptTemplate?: string;
|
||||
promptTemplate?: SummaryPrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractSummary = {
|
||||
@@ -385,7 +415,7 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the summary extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: string;
|
||||
promptTemplate: SummaryPrompt;
|
||||
|
||||
private selfSummary: boolean;
|
||||
private prevSummary: boolean;
|
||||
@@ -404,8 +434,12 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.summaries = summaries;
|
||||
this.promptTemplate =
|
||||
options?.promptTemplate ?? defaultSummaryExtractorPromptTemplate();
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.promptTemplate,
|
||||
})
|
||||
: defaultSummaryPrompt;
|
||||
|
||||
this.selfSummary = summaries?.includes("self") ?? false;
|
||||
this.prevSummary = summaries?.includes("prev") ?? false;
|
||||
@@ -422,10 +456,10 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
return "";
|
||||
}
|
||||
|
||||
const contextStr = node.getContent(this.metadataMode);
|
||||
const context = node.getContent(this.metadataMode);
|
||||
|
||||
const prompt = defaultSummaryExtractorPromptTemplate({
|
||||
contextStr,
|
||||
const prompt = this.promptTemplate.format({
|
||||
context,
|
||||
});
|
||||
|
||||
const summary = await this.llm.complete({
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
export interface DefaultPromptTemplate {
|
||||
contextStr: string;
|
||||
}
|
||||
|
||||
export interface DefaultKeywordExtractorPromptTemplate
|
||||
extends DefaultPromptTemplate {
|
||||
keywords: number;
|
||||
}
|
||||
|
||||
export interface DefaultQuestionAnswerPromptTemplate
|
||||
extends DefaultPromptTemplate {
|
||||
numQuestions: number;
|
||||
}
|
||||
|
||||
export interface DefaultNodeTextTemplate {
|
||||
metadataStr: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
export const defaultKeywordExtractorPromptTemplate = ({
|
||||
contextStr = "",
|
||||
keywords = 5,
|
||||
}: DefaultKeywordExtractorPromptTemplate) => `${contextStr}
|
||||
Give ${keywords} unique keywords for this document.
|
||||
Format as comma separated.
|
||||
Keywords: `;
|
||||
|
||||
export const defaultTitleExtractorPromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Give a title that summarizes all of the unique entities, titles or themes found in the context.
|
||||
Title: `;
|
||||
|
||||
export const defaultTitleCombinePromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Based on the above candidate titles and contents, what is the comprehensive title for this document?
|
||||
Title: `;
|
||||
|
||||
export const defaultQuestionAnswerPromptTemplate = (
|
||||
{ contextStr = "", numQuestions = 5 }: DefaultQuestionAnswerPromptTemplate = {
|
||||
contextStr: "",
|
||||
numQuestions: 5,
|
||||
},
|
||||
) => `${contextStr}
|
||||
Given the contextual informations, generate ${numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provideds as well.
|
||||
Try using these summaries to generate better questions that this context can answer.
|
||||
`;
|
||||
|
||||
export const defaultSummaryExtractorPromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Summarize the key topics and entities of the sections.
|
||||
Summary: `;
|
||||
|
||||
export const defaultNodeTextTemplate = ({
|
||||
metadataStr = "",
|
||||
content = "",
|
||||
}: {
|
||||
metadataStr?: string;
|
||||
content?: string;
|
||||
} = {}) => `[Excerpt from document]
|
||||
${metadataStr}
|
||||
Excerpt:
|
||||
-----
|
||||
${content}
|
||||
-----
|
||||
`;
|
||||
@@ -1,10 +1,10 @@
|
||||
import { defaultNodeTextTemplate } from "@llamaindex/core/prompts";
|
||||
import {
|
||||
BaseNode,
|
||||
MetadataMode,
|
||||
TextNode,
|
||||
TransformComponent,
|
||||
} from "@llamaindex/core/schema";
|
||||
import { defaultNodeTextTemplate } from "./prompts.js";
|
||||
|
||||
/*
|
||||
* Abstract class for all extractors.
|
||||
@@ -71,7 +71,7 @@ export abstract class BaseExtractor extends TransformComponent {
|
||||
if (newNodes[idx] instanceof TextNode) {
|
||||
newNodes[idx] = new TextNode({
|
||||
...newNodes[idx],
|
||||
textTemplate: defaultNodeTextTemplate(),
|
||||
textTemplate: defaultNodeTextTemplate.format(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,20 @@
|
||||
//#region initial setup for OpenAI
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
import { Settings } from "./Settings.js";
|
||||
|
||||
try {
|
||||
Settings.llm;
|
||||
} catch {
|
||||
Settings.llm = new OpenAI();
|
||||
}
|
||||
|
||||
//#endregion
|
||||
|
||||
export {
|
||||
LlamaParseReader,
|
||||
type Language,
|
||||
type ResultType,
|
||||
} from "@llamaindex/cloud/reader";
|
||||
export * from "@llamaindex/core/agent";
|
||||
export * from "@llamaindex/core/chat-engine";
|
||||
export {
|
||||
@@ -23,12 +40,12 @@ export type {
|
||||
JSONArray,
|
||||
JSONObject,
|
||||
JSONValue,
|
||||
LlamaIndexEventMaps,
|
||||
LLMEndEvent,
|
||||
LLMStartEvent,
|
||||
LLMStreamEvent,
|
||||
LLMToolCallEvent,
|
||||
LLMToolResultEvent,
|
||||
LlamaIndexEventMaps,
|
||||
} from "@llamaindex/core/global";
|
||||
export * from "@llamaindex/core/indices";
|
||||
export * from "@llamaindex/core/llms";
|
||||
@@ -56,7 +73,7 @@ export * from "./postprocessors/index.js";
|
||||
export * from "./QuestionGenerator.js";
|
||||
export * from "./selectors/index.js";
|
||||
export * from "./ServiceContext.js";
|
||||
export { Settings } from "./Settings.js";
|
||||
export * from "./storage/StorageContext.js";
|
||||
export * from "./tools/index.js";
|
||||
export * from "./types.js";
|
||||
export { Settings };
|
||||
|
||||
@@ -246,7 +246,6 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever ?? this.asRetriever(),
|
||||
responseSynthesizer,
|
||||
options?.preFilters,
|
||||
options?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -189,7 +189,6 @@ export class SummaryIndex extends BaseIndex<IndexList> {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever,
|
||||
responseSynthesizer,
|
||||
options?.preFilters,
|
||||
options?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -31,8 +31,8 @@ import type { StorageContext } from "../../storage/StorageContext.js";
|
||||
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
|
||||
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
MetadataFilters,
|
||||
VectorStore,
|
||||
VectorStoreByType,
|
||||
VectorStoreQueryResult,
|
||||
} from "../../vector-store/index.js";
|
||||
@@ -203,7 +203,10 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
} = {},
|
||||
): Promise<VectorStoreIndex> {
|
||||
args.storageContext =
|
||||
args.storageContext ?? (await storageContextFromDefaults({}));
|
||||
args.storageContext ??
|
||||
(await storageContextFromDefaults({
|
||||
serviceContext: args.serviceContext,
|
||||
}));
|
||||
args.vectorStores = args.vectorStores ?? args.storageContext.vectorStores;
|
||||
args.docStoreStrategy =
|
||||
args.docStoreStrategy ??
|
||||
@@ -261,7 +264,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
}
|
||||
|
||||
static async fromVectorStore(
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
serviceContext?: ServiceContext,
|
||||
) {
|
||||
return this.fromVectorStores(
|
||||
@@ -295,9 +298,8 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
similarityTopK,
|
||||
} = options ?? {};
|
||||
return new RetrieverQueryEngine(
|
||||
retriever ?? this.asRetriever({ similarityTopK }),
|
||||
retriever ?? this.asRetriever({ similarityTopK, filters: preFilters }),
|
||||
responseSynthesizer,
|
||||
preFilters,
|
||||
nodePostprocessors,
|
||||
);
|
||||
}
|
||||
@@ -305,7 +307,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
protected async insertNodesToStore(
|
||||
newIds: string[],
|
||||
nodes: BaseNode[],
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
): Promise<void> {
|
||||
// NOTE: if the vector store doesn't store text,
|
||||
// we need to add the nodes to the index struct and document store
|
||||
@@ -355,7 +357,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
}
|
||||
|
||||
protected async deleteRefDocFromStore(
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
refDocId: string,
|
||||
): Promise<void> {
|
||||
await vectorStore.delete(refDocId);
|
||||
@@ -384,7 +386,7 @@ export type VectorIndexRetrieverOptions = {
|
||||
index: VectorStoreIndex;
|
||||
similarityTopK?: number | undefined;
|
||||
topK?: TopKMap | undefined;
|
||||
filters?: MetadataFilters;
|
||||
filters?: MetadataFilters | undefined;
|
||||
};
|
||||
|
||||
export class VectorIndexRetriever extends BaseRetriever {
|
||||
@@ -423,7 +425,7 @@ export class VectorIndexRetriever extends BaseRetriever {
|
||||
let nodesWithScores: NodeWithScore[] = [];
|
||||
|
||||
for (const type in vectorStores) {
|
||||
const vectorStore: VectorStore = vectorStores[type as ModalityType]!;
|
||||
const vectorStore: BaseVectorStore = vectorStores[type as ModalityType]!;
|
||||
nodesWithScores = nodesWithScores.concat(
|
||||
await this.retrieveQuery(query, type as ModalityType, vectorStore),
|
||||
);
|
||||
@@ -434,7 +436,7 @@ export class VectorIndexRetriever extends BaseRetriever {
|
||||
protected async retrieveQuery(
|
||||
query: MessageContent,
|
||||
type: ModalityType,
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
filters?: MetadataFilters,
|
||||
): Promise<NodeWithScore[]> {
|
||||
// convert string message to multi-modal format
|
||||
|
||||
@@ -7,7 +7,10 @@ import {
|
||||
type Metadata,
|
||||
} from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../storage/docStore/types.js";
|
||||
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
|
||||
import {
|
||||
DocStoreStrategy,
|
||||
@@ -59,7 +62,7 @@ export class IngestionPipeline {
|
||||
transformations: TransformComponent[] = [];
|
||||
documents?: Document[] | undefined;
|
||||
reader?: BaseReader | undefined;
|
||||
vectorStore?: VectorStore | undefined;
|
||||
vectorStore?: BaseVectorStore | undefined;
|
||||
vectorStores?: VectorStoreByType | undefined;
|
||||
docStore?: BaseDocumentStore;
|
||||
docStoreStrategy: DocStoreStrategy = DocStoreStrategy.UPSERTS;
|
||||
@@ -133,7 +136,7 @@ export async function addNodesToVectorStores(
|
||||
nodesAdded?: (
|
||||
newIds: string[],
|
||||
nodes: BaseNode<Metadata>[],
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
) => Promise<void>,
|
||||
) {
|
||||
const nodeMap = splitNodesByType(nodes);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { classify } from "./classify.js";
|
||||
|
||||
/**
|
||||
@@ -9,9 +9,9 @@ import { classify } from "./classify.js";
|
||||
*/
|
||||
export class UpsertsAndDeleteStrategy extends TransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: VectorStore[] | undefined;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: VectorStore[]) {
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: BaseVectorStore[]) {
|
||||
super(async (nodes: BaseNode[]): Promise<BaseNode[]> => {
|
||||
const { dedupedNodes, missingDocs, unusedDocs } = await classify(
|
||||
this.docStore,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { classify } from "./classify.js";
|
||||
|
||||
/**
|
||||
@@ -8,9 +8,9 @@ import { classify } from "./classify.js";
|
||||
*/
|
||||
export class UpsertsStrategy extends TransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: VectorStore[] | undefined;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: VectorStore[]) {
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: BaseVectorStore[]) {
|
||||
super(async (nodes: BaseNode[]): Promise<BaseNode[]> => {
|
||||
const { dedupedNodes, unusedDocs } = await classify(this.docStore, nodes);
|
||||
// remove unused docs
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
|
||||
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
|
||||
import { UpsertsStrategy } from "./UpsertsStrategy.js";
|
||||
@@ -28,7 +28,7 @@ class NoOpStrategy extends TransformComponent {
|
||||
export function createDocStoreStrategy(
|
||||
docStoreStrategy: DocStoreStrategy,
|
||||
docStore?: BaseDocumentStore,
|
||||
vectorStores: VectorStore[] = [],
|
||||
vectorStores: BaseVectorStore[] = [],
|
||||
): TransformComponent {
|
||||
if (docStoreStrategy === DocStoreStrategy.NONE) {
|
||||
return new NoOpStrategy();
|
||||
|
||||
@@ -6,7 +6,7 @@ const DEFAULT_MODEL = "mistralai/Mixtral-8x22B-Instruct-v0.1";
|
||||
const BASE_URL = "https://api.deepinfra.com/v1/openai";
|
||||
|
||||
export class DeepInfra extends OpenAI {
|
||||
constructor(init?: Partial<OpenAI>) {
|
||||
constructor(init?: Omit<Partial<OpenAI>, "session">) {
|
||||
const {
|
||||
apiKey = getEnv(ENV_VARIABLE_NAME),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -10,7 +10,9 @@ type DeepSeekModelName = keyof typeof DEEPSEEK_MODELS;
|
||||
const DEFAULT_MODEL: DeepSeekModelName = "deepseek-coder";
|
||||
|
||||
export class DeepSeekLLM extends OpenAI {
|
||||
constructor(init?: Partial<OpenAI> & { model?: DeepSeekModelName }) {
|
||||
constructor(
|
||||
init?: Omit<Partial<OpenAI>, "session"> & { model?: DeepSeekModelName },
|
||||
) {
|
||||
const {
|
||||
apiKey = getEnv("DEEPSEEK_API_KEY"),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
|
||||
export class FireworksLLM extends OpenAI {
|
||||
constructor(init?: Partial<OpenAI>) {
|
||||
constructor(init?: Omit<Partial<OpenAI>, "session">) {
|
||||
const {
|
||||
apiKey = getEnv("FIREWORKS_API_KEY"),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -44,12 +44,15 @@ import {
|
||||
export const GEMINI_MODEL_INFO_MAP: Record<GEMINI_MODEL, GeminiModelInfo> = {
|
||||
[GEMINI_MODEL.GEMINI_PRO]: { contextWindow: 30720 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_VISION]: { contextWindow: 12288 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_LATEST]: { contextWindow: 10 ** 6 },
|
||||
// multi-modal/multi turn
|
||||
[GEMINI_MODEL.GEMINI_PRO_LATEST]: { contextWindow: 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_FLASH_LATEST]: { contextWindow: 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5_PRO_PREVIEW]: { contextWindow: 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5_FLASH_PREVIEW]: { contextWindow: 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5]: { contextWindow: 2 * 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5_FLASH]: { contextWindow: 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5_LATEST]: { contextWindow: 2 * 10 ** 6 },
|
||||
[GEMINI_MODEL.GEMINI_PRO_1_5_FLASH_LATEST]: { contextWindow: 10 ** 6 },
|
||||
};
|
||||
|
||||
const SUPPORT_TOOL_CALL_MODELS: GEMINI_MODEL[] = [
|
||||
@@ -59,6 +62,10 @@ const SUPPORT_TOOL_CALL_MODELS: GEMINI_MODEL[] = [
|
||||
GEMINI_MODEL.GEMINI_PRO_1_5_FLASH_PREVIEW,
|
||||
GEMINI_MODEL.GEMINI_PRO_1_5,
|
||||
GEMINI_MODEL.GEMINI_PRO_1_5_FLASH,
|
||||
GEMINI_MODEL.GEMINI_PRO_LATEST,
|
||||
GEMINI_MODEL.GEMINI_PRO_FLASH_LATEST,
|
||||
GEMINI_MODEL.GEMINI_PRO_1_5_LATEST,
|
||||
GEMINI_MODEL.GEMINI_PRO_1_5_FLASH_LATEST,
|
||||
];
|
||||
|
||||
const DEFAULT_GEMINI_PARAMS = {
|
||||
|
||||
@@ -56,10 +56,14 @@ export enum GEMINI_MODEL {
|
||||
GEMINI_PRO = "gemini-pro",
|
||||
GEMINI_PRO_VISION = "gemini-pro-vision",
|
||||
GEMINI_PRO_LATEST = "gemini-1.5-pro-latest",
|
||||
GEMINI_PRO_FLASH_LATEST = "gemini-1.5-flash-latest",
|
||||
GEMINI_PRO_1_5_PRO_PREVIEW = "gemini-1.5-pro-preview-0514",
|
||||
GEMINI_PRO_1_5_FLASH_PREVIEW = "gemini-1.5-flash-preview-0514",
|
||||
GEMINI_PRO_1_5 = "gemini-1.5-pro-001",
|
||||
GEMINI_PRO_1_5_FLASH = "gemini-1.5-flash-001",
|
||||
// Note: should be switched to -latest suffix when google supports it
|
||||
GEMINI_PRO_1_5_LATEST = "gemini-1.5-pro-002",
|
||||
GEMINI_PRO_1_5_FLASH_LATEST = "gemini-1.5-flash-002",
|
||||
}
|
||||
|
||||
export interface GeminiModelInfo {
|
||||
|
||||
@@ -2,7 +2,7 @@ import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
|
||||
export class TogetherLLM extends OpenAI {
|
||||
constructor(init?: Partial<OpenAI>) {
|
||||
constructor(init?: Omit<Partial<OpenAI>, "session">) {
|
||||
const {
|
||||
apiKey = getEnv("TOGETHER_API_KEY"),
|
||||
additionalSessionOptions = {},
|
||||
|
||||
@@ -37,12 +37,14 @@ export default function withLlamaIndex(config: any) {
|
||||
webpackConfig.resolve.alias = {
|
||||
...webpackConfig.resolve.alias,
|
||||
"@google-cloud/vertexai": false,
|
||||
unpdf: false,
|
||||
};
|
||||
// Following lines will fix issues with onnxruntime-node when using pnpm
|
||||
// See: https://github.com/vercel/next.js/issues/43433
|
||||
webpackConfig.externals.push({
|
||||
"onnxruntime-node": "commonjs onnxruntime-node",
|
||||
sharp: "commonjs sharp",
|
||||
chromadb: "commonjs chromadb",
|
||||
});
|
||||
return webpackConfig;
|
||||
};
|
||||
|
||||
@@ -5,8 +5,12 @@ import {
|
||||
import { ModalityType, ObjectType } from "@llamaindex/core/schema";
|
||||
import { path } from "@llamaindex/env";
|
||||
import { getImageEmbedModel } from "../internal/settings/image-embed-model.js";
|
||||
import type { ServiceContext } from "../ServiceContext.js";
|
||||
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
|
||||
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
|
||||
import type { BaseDocumentStore } from "./docStore/types.js";
|
||||
import { SimpleIndexStore } from "./indexStore/SimpleIndexStore.js";
|
||||
@@ -21,10 +25,14 @@ export interface StorageContext {
|
||||
type BuilderParams = {
|
||||
docStore: BaseDocumentStore;
|
||||
indexStore: BaseIndexStore;
|
||||
vectorStore: VectorStore;
|
||||
vectorStore: BaseVectorStore;
|
||||
vectorStores: VectorStoreByType;
|
||||
storeImages: boolean;
|
||||
persistDir: string;
|
||||
/**
|
||||
* @deprecated Please use `Settings` instead
|
||||
*/
|
||||
serviceContext?: ServiceContext | undefined;
|
||||
};
|
||||
|
||||
export async function storageContextFromDefaults({
|
||||
@@ -34,6 +42,7 @@ export async function storageContextFromDefaults({
|
||||
vectorStores,
|
||||
storeImages,
|
||||
persistDir,
|
||||
serviceContext,
|
||||
}: Partial<BuilderParams>): Promise<StorageContext> {
|
||||
vectorStores = vectorStores ?? {};
|
||||
if (!persistDir) {
|
||||
@@ -44,10 +53,11 @@ export async function storageContextFromDefaults({
|
||||
}
|
||||
if (storeImages && !(ModalityType.IMAGE in vectorStores)) {
|
||||
vectorStores[ModalityType.IMAGE] = new SimpleVectorStore({
|
||||
embedModel: new (await getImageEmbedModel())(),
|
||||
embeddingModel: new (await getImageEmbedModel())(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const embedModel = serviceContext?.embedModel;
|
||||
docStore =
|
||||
docStore ||
|
||||
(await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE));
|
||||
@@ -55,7 +65,8 @@ export async function storageContextFromDefaults({
|
||||
indexStore || (await SimpleIndexStore.fromPersistDir(persistDir));
|
||||
if (!(ObjectType.TEXT in vectorStores)) {
|
||||
vectorStores[ModalityType.TEXT] =
|
||||
vectorStore ?? (await SimpleVectorStore.fromPersistDir(persistDir));
|
||||
vectorStore ??
|
||||
(await SimpleVectorStore.fromPersistDir(persistDir, embedModel));
|
||||
}
|
||||
if (storeImages && !(ObjectType.IMAGE in vectorStores)) {
|
||||
vectorStores[ModalityType.IMAGE] = await SimpleVectorStore.fromPersistDir(
|
||||
|
||||
@@ -2,24 +2,30 @@ import {
|
||||
Collection,
|
||||
DataAPIClient,
|
||||
Db,
|
||||
type Filter,
|
||||
type FindOptions,
|
||||
type SomeDoc,
|
||||
} from "@datastax/astra-db-ts";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import {
|
||||
VectorStoreBase,
|
||||
type IEmbedModel,
|
||||
type VectorStoreNoEmbedModel,
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
} from "./utils.js";
|
||||
|
||||
export class AstraDBVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class AstraDBVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
flatMetadata: boolean = true;
|
||||
|
||||
@@ -37,9 +43,9 @@ export class AstraDBVectorStore
|
||||
endpoint: string;
|
||||
namespace?: string;
|
||||
};
|
||||
} & Partial<IEmbedModel>,
|
||||
} & VectorStoreBaseParams,
|
||||
) {
|
||||
super(init?.embedModel);
|
||||
super(init);
|
||||
const token = init?.params?.token ?? getEnv("ASTRA_DB_APPLICATION_TOKEN");
|
||||
const endpoint = init?.params?.endpoint ?? getEnv("ASTRA_DB_API_ENDPOINT");
|
||||
|
||||
@@ -183,12 +189,8 @@ export class AstraDBVectorStore
|
||||
}
|
||||
const collection = this.collection;
|
||||
|
||||
const filters: Record<string, any> = {};
|
||||
query.filters?.filters?.forEach((f) => {
|
||||
filters[f.key] = f.value;
|
||||
});
|
||||
|
||||
const cursor = await collection.find(filters, <FindOptions>{
|
||||
const astraFilter = this.toAstraFilter(query.filters);
|
||||
const cursor = await collection.find(astraFilter, <FindOptions>{
|
||||
...options,
|
||||
sort: query.queryEmbedding
|
||||
? { $vector: query.queryEmbedding }
|
||||
@@ -230,4 +232,39 @@ export class AstraDBVectorStore
|
||||
nodes,
|
||||
};
|
||||
}
|
||||
|
||||
private toAstraFilter(filters?: MetadataFilters): Filter<SomeDoc> {
|
||||
if (!filters || filters.filters?.length === 0) return {};
|
||||
const condition = filters.condition ?? FilterCondition.AND;
|
||||
const listFilter = filters.filters.map((f) => this.buildFilterItem(f));
|
||||
if (condition === FilterCondition.OR) return { $or: listFilter };
|
||||
if (condition === FilterCondition.AND) return { $and: listFilter };
|
||||
throw new Error(`Not supported filter condition: ${condition}`);
|
||||
}
|
||||
|
||||
private buildFilterItem(filter: MetadataFilter): Filter<SomeDoc> {
|
||||
const { key, operator, value } = filter;
|
||||
switch (operator) {
|
||||
case FilterOperator.EQ:
|
||||
return { [key]: value };
|
||||
case FilterOperator.NE:
|
||||
return { [key]: { $ne: value } };
|
||||
case FilterOperator.GT:
|
||||
return { [key]: { $gt: value } };
|
||||
case FilterOperator.LT:
|
||||
return { [key]: { $lt: value } };
|
||||
case FilterOperator.GTE:
|
||||
return { [key]: { $gte: value } };
|
||||
case FilterOperator.LTE:
|
||||
return { [key]: { $lte: value } };
|
||||
case FilterOperator.IN:
|
||||
return { [key]: { $in: parseArrayValue(value) } };
|
||||
case FilterOperator.NIN:
|
||||
return { [key]: { $nin: parseArrayValue(value) } };
|
||||
case FilterOperator.IS_EMPTY:
|
||||
return { [key]: { $size: 0 } };
|
||||
default:
|
||||
throw new Error(`Not supported filter operator: ${operator}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,21 +2,20 @@ import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
ChromaClient,
|
||||
IncludeEnum,
|
||||
type AddParams,
|
||||
type ChromaClientParams,
|
||||
type Collection,
|
||||
type DeleteParams,
|
||||
type QueryParams,
|
||||
type QueryRecordsParams,
|
||||
type QueryResponse,
|
||||
type Where,
|
||||
type WhereDocument,
|
||||
} from "chromadb";
|
||||
import {
|
||||
VectorStoreBase,
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
VectorStoreQueryMode,
|
||||
type IEmbedModel,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -31,12 +30,22 @@ type ChromaQueryOptions = {
|
||||
whereDocument?: WhereDocument;
|
||||
};
|
||||
|
||||
type Collection = Awaited<ReturnType<ChromaClient["getOrCreateCollection"]>>;
|
||||
|
||||
const DEFAULT_TEXT_KEY = "text";
|
||||
|
||||
export class ChromaVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
type ChromaFilterCondition = "$and" | "$or";
|
||||
type ChromaFilterOperator =
|
||||
| "$eq"
|
||||
| "$ne"
|
||||
| "$gt"
|
||||
| "$lt"
|
||||
| "$gte"
|
||||
| "$lte"
|
||||
| "$in"
|
||||
| "$nin";
|
||||
|
||||
export class ChromaVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
flatMetadata: boolean = true;
|
||||
textKey: string;
|
||||
@@ -49,9 +58,9 @@ export class ChromaVectorStore
|
||||
collectionName: string;
|
||||
textKey?: string;
|
||||
chromaClientParams?: ChromaClientParams;
|
||||
} & Partial<IEmbedModel>,
|
||||
} & VectorStoreBaseParams,
|
||||
) {
|
||||
super(init.embedModel);
|
||||
super(init);
|
||||
this.collectionName = init.collectionName;
|
||||
this.chromaClient = new ChromaClient(init.chromaClientParams);
|
||||
this.textKey = init.textKey ?? DEFAULT_TEXT_KEY;
|
||||
@@ -71,7 +80,7 @@ export class ChromaVectorStore
|
||||
return this.collection;
|
||||
}
|
||||
|
||||
private getDataToInsert(nodes: BaseNode[]): AddParams {
|
||||
private getDataToInsert(nodes: BaseNode[]) {
|
||||
const metadatas = nodes.map((node) =>
|
||||
nodeToMetadata(node, true, this.textKey, this.flatMetadata),
|
||||
);
|
||||
@@ -106,6 +115,79 @@ export class ChromaVectorStore
|
||||
});
|
||||
}
|
||||
|
||||
private transformChromaFilterCondition(
|
||||
condition: FilterCondition,
|
||||
): ChromaFilterCondition {
|
||||
switch (condition) {
|
||||
case FilterCondition.AND:
|
||||
return "$and";
|
||||
case FilterCondition.OR:
|
||||
return "$or";
|
||||
default:
|
||||
throw new Error(`Filter condition ${condition} not supported`);
|
||||
}
|
||||
}
|
||||
|
||||
private transformChromaFilterOperator(
|
||||
operator: FilterOperator,
|
||||
): ChromaFilterOperator {
|
||||
switch (operator) {
|
||||
case FilterOperator.EQ:
|
||||
return "$eq";
|
||||
case FilterOperator.NE:
|
||||
return "$ne";
|
||||
case FilterOperator.GT:
|
||||
return "$gt";
|
||||
case FilterOperator.LT:
|
||||
return "$lt";
|
||||
case FilterOperator.GTE:
|
||||
return "$gte";
|
||||
case FilterOperator.LTE:
|
||||
return "$lte";
|
||||
case FilterOperator.IN:
|
||||
return "$in";
|
||||
case FilterOperator.NIN:
|
||||
return "$nin";
|
||||
default:
|
||||
throw new Error(`Filter operator ${operator} not supported`);
|
||||
}
|
||||
}
|
||||
|
||||
private toChromaFilter(filters: MetadataFilters): Where {
|
||||
const chromaFilter: Where = {};
|
||||
const filtersList: Where[] = [];
|
||||
|
||||
const condition = filters.condition
|
||||
? this.transformChromaFilterCondition(
|
||||
filters.condition as FilterCondition,
|
||||
)
|
||||
: "$and";
|
||||
|
||||
if (filters.filters) {
|
||||
for (const filter of filters.filters) {
|
||||
if (filter.operator) {
|
||||
filtersList.push({
|
||||
[filter.key]: {
|
||||
[this.transformChromaFilterOperator(
|
||||
filter.operator as FilterOperator,
|
||||
)]: filter.value,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
filtersList.push({ [filter.key]: filter.value });
|
||||
}
|
||||
}
|
||||
|
||||
if (filtersList.length === 1) {
|
||||
return filtersList[0]!;
|
||||
} else if (filtersList.length > 1) {
|
||||
chromaFilter[condition] = filtersList;
|
||||
}
|
||||
}
|
||||
|
||||
return chromaFilter;
|
||||
}
|
||||
|
||||
async query(
|
||||
query: VectorStoreQuery,
|
||||
options?: ChromaQueryOptions,
|
||||
@@ -117,49 +199,22 @@ export class ChromaVectorStore
|
||||
throw new Error("ChromaDB does not support querying by mode");
|
||||
}
|
||||
|
||||
// fixme: type is broken
|
||||
let chromaWhere: any = {};
|
||||
if (query.filters?.filters) {
|
||||
query.filters.filters.map((filter) => {
|
||||
const filterKey = filter.key;
|
||||
const filterValue = filter.value;
|
||||
if (filterKey in chromaWhere || "$or" in chromaWhere) {
|
||||
if (!chromaWhere["$or"]) {
|
||||
chromaWhere = {
|
||||
$or: [
|
||||
{
|
||||
...chromaWhere,
|
||||
},
|
||||
{
|
||||
[filterKey]: filterValue,
|
||||
},
|
||||
],
|
||||
};
|
||||
} else {
|
||||
chromaWhere["$or"].push({
|
||||
[filterKey]: filterValue,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
chromaWhere[filterKey] = filterValue;
|
||||
}
|
||||
});
|
||||
let chromaWhere: Where = {};
|
||||
if (query.filters) {
|
||||
chromaWhere = this.toChromaFilter(query.filters);
|
||||
}
|
||||
|
||||
const collection = await this.getCollection();
|
||||
const queryResponse: QueryResponse = await collection.query(<QueryParams>{
|
||||
const queryResponse: QueryResponse = await collection.query(<
|
||||
QueryRecordsParams
|
||||
>{
|
||||
queryEmbeddings: query.queryEmbedding ?? undefined,
|
||||
queryTexts: query.queryStr ?? undefined,
|
||||
nResults: query.similarityTopK,
|
||||
where: Object.keys(chromaWhere).length ? chromaWhere : undefined,
|
||||
whereDocument: options?.whereDocument,
|
||||
//ChromaDB doesn't return the result embeddings by default so we need to include them
|
||||
include: [
|
||||
IncludeEnum.Distances,
|
||||
IncludeEnum.Metadatas,
|
||||
IncludeEnum.Documents,
|
||||
IncludeEnum.Embeddings,
|
||||
],
|
||||
include: ["distances", "metadatas", "documents", "embeddings"],
|
||||
});
|
||||
|
||||
const vectorStoreQueryResult: VectorStoreQueryResult = {
|
||||
|
||||
@@ -10,10 +10,9 @@ import {
|
||||
type SearchSimpleReq,
|
||||
} from "@zilliz/milvus2-sdk-node";
|
||||
import {
|
||||
VectorStoreBase,
|
||||
type IEmbedModel,
|
||||
BaseVectorStore,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -72,12 +71,9 @@ function parseScalarFilters(scalarFilters: MetadataFilters): string {
|
||||
return filters.join(` ${condition} `);
|
||||
}
|
||||
|
||||
export class MilvusVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class MilvusVectorStore extends BaseVectorStore {
|
||||
public storesText: boolean = true;
|
||||
public isEmbeddingQuery?: boolean;
|
||||
public isEmbeddingQuery?: boolean = false;
|
||||
private flatMetadata: boolean = true;
|
||||
|
||||
private milvusClient: MilvusClient;
|
||||
@@ -91,7 +87,7 @@ export class MilvusVectorStore
|
||||
|
||||
constructor(
|
||||
init?: Partial<{ milvusClient: MilvusClient }> &
|
||||
Partial<IEmbedModel> & {
|
||||
VectorStoreBaseParams & {
|
||||
params?: {
|
||||
configOrAddress: ClientConfig | string;
|
||||
ssl?: boolean;
|
||||
@@ -106,7 +102,7 @@ export class MilvusVectorStore
|
||||
embeddingKey?: string;
|
||||
},
|
||||
) {
|
||||
super(init?.embedModel);
|
||||
super(init);
|
||||
if (init?.milvusClient) {
|
||||
this.milvusClient = init.milvusClient;
|
||||
} else {
|
||||
|
||||
@@ -5,12 +5,12 @@ import { getEnv } from "@llamaindex/env";
|
||||
import type { BulkWriteOptions, Collection } from "mongodb";
|
||||
import { MongoClient } from "mongodb";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
VectorStoreBase,
|
||||
type FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -67,10 +67,7 @@ function toMongoDBFilter(filters?: MetadataFilters): Record<string, any> {
|
||||
* Vector store that uses MongoDB Atlas for storage and vector search.
|
||||
* This store uses the $vectorSearch aggregation stage to perform vector similarity search.
|
||||
*/
|
||||
export class MongoDBAtlasVectorSearch
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class MongoDBAtlasVectorSearch extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
flatMetadata: boolean = true;
|
||||
|
||||
@@ -147,9 +144,9 @@ export class MongoDBAtlasVectorSearch
|
||||
autoCreateIndex?: boolean;
|
||||
indexedMetadataFields?: string[];
|
||||
embeddingDefinition?: Record<string, unknown>;
|
||||
},
|
||||
} & VectorStoreBaseParams,
|
||||
) {
|
||||
super(init.embedModel);
|
||||
super(init);
|
||||
if (init.mongodbClient) {
|
||||
this.mongodbClient = init.mongodbClient;
|
||||
} else {
|
||||
@@ -274,7 +271,7 @@ export class MongoDBAtlasVectorSearch
|
||||
);
|
||||
}
|
||||
|
||||
get client(): any {
|
||||
client(): any {
|
||||
return this.mongodbClient;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,12 +4,12 @@ import type { IsomorphicDB } from "@llamaindex/core/vector-store";
|
||||
import type { VercelPool } from "@vercel/postgres";
|
||||
import type { Sql } from "postgres";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilterValue,
|
||||
VectorStoreBase,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -116,7 +116,8 @@ type PGVectorStoreBaseConfig = {
|
||||
embedModel?: BaseEmbedding | undefined;
|
||||
};
|
||||
|
||||
export type PGVectorStoreConfig = PGVectorStoreBaseConfig &
|
||||
export type PGVectorStoreConfig = VectorStoreBaseParams &
|
||||
PGVectorStoreBaseConfig &
|
||||
(
|
||||
| {
|
||||
/**
|
||||
@@ -147,10 +148,7 @@ export type PGVectorStoreConfig = PGVectorStoreBaseConfig &
|
||||
* Provides support for writing and querying vector data in Postgres.
|
||||
* Note: Can't be used with data created using the Python version of the vector store (https://docs.llamaindex.ai/en/stable/examples/vector_stores/postgres/)
|
||||
*/
|
||||
export class PGVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class PGVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
private collection: string = DEFAULT_COLLECTION;
|
||||
@@ -163,7 +161,7 @@ export class PGVectorStore
|
||||
private readonly clientConfig: pg.ClientConfig | null = null;
|
||||
|
||||
constructor(config: PGVectorStoreConfig) {
|
||||
super(config?.embedModel);
|
||||
super(config);
|
||||
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
|
||||
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
|
||||
this.dimensions = config?.dimensions ?? DEFAULT_DIMENSIONS;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import {
|
||||
VectorStoreBase,
|
||||
type IEmbedModel,
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -23,15 +24,12 @@ type PineconeParams = {
|
||||
chunkSize?: number;
|
||||
namespace?: string;
|
||||
textKey?: string;
|
||||
} & IEmbedModel;
|
||||
} & VectorStoreBaseParams;
|
||||
|
||||
/**
|
||||
* Provides support for writing and querying vector data in Pinecone.
|
||||
*/
|
||||
export class PineconeVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class PineconeVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
/*
|
||||
@@ -49,7 +47,7 @@ export class PineconeVectorStore
|
||||
textKey: string;
|
||||
|
||||
constructor(params?: PineconeParams) {
|
||||
super(params?.embedModel);
|
||||
super(params);
|
||||
this.indexName =
|
||||
params?.indexName ?? getEnv("PINECONE_INDEX_NAME") ?? "llama";
|
||||
this.namespace = params?.namespace ?? getEnv("PINECONE_NAMESPACE") ?? "";
|
||||
@@ -198,14 +196,60 @@ export class PineconeVectorStore
|
||||
}
|
||||
|
||||
toPineconeFilter(stdFilters?: MetadataFilters) {
|
||||
return stdFilters?.filters?.reduce((carry: any, item: MetadataFilter) => {
|
||||
// Use MetadataFilter with EQ operator to replace ExactMatchFilter
|
||||
// TODO: support filter with other operators
|
||||
if (item.operator === "==") {
|
||||
carry[item.key] = item.value;
|
||||
if (!stdFilters) return undefined;
|
||||
|
||||
const transformCondition = (
|
||||
condition: `${FilterCondition}` = "and",
|
||||
): string => {
|
||||
if (condition === "and") return "$and";
|
||||
if (condition === "or") return "$or";
|
||||
throw new Error(`Filter condition ${condition} not supported`);
|
||||
};
|
||||
|
||||
const transformOperator = (operator: `${FilterOperator}`): string => {
|
||||
switch (operator) {
|
||||
case "!=":
|
||||
return "$ne";
|
||||
case "==":
|
||||
return "$eq";
|
||||
case ">":
|
||||
return "$gt";
|
||||
case "<":
|
||||
return "$lt";
|
||||
case ">=":
|
||||
return "$gte";
|
||||
case "<=":
|
||||
return "$lte";
|
||||
case "in":
|
||||
return "$in";
|
||||
case "nin":
|
||||
return "$nin";
|
||||
default:
|
||||
throw new Error(`Filter operator ${operator} not supported`);
|
||||
}
|
||||
return carry;
|
||||
}, {});
|
||||
};
|
||||
|
||||
const convertFilterItem = (filter: MetadataFilter) => {
|
||||
return {
|
||||
[filter.key]: {
|
||||
[transformOperator(filter.operator)]: filter.value,
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
const convertFilter = (filter: MetadataFilters) => {
|
||||
const filtersList = filter.filters
|
||||
.map((f) => convertFilterItem(f))
|
||||
.filter((f) => Object.keys(f).length > 0);
|
||||
|
||||
if (filtersList.length === 0) return undefined;
|
||||
if (filtersList.length === 1) return filtersList[0];
|
||||
|
||||
const condition = transformCondition(filter.condition);
|
||||
return { [condition]: filtersList };
|
||||
};
|
||||
|
||||
return convertFilter(stdFilters);
|
||||
}
|
||||
|
||||
textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
|
||||
|
||||
@@ -1,16 +1,21 @@
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
VectorStoreBase,
|
||||
type IEmbedModel,
|
||||
type VectorStoreNoEmbedModel,
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
|
||||
import type { QdrantClientParams } from "@qdrant/js-client-rest";
|
||||
import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
|
||||
import { QdrantClient } from "@qdrant/js-client-rest";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
type QdrantFilter = Schemas["Filter"];
|
||||
type QdrantMustConditions = QdrantFilter["must"];
|
||||
|
||||
type PointStruct = {
|
||||
id: string;
|
||||
payload: Record<string, string>;
|
||||
@@ -23,7 +28,7 @@ type QdrantParams = {
|
||||
url?: string;
|
||||
apiKey?: string;
|
||||
batchSize?: number;
|
||||
} & Partial<IEmbedModel>;
|
||||
} & VectorStoreBaseParams;
|
||||
|
||||
type QuerySearchResult = {
|
||||
id: string;
|
||||
@@ -36,10 +41,7 @@ type QuerySearchResult = {
|
||||
/**
|
||||
* Qdrant vector store.
|
||||
*/
|
||||
export class QdrantVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class QdrantVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
batchSize: number;
|
||||
@@ -63,9 +65,9 @@ export class QdrantVectorStore
|
||||
url,
|
||||
apiKey,
|
||||
batchSize,
|
||||
embedModel,
|
||||
...init
|
||||
}: QdrantParams) {
|
||||
super(embedModel);
|
||||
super(init);
|
||||
if (!client && !url) {
|
||||
if (!url) {
|
||||
throw new Error("QdrantVectorStore requires url and collectionName");
|
||||
@@ -272,7 +274,7 @@ export class QdrantVectorStore
|
||||
): Promise<VectorStoreQueryResult> {
|
||||
const qdrantFilters = options?.qdrant_filters;
|
||||
|
||||
let queryFilters;
|
||||
let queryFilters: QdrantFilter | undefined;
|
||||
|
||||
if (!query.queryEmbedding) {
|
||||
throw new Error("No query embedding provided");
|
||||
@@ -281,7 +283,7 @@ export class QdrantVectorStore
|
||||
if (qdrantFilters) {
|
||||
queryFilters = qdrantFilters;
|
||||
} else {
|
||||
queryFilters = await this.buildQueryFilter(query);
|
||||
queryFilters = buildQueryFilter(query);
|
||||
}
|
||||
|
||||
const result = (await this.db.search(this.collectionName, {
|
||||
@@ -292,58 +294,118 @@ export class QdrantVectorStore
|
||||
|
||||
return this.parseToQueryResult(result);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Qdrant filter builder
|
||||
* @param query The VectorStoreQuery to be used
|
||||
*/
|
||||
private async buildQueryFilter(query: VectorStoreQuery) {
|
||||
if (!query.docIds && !query.queryStr && !query.filters) {
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* Qdrant filter builder
|
||||
* @param query The VectorStoreQuery to be used
|
||||
*/
|
||||
function buildQueryFilter(query: VectorStoreQuery): QdrantFilter | undefined {
|
||||
if (!query.docIds && !query.queryStr && !query.filters) return undefined;
|
||||
|
||||
const mustConditions = [];
|
||||
const mustConditions: QdrantMustConditions = [];
|
||||
if (query.docIds) {
|
||||
mustConditions.push({
|
||||
key: "doc_id",
|
||||
match: { any: query.docIds },
|
||||
});
|
||||
}
|
||||
|
||||
if (query.docIds) {
|
||||
mustConditions.push({
|
||||
key: "doc_id",
|
||||
match: {
|
||||
any: query.docIds,
|
||||
},
|
||||
});
|
||||
}
|
||||
const metadataFilters = toQdrantMetadataFilters(query.filters);
|
||||
if (metadataFilters) {
|
||||
mustConditions.push(metadataFilters);
|
||||
}
|
||||
|
||||
if (!query.filters) {
|
||||
return {
|
||||
must: mustConditions,
|
||||
};
|
||||
}
|
||||
return { must: mustConditions };
|
||||
}
|
||||
|
||||
const metadataFilters = query.filters.filters;
|
||||
/**
|
||||
* Converts metadata filters to Qdrant-compatible filters
|
||||
* @param subFilters The metadata filters to be converted
|
||||
* @returns A QdrantFilter object or undefined if no valid filters are provided
|
||||
*/
|
||||
function toQdrantMetadataFilters(
|
||||
subFilters?: MetadataFilters,
|
||||
): QdrantFilter | undefined {
|
||||
if (!subFilters?.filters.length) return undefined;
|
||||
|
||||
for (let i = 0; i < metadataFilters.length; i++) {
|
||||
const filter = metadataFilters[i]!;
|
||||
const conditions: QdrantMustConditions = [];
|
||||
|
||||
if (typeof filter.key === "number") {
|
||||
mustConditions.push({
|
||||
key: filter.key,
|
||||
match: {
|
||||
gt: filter.value,
|
||||
lt: filter.value,
|
||||
for (const subfilter of subFilters.filters) {
|
||||
if (subfilter.operator === FilterOperator.EQ) {
|
||||
if (typeof subfilter.value === "number") {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
range: {
|
||||
gte: subfilter.value,
|
||||
lte: subfilter.value,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
mustConditions.push({
|
||||
key: filter.key,
|
||||
match: {
|
||||
value: filter.value,
|
||||
},
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
match: { value: subfilter.value },
|
||||
});
|
||||
}
|
||||
} else if (subfilter.operator === FilterOperator.LT) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
range: { lt: subfilter.value },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.GT) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
range: { gt: subfilter.value },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.GTE) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
range: { gte: subfilter.value },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.LTE) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
range: { lte: subfilter.value },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.TEXT_MATCH) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
match: { text: subfilter.value },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.NE) {
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
match: { except: [subfilter.value] },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.IN) {
|
||||
const values = Array.isArray(subfilter.value)
|
||||
? subfilter.value.map(String)
|
||||
: String(subfilter.value).split(",");
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
match: { any: values },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.NIN) {
|
||||
const values = Array.isArray(subfilter.value)
|
||||
? subfilter.value.map(String)
|
||||
: String(subfilter.value).split(",");
|
||||
conditions.push({
|
||||
key: subfilter.key,
|
||||
match: { except: values },
|
||||
});
|
||||
} else if (subfilter.operator === FilterOperator.IS_EMPTY) {
|
||||
conditions.push({
|
||||
is_empty: { key: subfilter.key },
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
must: mustConditions,
|
||||
};
|
||||
}
|
||||
|
||||
const filter: QdrantFilter = {};
|
||||
if (subFilters.condition === FilterCondition.OR) {
|
||||
filter.should = conditions;
|
||||
} else {
|
||||
filter.must = conditions;
|
||||
}
|
||||
|
||||
return filter;
|
||||
}
|
||||
|
||||
@@ -5,12 +5,12 @@ import { fs, path } from "@llamaindex/env";
|
||||
import { getTopKEmbeddings, getTopKMMREmbeddings } from "../internal/utils.js";
|
||||
import { exists } from "../storage/FileSystem.js";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterOperator,
|
||||
VectorStoreBase,
|
||||
VectorStoreQueryMode,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -120,19 +120,17 @@ class SimpleVectorStoreData {
|
||||
metadataDict: Record<string, MetadataValue> = {};
|
||||
}
|
||||
|
||||
export class SimpleVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class SimpleVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = false;
|
||||
private data: SimpleVectorStoreData;
|
||||
private persistPath: string | undefined;
|
||||
|
||||
constructor(init?: {
|
||||
data?: SimpleVectorStoreData | undefined;
|
||||
embedModel?: BaseEmbedding | undefined;
|
||||
}) {
|
||||
super(init?.embedModel);
|
||||
constructor(
|
||||
init?: {
|
||||
data?: SimpleVectorStoreData | undefined;
|
||||
} & VectorStoreBaseParams,
|
||||
) {
|
||||
super(init);
|
||||
this.data = init?.data || new SimpleVectorStoreData();
|
||||
}
|
||||
|
||||
@@ -144,7 +142,7 @@ export class SimpleVectorStore
|
||||
return await SimpleVectorStore.fromPersistPath(persistPath, embedModel);
|
||||
}
|
||||
|
||||
get client(): any {
|
||||
client(): any {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -272,7 +270,7 @@ export class SimpleVectorStore
|
||||
|
||||
static async fromPersistPath(
|
||||
persistPath: string,
|
||||
embedModel?: BaseEmbedding,
|
||||
embeddingModel?: BaseEmbedding,
|
||||
): Promise<SimpleVectorStore> {
|
||||
const dirPath = path.dirname(persistPath);
|
||||
if (!(await exists(dirPath))) {
|
||||
@@ -298,20 +296,20 @@ export class SimpleVectorStore
|
||||
data.embeddingDict = dataDict.embeddingDict ?? {};
|
||||
data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
|
||||
data.metadataDict = dataDict.metadataDict ?? {};
|
||||
const store = new SimpleVectorStore({ data, embedModel });
|
||||
const store = new SimpleVectorStore({ data, embeddingModel });
|
||||
store.persistPath = persistPath;
|
||||
return store;
|
||||
}
|
||||
|
||||
static fromDict(
|
||||
saveDict: SimpleVectorStoreData,
|
||||
embedModel?: BaseEmbedding,
|
||||
embeddingModel?: BaseEmbedding,
|
||||
): SimpleVectorStore {
|
||||
const data = new SimpleVectorStoreData();
|
||||
data.embeddingDict = saveDict.embeddingDict;
|
||||
data.textIdToRefDocId = saveDict.textIdToRefDocId;
|
||||
data.metadataDict = saveDict.metadataDict;
|
||||
return new SimpleVectorStore({ data, embedModel });
|
||||
return new SimpleVectorStore({ data, embeddingModel });
|
||||
}
|
||||
|
||||
toDict(): SimpleVectorStoreData {
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import {
|
||||
VectorStoreBase,
|
||||
type IEmbedModel,
|
||||
BaseVectorStore,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -18,15 +17,12 @@ type UpstashParams = {
|
||||
token?: string;
|
||||
endpoint?: string;
|
||||
maxBatchSize?: number;
|
||||
} & IEmbedModel;
|
||||
} & VectorStoreBaseParams;
|
||||
|
||||
/**
|
||||
* Provides support for writing and querying vector data in Upstash.
|
||||
*/
|
||||
export class UpstashVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class UpstashVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
private db: Index;
|
||||
@@ -45,7 +41,7 @@ export class UpstashVectorStore
|
||||
* ```
|
||||
*/
|
||||
constructor(params?: UpstashParams) {
|
||||
super(params?.embedModel);
|
||||
super(params);
|
||||
this.namespace = params?.namespace ?? "";
|
||||
this.maxBatchSize = params?.maxBatchSize ?? 1000;
|
||||
const token = params?.token ?? getEnv("UPSTASH_VECTOR_REST_TOKEN");
|
||||
|
||||
@@ -11,12 +11,11 @@ import weaviate, {
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { BaseHybridOptions } from "weaviate-client";
|
||||
import {
|
||||
VectorStoreBase,
|
||||
BaseVectorStore,
|
||||
VectorStoreQueryMode,
|
||||
type IEmbedModel,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreNoEmbedModel,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
@@ -119,10 +118,7 @@ const toWeaviateFilter = (
|
||||
return Filters[condition](...filtersList);
|
||||
};
|
||||
|
||||
export class WeaviateVectorStore
|
||||
extends VectorStoreBase
|
||||
implements VectorStoreNoEmbedModel
|
||||
{
|
||||
export class WeaviateVectorStore extends BaseVectorStore {
|
||||
public storesText: boolean = true;
|
||||
private flatMetadata: boolean = true;
|
||||
|
||||
@@ -137,7 +133,7 @@ export class WeaviateVectorStore
|
||||
private metadataKey: string;
|
||||
|
||||
constructor(
|
||||
init?: Partial<IEmbedModel> & {
|
||||
init?: VectorStoreBaseParams & {
|
||||
weaviateClient?: WeaviateClient;
|
||||
cloudOptions?: {
|
||||
clusterURL?: string;
|
||||
@@ -150,7 +146,7 @@ export class WeaviateVectorStore
|
||||
embeddingKey?: string;
|
||||
},
|
||||
) {
|
||||
super(init?.embedModel);
|
||||
super(init);
|
||||
|
||||
if (init?.weaviateClient) {
|
||||
// Use the provided client
|
||||
|
||||
@@ -76,34 +76,28 @@ export interface VectorStoreQuery {
|
||||
mmrThreshold?: number;
|
||||
}
|
||||
|
||||
export interface VectorStoreNoEmbedModel {
|
||||
storesText: boolean;
|
||||
// Supported types of vector stores (for each modality)
|
||||
export type VectorStoreByType = {
|
||||
[P in ModalityType]?: BaseVectorStore;
|
||||
};
|
||||
|
||||
export type VectorStoreBaseParams = {
|
||||
embeddingModel?: BaseEmbedding | undefined;
|
||||
};
|
||||
|
||||
export abstract class BaseVectorStore {
|
||||
embedModel: BaseEmbedding;
|
||||
abstract storesText: boolean;
|
||||
isEmbeddingQuery?: boolean;
|
||||
client(): any;
|
||||
add(embeddingResults: BaseNode[]): Promise<string[]>;
|
||||
delete(refDocId: string, deleteOptions?: any): Promise<void>;
|
||||
query(
|
||||
abstract client(): any;
|
||||
abstract add(embeddingResults: BaseNode[]): Promise<string[]>;
|
||||
abstract delete(refDocId: string, deleteOptions?: any): Promise<void>;
|
||||
abstract query(
|
||||
query: VectorStoreQuery,
|
||||
options?: any,
|
||||
): Promise<VectorStoreQueryResult>;
|
||||
}
|
||||
|
||||
export interface IEmbedModel {
|
||||
embedModel: BaseEmbedding;
|
||||
}
|
||||
|
||||
export interface VectorStore extends VectorStoreNoEmbedModel, IEmbedModel {}
|
||||
|
||||
// Supported types of vector stores (for each modality)
|
||||
|
||||
export type VectorStoreByType = {
|
||||
[P in ModalityType]?: VectorStore;
|
||||
};
|
||||
|
||||
export abstract class VectorStoreBase implements IEmbedModel {
|
||||
embedModel: BaseEmbedding;
|
||||
|
||||
protected constructor(embedModel?: BaseEmbedding) {
|
||||
this.embedModel = embedModel ?? getEmbeddedModel();
|
||||
protected constructor(params?: VectorStoreBaseParams) {
|
||||
this.embedModel = params?.embeddingModel ?? getEmbeddedModel();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,6 +102,35 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("[MetadataExtractor] QuestionsAnsweredExtractor uses custom prompt template", async () => {
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
|
||||
]);
|
||||
|
||||
const llmCompleteSpy = vi.spyOn(serviceContext.llm, "complete");
|
||||
|
||||
const questionsAnsweredExtractor = new QuestionsAnsweredExtractor({
|
||||
llm: serviceContext.llm,
|
||||
questions: 5,
|
||||
promptTemplate: `This is a custom prompt template for {context} with {numQuestions} questions`,
|
||||
});
|
||||
|
||||
await questionsAnsweredExtractor.processNodes(nodes);
|
||||
|
||||
expect(llmCompleteSpy).toHaveBeenCalled();
|
||||
|
||||
// Build the expected prompt
|
||||
const expectedPrompt = `This is a custom prompt template for ${DEFAULT_LLM_TEXT_OUTPUT} with 5 questions`;
|
||||
|
||||
// Get the actual prompt used in llm.complete
|
||||
const actualPrompt = llmCompleteSpy.mock?.calls?.[0]?.[0];
|
||||
|
||||
// Assert that the prompts match
|
||||
expect(actualPrompt).toEqual({ prompt: expectedPrompt });
|
||||
});
|
||||
|
||||
test("[MetadataExtractor] SumamryExtractor returns sectionSummary metadata", async () => {
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
@@ -119,4 +148,33 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
|
||||
sectionSummary: DEFAULT_LLM_TEXT_OUTPUT,
|
||||
});
|
||||
});
|
||||
|
||||
test("[KeywordExtractor] KeywordExtractor uses custom prompt template", async () => {
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
|
||||
]);
|
||||
|
||||
const llmCompleteSpy = vi.spyOn(serviceContext.llm, "complete");
|
||||
|
||||
const keywordExtractor = new KeywordExtractor({
|
||||
llm: serviceContext.llm,
|
||||
keywords: 5,
|
||||
promptTemplate: `This is a custom prompt template for {context} with {maxKeywords} keywords`,
|
||||
});
|
||||
|
||||
await keywordExtractor.processNodes(nodes);
|
||||
|
||||
expect(llmCompleteSpy).toHaveBeenCalled();
|
||||
|
||||
// Build the expected prompt
|
||||
const expectedPrompt = `This is a custom prompt template for ${DEFAULT_LLM_TEXT_OUTPUT} with 5 keywords`;
|
||||
|
||||
// Get the actual prompt used in llm.complete
|
||||
const actualPrompt = llmCompleteSpy.mock?.calls?.[0]?.[0];
|
||||
|
||||
// Assert that the prompts match
|
||||
expect(actualPrompt).toEqual({ prompt: expectedPrompt });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
import { expect, test, vi } from "vitest";
|
||||
|
||||
test("init without error", async () => {
|
||||
vi.stubEnv("OPENAI_API_KEY", undefined);
|
||||
const { Settings } = await import("llamaindex");
|
||||
expect(Settings.llm).toBeDefined();
|
||||
});
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user