mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c1578a19d9 | |||
| ae49ff4e15 | |||
| a75af835a5 | |||
| 7c7cd34908 | |||
| f651891196 | |||
| 04714c886f | |||
| cf28574f51 | |||
| 24d065f054 | |||
| b8719586e3 | |||
| 07a40aca49 | |||
| 33b562938d | |||
| 723b41c23c | |||
| 4c38c1be0b | |||
| 0dde0ca27f | |||
| f3e0d07f48 | |||
| 1364e8eeed | |||
| 96fc69cc61 | |||
| 3b7736f763 | |||
| a7a7afe66e | |||
| c646ee2eca | |||
| 5729bd92fd | |||
| e0e52cf879 | |||
| 6f75306c17 | |||
| 94cb4ad810 | |||
| 1ea4014746 | |||
| 6a9a7b1458 | |||
| 1c168cd531 | |||
| 62cba5236d | |||
| d265e96420 | |||
| d30bbf799f | |||
| 53fd00a7c3 | |||
| 83f2848d47 | |||
| 313071e9cd | |||
| 5f6782038a | |||
| fe08d0451b | |||
| 59c5e5c3d4 | |||
| ee697fb1b3 | |||
| cf3320a4ea | |||
| f2ed69f2f8 | |||
| 3489e7de84 | |||
| 468bda594e | |||
| 6f3a31caf6 | |||
| 63e9846e97 |
@@ -25,4 +25,4 @@ jobs:
|
||||
run: pnpm run build
|
||||
|
||||
- name: Pre Release
|
||||
run: pnpx pkg-pr-new publish ./packages/*
|
||||
run: pnpx pkg-pr-new publish ./packages/* ./packages/providers/*
|
||||
|
||||
+20
-21
@@ -136,27 +136,26 @@ jobs:
|
||||
run: pnpm run build
|
||||
- name: Copy examples
|
||||
run: rsync -rv --exclude=node_modules ./examples ${{ runner.temp }}
|
||||
- name: Pack @llamaindex/cloud
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/cloud
|
||||
- name: Pack @llamaindex/openai
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/llm/openai
|
||||
- name: Pack @llamaindex/groq
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/llm/groq
|
||||
- name: Pack @llamaindex/ollama
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/llm/ollama
|
||||
- name: Pack @llamaindex/core
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/core
|
||||
- name: Pack @llamaindex/env
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/env
|
||||
- name: Pack llamaindex
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/llamaindex
|
||||
- name: Pack packages
|
||||
run: |
|
||||
for dir in packages/*; do
|
||||
if [ -d "$dir" ] && [ -f "$dir/package.json" ] && [[ ! "$dir" =~ autotool ]]; then
|
||||
echo "Packing $dir"
|
||||
pnpm pack --pack-destination ${{ runner.temp }} -C $dir
|
||||
else
|
||||
echo "Skipping $dir, no package.json found"
|
||||
fi
|
||||
done
|
||||
- name: Pack provider packages
|
||||
run: |
|
||||
for dir in packages/providers/*; do
|
||||
if [ -d "$dir" ] && [ -f "$dir/package.json" ]; then
|
||||
echo "Packing $dir"
|
||||
pnpm pack --pack-destination ${{ runner.temp }} -C $dir
|
||||
else
|
||||
echo "Skipping $dir, no package.json found"
|
||||
fi
|
||||
done
|
||||
- name: Install
|
||||
run: npm add ${{ runner.temp }}/*.tgz
|
||||
working-directory: ${{ runner.temp }}/examples
|
||||
|
||||
@@ -1,5 +1,80 @@
|
||||
# docs
|
||||
|
||||
## 0.0.93
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.0.92
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/examples@0.0.9
|
||||
|
||||
## 0.0.91
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.90
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.89
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.88
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.87
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.86
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docs",
|
||||
"version": "0.0.84",
|
||||
"version": "0.0.93",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
# examples
|
||||
|
||||
## 0.0.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import {
|
||||
AstraDBVectorStore,
|
||||
Document,
|
||||
MetadataFilters,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
@@ -42,8 +43,10 @@ async function main() {
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
|
||||
const queryEngine = index.asQueryEngine();
|
||||
const preFilters: MetadataFilters = {
|
||||
filters: [{ key: "id", operator: "in", value: [123, 789] }],
|
||||
}; // try changing the filters to see the different results
|
||||
const queryEngine = index.asQueryEngine({ preFilters });
|
||||
const response = await queryEngine.query({
|
||||
query: "Describe AstraDB.",
|
||||
});
|
||||
|
||||
@@ -1,57 +1,83 @@
|
||||
import {
|
||||
ChromaVectorStore,
|
||||
Document,
|
||||
MetadataFilters,
|
||||
VectorStoreIndex,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
const collectionName = "dog_colors";
|
||||
const collectionName = "dogs_with_color";
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const docs = [
|
||||
new Document({
|
||||
text: "The dog is brown",
|
||||
metadata: {
|
||||
dogId: "1",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
text: "The dog is red",
|
||||
metadata: {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
console.log("Creating ChromaDB vector store");
|
||||
const chromaVS = new ChromaVectorStore({ collectionName });
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: chromaVS });
|
||||
const index = await VectorStoreIndex.fromVectorStore(chromaVS);
|
||||
|
||||
console.log("Embedding documents and adding to index");
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
const queryFn = async (filters?: MetadataFilters) => {
|
||||
console.log("\nQuerying dogs by filters: ", JSON.stringify(filters));
|
||||
const query = "List all colors of dogs";
|
||||
const queryEngine = index.asQueryEngine({
|
||||
preFilters: filters,
|
||||
similarityTopK: 3,
|
||||
});
|
||||
const response = await queryEngine.query({ query });
|
||||
console.log(response.toString());
|
||||
};
|
||||
|
||||
console.log("Querying index");
|
||||
const queryEngine = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: "2",
|
||||
operator: "==",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const response = await queryEngine.query({
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log(response.toString());
|
||||
await queryFn(); // red, brown, yellow
|
||||
await queryFn({ filters: [{ key: "dogId", value: "1", operator: "==" }] }); // brown
|
||||
await queryFn({ filters: [{ key: "dogId", value: "1", operator: "!=" }] }); // red, yellow
|
||||
await queryFn({
|
||||
filters: [
|
||||
{ key: "dogId", value: "1", operator: "==" },
|
||||
{ key: "dogId", value: "3", operator: "==" },
|
||||
],
|
||||
condition: "or",
|
||||
}); // brown, yellow
|
||||
await queryFn({
|
||||
filters: [{ key: "dogId", value: ["1", "2"], operator: "in" }],
|
||||
}); // red, brown
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
void main();
|
||||
async function generate() {
|
||||
const docs = [
|
||||
new Document({
|
||||
id_: "doc1",
|
||||
text: "The dog is brown",
|
||||
metadata: {
|
||||
dogId: "1",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
id_: "doc2",
|
||||
text: "The dog is red",
|
||||
metadata: {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
id_: "doc3",
|
||||
text: "The dog is yellow",
|
||||
metadata: {
|
||||
dogId: "3",
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
console.log("Creating ChromaDB vector store");
|
||||
const chromaVS = new ChromaVectorStore({ collectionName });
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: chromaVS });
|
||||
|
||||
console.log("Embedding documents and adding to index");
|
||||
await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
}
|
||||
|
||||
(async () => {
|
||||
await generate();
|
||||
await main();
|
||||
})();
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
import {
|
||||
Document,
|
||||
MetadataFilters,
|
||||
Settings,
|
||||
SimpleDocumentStore,
|
||||
VectorStoreIndex,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
async function getDataSource() {
|
||||
const docs = [
|
||||
new Document({ text: "The dog is brown", metadata: { dogId: "1" } }),
|
||||
new Document({ text: "The dog is yellow", metadata: { dogId: "2" } }),
|
||||
];
|
||||
const storageContext = await storageContextFromDefaults({
|
||||
persistDir: "./cache",
|
||||
});
|
||||
const numberOfDocs = Object.keys(
|
||||
(storageContext.docStore as SimpleDocumentStore).toDict(),
|
||||
).length;
|
||||
if (numberOfDocs === 0) {
|
||||
return await VectorStoreIndex.fromDocuments(docs, { storageContext });
|
||||
}
|
||||
return await VectorStoreIndex.init({
|
||||
storageContext,
|
||||
});
|
||||
}
|
||||
|
||||
Settings.callbackManager.on("retrieve-end", (event) => {
|
||||
const { nodes, query } = event.detail;
|
||||
console.log(`${query.query} - Number of retrieved nodes:`, nodes.length);
|
||||
});
|
||||
|
||||
async function main() {
|
||||
const index = await getDataSource();
|
||||
const filters: MetadataFilters = {
|
||||
filters: [{ key: "dogId", value: "2", operator: "==" }],
|
||||
};
|
||||
|
||||
const retriever = index.asRetriever({ similarityTopK: 3, filters });
|
||||
const queryEngine = index.asQueryEngine({
|
||||
similarityTopK: 3,
|
||||
preFilters: filters,
|
||||
});
|
||||
|
||||
console.log("Retriever and query engine should only retrieve 1 node:");
|
||||
await retriever.retrieve({ query: "Retriever: get dog" });
|
||||
await queryEngine.query({ query: "QueryEngine: get dog" });
|
||||
}
|
||||
|
||||
void main();
|
||||
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@llamaindex/examples",
|
||||
"private": true,
|
||||
"version": "0.0.8",
|
||||
"version": "0.0.9",
|
||||
"dependencies": {
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@llamaindex/core": "^0.2.0",
|
||||
"@llamaindex/core": "^0.3.0",
|
||||
"@notionhq/client": "^2.2.15",
|
||||
"@pinecone-database/pinecone": "^3.0.2",
|
||||
"@vercel/postgres": "^0.10.0",
|
||||
@@ -15,7 +15,7 @@
|
||||
"commander": "^12.1.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"llamaindex": "^0.6.0",
|
||||
"llamaindex": "^0.7.0",
|
||||
"mongodb": "^6.7.0",
|
||||
"pathe": "^1.1.2",
|
||||
"postgres": "^3.4.4"
|
||||
|
||||
@@ -39,6 +39,12 @@ async function main() {
|
||||
dogId: "2",
|
||||
},
|
||||
}),
|
||||
new Document({
|
||||
text: "The dog is black",
|
||||
metadata: {
|
||||
dogId: "3",
|
||||
},
|
||||
}),
|
||||
];
|
||||
console.log("Creating QdrantDB vector store");
|
||||
const qdrantVs = new QdrantVectorStore({ url: qdrantUrl, collectionName });
|
||||
@@ -73,6 +79,42 @@ async function main() {
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log("Filter with dogId 2 response:", response.toString());
|
||||
|
||||
console.log("Querying index with dogId !=2: Expected output: Not red");
|
||||
const queryEngineNotDogId2 = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: "2",
|
||||
operator: "!=",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const responseNotDogId2 = await queryEngineNotDogId2.query({
|
||||
query: "What is the color of the dog?",
|
||||
});
|
||||
console.log(responseNotDogId2.toString());
|
||||
|
||||
console.log(
|
||||
"Querying index with dogId 2 or 3: Expected output: Red, Black",
|
||||
);
|
||||
const queryEngineIn = index.asQueryEngine({
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "dogId",
|
||||
value: ["2", "3"],
|
||||
operator: "in",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const responseIn = await queryEngineIn.query({
|
||||
query: "List all dogs",
|
||||
});
|
||||
console.log(responseIn.toString());
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
|
||||
@@ -25,12 +25,9 @@ async function main() {
|
||||
similarityCutoff: 0.7,
|
||||
});
|
||||
// TODO: cannot pass responseSynthesizer into retriever query engine
|
||||
const queryEngine = new RetrieverQueryEngine(
|
||||
retriever,
|
||||
undefined,
|
||||
undefined,
|
||||
[nodePostprocessor],
|
||||
);
|
||||
const queryEngine = new RetrieverQueryEngine(retriever, undefined, [
|
||||
nodePostprocessor,
|
||||
]);
|
||||
|
||||
const response = await queryEngine.query({
|
||||
query: "What did the author do growing up?",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
BaseVectorStore,
|
||||
getResponseSynthesizer,
|
||||
OpenAI,
|
||||
OpenAIEmbedding,
|
||||
@@ -6,7 +7,6 @@ import {
|
||||
Settings,
|
||||
TextNode,
|
||||
VectorIndexRetriever,
|
||||
VectorStore,
|
||||
VectorStoreIndex,
|
||||
VectorStoreQuery,
|
||||
VectorStoreQueryResult,
|
||||
@@ -24,7 +24,7 @@ Settings.llm = new OpenAI({
|
||||
* Please do not use this class in production; it's only for demonstration purposes.
|
||||
*/
|
||||
class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
|
||||
implements VectorStore
|
||||
implements BaseVectorStore
|
||||
{
|
||||
storesText = true;
|
||||
isEmbeddingQuery = false;
|
||||
@@ -165,9 +165,7 @@ async function main() {
|
||||
});
|
||||
|
||||
const responseSynthesizer = getResponseSynthesizer("tree_summarize");
|
||||
return new RetrieverQueryEngine(retriever, responseSynthesizer, {
|
||||
filter,
|
||||
});
|
||||
return new RetrieverQueryEngine(retriever, responseSynthesizer);
|
||||
};
|
||||
|
||||
// whatever is a key from your metadata
|
||||
|
||||
@@ -1,5 +1,80 @@
|
||||
# @llamaindex/autotool
|
||||
|
||||
## 4.0.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 4.0.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 3.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 3.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 3.0.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 3.0.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 3.0.18
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 3.0.17
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 3.0.16
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 3.0.15
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,88 @@
|
||||
# @llamaindex/autotool-01-node-example
|
||||
|
||||
## 0.0.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
- @llamaindex/autotool@4.0.1
|
||||
|
||||
## 0.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/autotool@4.0.0
|
||||
|
||||
## 0.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
- @llamaindex/autotool@3.0.22
|
||||
|
||||
## 0.0.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
- @llamaindex/autotool@3.0.21
|
||||
|
||||
## 0.0.29
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
- @llamaindex/autotool@3.0.20
|
||||
|
||||
## 0.0.28
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
- @llamaindex/autotool@3.0.19
|
||||
|
||||
## 0.0.27
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
- @llamaindex/autotool@3.0.18
|
||||
|
||||
## 0.0.26
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
- @llamaindex/autotool@3.0.17
|
||||
|
||||
## 0.0.25
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
- @llamaindex/autotool@3.0.16
|
||||
|
||||
## 0.0.24
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
"scripts": {
|
||||
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
|
||||
},
|
||||
"version": "0.0.24"
|
||||
"version": "0.0.33"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,88 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
- @llamaindex/autotool@4.0.1
|
||||
|
||||
## 0.1.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
- @llamaindex/autotool@4.0.0
|
||||
|
||||
## 0.1.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
- @llamaindex/autotool@3.0.22
|
||||
|
||||
## 0.1.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
- @llamaindex/autotool@3.0.21
|
||||
|
||||
## 0.1.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
- @llamaindex/autotool@3.0.20
|
||||
|
||||
## 0.1.72
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
- @llamaindex/autotool@3.0.19
|
||||
|
||||
## 0.1.71
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
- @llamaindex/autotool@3.0.18
|
||||
|
||||
## 0.1.70
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
- @llamaindex/autotool@3.0.17
|
||||
|
||||
## 0.1.69
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
- @llamaindex/autotool@3.0.16
|
||||
|
||||
## 0.1.68
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.68",
|
||||
"version": "0.1.77",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool",
|
||||
"type": "module",
|
||||
"version": "3.0.15",
|
||||
"version": "4.0.1",
|
||||
"description": "auto transpile your JS function to LLM Agent compatible",
|
||||
"files": [
|
||||
"dist",
|
||||
@@ -70,7 +70,7 @@
|
||||
"@swc/types": "^0.1.12",
|
||||
"@types/json-schema": "^7.0.15",
|
||||
"@types/node": "^22.5.1",
|
||||
"bunchee": "5.3.2",
|
||||
"bunchee": "5.5.1",
|
||||
"llamaindex": "workspace:*",
|
||||
"next": "14.2.11",
|
||||
"rollup": "^4.21.2",
|
||||
|
||||
@@ -1,5 +1,47 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 1.0.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 4c38c1b: fix(cloud): do not detect file type in llama parse
|
||||
- 24d065f: Log Parse Job Errors when verbose is enabled
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [a75af83]
|
||||
- @llamaindex/env@0.1.14
|
||||
- @llamaindex/core@0.3.1
|
||||
|
||||
## 1.0.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.2.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
|
||||
## 0.2.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
|
||||
## 0.2.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
|
||||
## 0.2.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "0.2.11",
|
||||
"version": "1.0.1",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
@@ -53,13 +53,10 @@
|
||||
"@hey-api/openapi-ts": "^0.53.0",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"bunchee": "5.3.2"
|
||||
"bunchee": "5.5.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*"
|
||||
},
|
||||
"dependencies": {
|
||||
"magic-bytes.js": "^1.10.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { type Client, createClient, createConfig } from "@hey-api/client-fetch";
|
||||
import { Document, FileReader } from "@llamaindex/core/schema";
|
||||
import { fs, getEnv, path } from "@llamaindex/env";
|
||||
import { filetypeinfo } from "magic-bytes.js";
|
||||
import {
|
||||
type Body_upload_file_api_v1_parsing_upload_post,
|
||||
type ParserLanguages,
|
||||
@@ -13,99 +12,6 @@ export type Language = ParserLanguages;
|
||||
|
||||
export type ResultType = "text" | "markdown" | "json";
|
||||
|
||||
const SUPPORT_FILE_EXT: string[] = [
|
||||
".pdf",
|
||||
// document and presentations
|
||||
".602",
|
||||
".abw",
|
||||
".cgm",
|
||||
".cwk",
|
||||
".doc",
|
||||
".docx",
|
||||
".docm",
|
||||
".dot",
|
||||
".dotm",
|
||||
".hwp",
|
||||
".key",
|
||||
".lwp",
|
||||
".mw",
|
||||
".mcw",
|
||||
".pages",
|
||||
".pbd",
|
||||
".ppt",
|
||||
".pptm",
|
||||
".pptx",
|
||||
".pot",
|
||||
".potm",
|
||||
".potx",
|
||||
".rtf",
|
||||
".sda",
|
||||
".sdd",
|
||||
".sdp",
|
||||
".sdw",
|
||||
".sgl",
|
||||
".sti",
|
||||
".sxi",
|
||||
".sxw",
|
||||
".stw",
|
||||
".sxg",
|
||||
".txt",
|
||||
".uof",
|
||||
".uop",
|
||||
".uot",
|
||||
".vor",
|
||||
".wpd",
|
||||
".wps",
|
||||
".xml",
|
||||
".zabw",
|
||||
".epub",
|
||||
// images
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".png",
|
||||
".gif",
|
||||
".bmp",
|
||||
".svg",
|
||||
".tiff",
|
||||
".webp",
|
||||
// web
|
||||
".htm",
|
||||
".html",
|
||||
// spreadsheets
|
||||
".xlsx",
|
||||
".xls",
|
||||
".xlsm",
|
||||
".xlsb",
|
||||
".xlw",
|
||||
".csv",
|
||||
".dif",
|
||||
".sylk",
|
||||
".slk",
|
||||
".prn",
|
||||
".numbers",
|
||||
".et",
|
||||
".ods",
|
||||
".fods",
|
||||
".uos1",
|
||||
".uos2",
|
||||
".dbf",
|
||||
".wk1",
|
||||
".wk2",
|
||||
".wk3",
|
||||
".wk4",
|
||||
".wks",
|
||||
".123",
|
||||
".wq1",
|
||||
".wq2",
|
||||
".wb1",
|
||||
".wb2",
|
||||
".wb3",
|
||||
".qpw",
|
||||
".xlr",
|
||||
".eth",
|
||||
".tsv",
|
||||
];
|
||||
|
||||
// TODO: should move into @llamaindex/env
|
||||
type WriteStream = {
|
||||
write: (text: string) => void;
|
||||
@@ -239,17 +145,12 @@ export class LlamaParseReader extends FileReader {
|
||||
|
||||
// Create a job for the LlamaParse API
|
||||
private async createJob(data: Uint8Array): Promise<string> {
|
||||
// Load data, set the mime type
|
||||
const { mime } = await LlamaParseReader.getMimeType(data);
|
||||
|
||||
if (this.verbose) {
|
||||
console.log("Started uploading the file");
|
||||
}
|
||||
|
||||
const body = {
|
||||
file: new Blob([data], {
|
||||
type: mime,
|
||||
}),
|
||||
file: new Blob([data]),
|
||||
language: this.language,
|
||||
parsing_instruction: this.parsingInstruction,
|
||||
skip_diagonal_text: this.skipDiagonalText,
|
||||
@@ -368,6 +269,11 @@ export class LlamaParseReader extends FileReader {
|
||||
}
|
||||
tries++;
|
||||
} else {
|
||||
if (this.verbose) {
|
||||
console.error(
|
||||
`Recieved Error response ${status} for job ${jobId}. Got Error Code: ${data.error_code} and Error Message: ${data.error_message}`,
|
||||
);
|
||||
}
|
||||
throw new Error(
|
||||
`Failed to parse the file: ${jobId}, status: ${status}`,
|
||||
);
|
||||
@@ -564,24 +470,4 @@ export class LlamaParseReader extends FileReader {
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
static async getMimeType(
|
||||
data: Uint8Array,
|
||||
): Promise<{ mime: string; extension: string }> {
|
||||
const typeinfos = filetypeinfo(data);
|
||||
// find the first type info whose extension is in the supported file extension list
|
||||
// A docx file may be detected as a zip file, so we check every candidate type instead of only the first one
|
||||
const info = typeinfos.find((info) => {
|
||||
if (info.extension && SUPPORT_FILE_EXT.includes(`.${info.extension}`)) {
|
||||
return info;
|
||||
}
|
||||
});
|
||||
if (!info || !info.mime || !info.extension) {
|
||||
const ext = SUPPORT_FILE_EXT.join(", ");
|
||||
throw new Error(
|
||||
`File has type which does not match supported MIME Types. Supported formats include: ${ext}`,
|
||||
);
|
||||
}
|
||||
return { mime: info.mime, extension: info.extension };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,45 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.49
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [a75af83]
|
||||
- @llamaindex/env@0.1.14
|
||||
- @llamaindex/core@0.3.1
|
||||
|
||||
## 0.0.48
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
|
||||
## 0.0.47
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
|
||||
## 0.0.46
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
|
||||
## 0.0.45
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
|
||||
## 0.0.44
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.44",
|
||||
"version": "0.0.49",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
@@ -43,11 +43,11 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.5.1",
|
||||
"bunchee": "5.3.2"
|
||||
"bunchee": "5.5.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.642.0",
|
||||
"@aws-sdk/client-bedrock-agent-runtime": "^3.642.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.642.0",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,40 @@
|
||||
# @llamaindex/core
|
||||
|
||||
## 0.3.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [a75af83]
|
||||
- @llamaindex/env@0.1.14
|
||||
|
||||
## 0.3.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 1364e8e: update metadata extractors to use PromptTemplate
|
||||
- 96fc69c: add defaultQuestionExtractPrompt
|
||||
|
||||
## 0.2.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5f67820: Fix node parsers so that they generate nodes with UUIDs
|
||||
|
||||
## 0.2.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ee697fb: fix: generate uuid when inserting to Qdrant
|
||||
|
||||
## 0.2.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 3489e7d: fix: num output incorrect in prompt helper
|
||||
- 468bda5: fix: correct warning when chunk size smaller than 0
|
||||
|
||||
## 0.2.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"type": "module",
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": "./dist/index.js",
|
||||
"private": true
|
||||
}
|
||||
+42
-10
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/core",
|
||||
"type": "module",
|
||||
"version": "0.2.9",
|
||||
"version": "0.3.1",
|
||||
"description": "LlamaIndex Core Module",
|
||||
"exports": {
|
||||
"./agent": {
|
||||
@@ -258,16 +258,44 @@
|
||||
},
|
||||
"./vector-store": {
|
||||
"require": {
|
||||
"types": "./dist/vector-store/index.d.cts",
|
||||
"default": "./dist/vector-store/index.cjs"
|
||||
"types": "./vector-store/dist/index.d.cts",
|
||||
"default": "./vector-store/dist/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./dist/vector-store/index.d.ts",
|
||||
"default": "./dist/vector-store/index.js"
|
||||
"types": "./vector-store/dist/index.d.ts",
|
||||
"default": "./vector-store/dist/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./dist/vector-store/index.d.ts",
|
||||
"default": "./dist/vector-store/index.js"
|
||||
"types": "./vector-store/dist/index.d.ts",
|
||||
"default": "./vector-store/dist/index.js"
|
||||
}
|
||||
},
|
||||
"./tools": {
|
||||
"require": {
|
||||
"types": "./tools/dist/index.d.cts",
|
||||
"default": "./tools/dist/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./tools/dist/index.d.ts",
|
||||
"default": "./tools/dist/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./tools/dist/index.d.ts",
|
||||
"default": "./tools/dist/index.js"
|
||||
}
|
||||
},
|
||||
"./data-structs": {
|
||||
"require": {
|
||||
"types": "./data-structs/dist/index.d.cts",
|
||||
"default": "./data-structs/dist/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./data-structs/dist/index.d.ts",
|
||||
"default": "./data-structs/dist/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./data-structs/dist/index.d.ts",
|
||||
"default": "./data-structs/dist/index.js"
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -289,7 +317,10 @@
|
||||
"./storage",
|
||||
"./response-synthesizers",
|
||||
"./chat-engine",
|
||||
"./retriever"
|
||||
"./retriever",
|
||||
"./vector-store",
|
||||
"./tools",
|
||||
"./data-structs"
|
||||
],
|
||||
"scripts": {
|
||||
"dev": "bunchee --watch",
|
||||
@@ -303,7 +334,7 @@
|
||||
"devDependencies": {
|
||||
"@edge-runtime/vm": "^4.0.3",
|
||||
"ajv": "^8.17.1",
|
||||
"bunchee": "5.3.2",
|
||||
"bunchee": "5.5.1",
|
||||
"happy-dom": "^15.7.4",
|
||||
"natural": "^8.0.1",
|
||||
"python-format-js": "^1.4.3"
|
||||
@@ -312,6 +343,7 @@
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"@types/node": "^22.5.1",
|
||||
"magic-bytes.js": "^1.10.0",
|
||||
"zod": "^3.23.8"
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.3"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
import { randomUUID } from "@llamaindex/env";
|
||||
import type { UUID } from "../global";
|
||||
import { IndexStructType } from "./struct-type";
|
||||
|
||||
/**
 * Base class for index structures: holds the index identifier and an
 * optional summary, and knows how to serialise both.
 */
export abstract class IndexStruct {
  indexId: string;
  summary: string | undefined;

  /**
   * @param indexId Identifier of the index; defaults to the value returned by `randomUUID()`.
   * @param summary Optional summary of the index.
   */
  constructor(
    indexId: UUID = randomUUID(),
    summary: string | undefined = undefined,
  ) {
    this.indexId = indexId;
    this.summary = summary;
  }

  /**
   * @returns A plain object with the `indexId` and `summary` fields.
   */
  toJson(): Record<string, unknown> {
    const { indexId, summary } = this;
    return { indexId, summary };
  }

  /**
   * @returns The summary of this index struct.
   * @throws Error when no summary has been set.
   */
  getSummary(): string {
    const { summary } = this;
    if (summary !== undefined) {
      return summary;
    }
    throw new Error("summary field of the index struct is not set");
  }
}
|
||||
|
||||
/**
 * A table of keywords mapping keywords to text chunks
 * (each keyword maps to the set of node ids registered under it).
 */
export class KeywordTable extends IndexStruct {
  table: Map<string, Set<string>> = new Map();
  type: IndexStructType = IndexStructType.KEYWORD_TABLE;

  /**
   * Register `nodeId` under every keyword, creating the keyword entry on first use.
   */
  addNode(keywords: string[], nodeId: string): void {
    keywords.forEach((keyword) => {
      let nodeIds = this.table.get(keyword);
      if (nodeIds === undefined) {
        nodeIds = new Set();
        this.table.set(keyword, nodeIds);
      }
      nodeIds.add(nodeId);
    });
  }

  /**
   * Remove `nodeId` from every listed keyword that exists in the table.
   * The keyword entry itself is kept, even if its set becomes empty.
   */
  deleteNode(keywords: string[], nodeId: string) {
    keywords.forEach((keyword) => {
      this.table.get(keyword)?.delete(nodeId);
    });
  }

  /**
   * @returns The base fields plus `table` (keyword -> array of node ids) and `type`.
   */
  toJson(): Record<string, unknown> {
    const table: Record<string, string[]> = {};
    Array.from(this.table.entries()).forEach(([keyword, nodeIds]) => {
      table[keyword] = Array.from(nodeIds);
    });
    return {
      ...super.toJson(),
      table,
      type: this.type,
    };
  }
}
|
||||
@@ -0,0 +1,2 @@
|
||||
export { IndexStruct, KeywordTable } from "./data-structs";
|
||||
export { IndexStructType } from "./struct-type";
|
||||
@@ -0,0 +1,39 @@
|
||||
/**
 * String identifiers for the supported index struct kinds.
 * Declared as a const object (rather than an enum) so the values are plain
 * string literals at runtime.
 */
export const IndexStructType = {
  NODE: "node",
  TREE: "tree",
  LIST: "list",
  KEYWORD_TABLE: "keyword_table",
  DICT: "dict",
  SIMPLE_DICT: "simple_dict",
  WEAVIATE: "weaviate",
  PINECONE: "pinecone",
  QDRANT: "qdrant",
  LANCEDB: "lancedb",
  MILVUS: "milvus",
  CHROMA: "chroma",
  MYSCALE: "myscale",
  CLICKHOUSE: "clickhouse",
  VECTOR_STORE: "vector_store",
  OPENSEARCH: "opensearch",
  DASHVECTOR: "dashvector",
  CHATGPT_RETRIEVAL_PLUGIN: "chatgpt_retrieval_plugin",
  DEEPLAKE: "deeplake",
  EPSILLA: "epsilla",
  MULTIMODAL_VECTOR_STORE: "multimodal",
  SQL: "sql",
  KG: "kg",
  SIMPLE_KG: "simple_kg",
  SIMPLE_LPG: "simple_lpg",
  NEBULAGRAPH: "nebulagraph",
  FALKORDB: "falkordb",
  EMPTY: "empty",
  COMPOSITE: "composite",
  PANDAS: "pandas",
  DOCUMENT_SUMMARY: "document_summary",
  VECTARA: "vectara",
  ZILLIZ_CLOUD_PIPELINE: "zilliz_cloud_pipeline",
  POSTGRESML: "postgresml",
} as const;

/** Union of every string value in the `IndexStructType` object. */
export type IndexStructType =
  (typeof IndexStructType)[keyof typeof IndexStructType];
|
||||
@@ -8,18 +8,16 @@ import {
|
||||
Settings,
|
||||
} from "../global";
|
||||
import type { LLMMetadata } from "../llms";
|
||||
import { SentenceSplitter } from "../node-parser";
|
||||
import type { PromptTemplate } from "../prompts";
|
||||
import { TextSplitter, TokenTextSplitter, truncateText } from "../node-parser";
|
||||
import { BasePromptTemplate, PromptTemplate } from "../prompts";
|
||||
|
||||
/**
|
||||
* Get the empty prompt text given a prompt.
|
||||
*/
|
||||
function getEmptyPromptTxt(prompt: PromptTemplate) {
|
||||
return prompt.format({
|
||||
...Object.fromEntries(
|
||||
[...prompt.templateVars.keys()].map((key) => [key, ""]),
|
||||
),
|
||||
});
|
||||
function getEmptyPromptTxt(prompt: PromptTemplate): string {
|
||||
return prompt.format(
|
||||
Object.fromEntries([...prompt.templateVars.keys()].map((key) => [key, ""])),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,24 +33,24 @@ export function getBiggestPrompt(prompts: PromptTemplate[]): PromptTemplate {
|
||||
}
|
||||
|
||||
export type PromptHelperOptions = {
|
||||
contextWindow?: number;
|
||||
numOutput?: number;
|
||||
chunkOverlapRatio?: number;
|
||||
chunkSizeLimit?: number;
|
||||
tokenizer?: Tokenizer;
|
||||
separator?: string;
|
||||
contextWindow?: number | undefined;
|
||||
numOutput?: number | undefined;
|
||||
chunkOverlapRatio?: number | undefined;
|
||||
chunkSizeLimit?: number | undefined;
|
||||
tokenizer?: Tokenizer | undefined;
|
||||
separator?: string | undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
* A collection of helper functions for working with prompts.
|
||||
*/
|
||||
export class PromptHelper {
|
||||
contextWindow = DEFAULT_CONTEXT_WINDOW;
|
||||
numOutput = DEFAULT_NUM_OUTPUTS;
|
||||
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
|
||||
contextWindow: number;
|
||||
numOutput: number;
|
||||
chunkOverlapRatio: number;
|
||||
chunkSizeLimit: number | undefined;
|
||||
tokenizer: Tokenizer;
|
||||
separator = " ";
|
||||
separator: string;
|
||||
|
||||
constructor(options: PromptHelperOptions = {}) {
|
||||
const {
|
||||
@@ -72,68 +70,93 @@ export class PromptHelper {
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a prompt, return the maximum size of the inputs to the prompt.
|
||||
* @param prompt
|
||||
* @returns
|
||||
* Calculate the available context size based on the number of prompt tokens.
|
||||
*/
|
||||
private getAvailableContextSize(prompt: PromptTemplate) {
|
||||
const emptyPromptText = getEmptyPromptTxt(prompt);
|
||||
const promptTokens = this.tokenizer.encode(emptyPromptText);
|
||||
const numPromptTokens = promptTokens.length;
|
||||
|
||||
return this.contextWindow - numPromptTokens - this.numOutput;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the maximum size of each chunk given a prompt.
|
||||
*/
|
||||
private getAvailableChunkSize(
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = 5,
|
||||
): number {
|
||||
const availableContextSize = this.getAvailableContextSize(prompt);
|
||||
|
||||
const result = Math.floor(availableContextSize / numChunks) - padding;
|
||||
|
||||
if (this.chunkSizeLimit) {
|
||||
return Math.min(this.chunkSizeLimit, result);
|
||||
} else {
|
||||
return result;
|
||||
#getAvailableContextSize(numPromptTokens: number): number {
|
||||
const contextSizeTokens =
|
||||
this.contextWindow - numPromptTokens - this.numOutput;
|
||||
if (contextSizeTokens < 0) {
|
||||
throw new Error(
|
||||
`Calculated available context size ${contextSizeTokens} is not non-negative.`,
|
||||
);
|
||||
}
|
||||
return contextSizeTokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a text splitter with the correct chunk sizes and overlaps given a prompt.
|
||||
* Calculate the available chunk size based on the prompt and other parameters.
|
||||
*/
|
||||
#getAvailableChunkSize<Template extends BasePromptTemplate>(
|
||||
prompt: Template,
|
||||
numChunks: number = 1,
|
||||
padding: number = 5,
|
||||
): number {
|
||||
let numPromptTokens = 0;
|
||||
|
||||
if (prompt instanceof PromptTemplate) {
|
||||
numPromptTokens = this.tokenizer.encode(getEmptyPromptTxt(prompt)).length;
|
||||
}
|
||||
|
||||
const availableContextSize = this.#getAvailableContextSize(numPromptTokens);
|
||||
let result = Math.floor(availableContextSize / numChunks) - padding;
|
||||
|
||||
if (this.chunkSizeLimit !== undefined) {
|
||||
result = Math.min(this.chunkSizeLimit, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a text splitter configured to maximally pack the available context window.
|
||||
*/
|
||||
getTextSplitterGivenPrompt(
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
const chunkSize = this.getAvailableChunkSize(prompt, numChunks, padding);
|
||||
if (chunkSize === 0) {
|
||||
throw new Error("Got 0 as available chunk size");
|
||||
prompt: BasePromptTemplate,
|
||||
numChunks: number = 1,
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): TextSplitter {
|
||||
const chunkSize = this.#getAvailableChunkSize(prompt, numChunks, padding);
|
||||
if (chunkSize <= 0) {
|
||||
throw new TypeError(`Chunk size ${chunkSize} is not positive.`);
|
||||
}
|
||||
const chunkOverlap = this.chunkOverlapRatio * chunkSize;
|
||||
return new SentenceSplitter({
|
||||
const chunkOverlap = Math.floor(this.chunkOverlapRatio * chunkSize);
|
||||
return new TokenTextSplitter({
|
||||
separator: this.separator,
|
||||
chunkSize,
|
||||
chunkOverlap,
|
||||
separator: this.separator,
|
||||
tokenizer: this.tokenizer,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Repack resplits the strings based on the optimal text splitter.
|
||||
* Truncate text chunks to fit within the available context window.
|
||||
*/
|
||||
truncate(
|
||||
prompt: BasePromptTemplate,
|
||||
textChunks: string[],
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): string[] {
|
||||
const textSplitter = this.getTextSplitterGivenPrompt(
|
||||
prompt,
|
||||
textChunks.length,
|
||||
padding,
|
||||
);
|
||||
return textChunks.map((chunk) => truncateText(chunk, textSplitter));
|
||||
}
|
||||
|
||||
/**
|
||||
* Repack text chunks to better utilize the available context window.
|
||||
*/
|
||||
repack(
|
||||
prompt: PromptTemplate,
|
||||
prompt: BasePromptTemplate,
|
||||
textChunks: string[],
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
padding: number = DEFAULT_PADDING,
|
||||
): string[] {
|
||||
const textSplitter = this.getTextSplitterGivenPrompt(prompt, 1, padding);
|
||||
const combinedStr = textChunks.join("\n\n");
|
||||
const combinedStr = textChunks
|
||||
.map((c) => c.trim())
|
||||
.filter((c) => c.length > 0)
|
||||
.join("\n\n");
|
||||
return textSplitter.splitText(combinedStr);
|
||||
}
|
||||
|
||||
@@ -154,7 +177,8 @@ export class PromptHelper {
|
||||
} = options ?? {};
|
||||
return new PromptHelper({
|
||||
contextWindow: metadata.contextWindow,
|
||||
numOutput: metadata.maxTokens ?? DEFAULT_NUM_OUTPUTS,
|
||||
// fixme: numOutput is not in LLMMetadata
|
||||
numOutput: DEFAULT_NUM_OUTPUTS,
|
||||
chunkOverlapRatio,
|
||||
chunkSizeLimit,
|
||||
tokenizer,
|
||||
|
||||
@@ -13,6 +13,7 @@ export { MetadataAwareTextSplitter, NodeParser, TextSplitter } from "./base";
|
||||
export { MarkdownNodeParser } from "./markdown";
|
||||
export { SentenceSplitter } from "./sentence-splitter";
|
||||
export { SentenceWindowNodeParser } from "./sentence-window";
|
||||
export { TokenTextSplitter } from "./token-text-splitter";
|
||||
export type { SplitterParams } from "./type";
|
||||
export {
|
||||
splitByChar,
|
||||
@@ -20,5 +21,6 @@ export {
|
||||
splitByRegex,
|
||||
splitBySentenceTokenizer,
|
||||
splitBySep,
|
||||
truncateText,
|
||||
} from "./utils";
|
||||
export type { TextSplitterFn } from "./utils";
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
import type { Tokenizer } from "@llamaindex/env";
|
||||
import { z } from "zod";
|
||||
import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, Settings } from "../global";
|
||||
import { MetadataAwareTextSplitter } from "./base";
|
||||
import type { SplitterParams } from "./type";
|
||||
import { splitByChar, splitBySep } from "./utils";
|
||||
|
||||
// Added to the metadata token count when reserving room for metadata in a chunk.
const DEFAULT_METADATA_FORMAT_LEN = 2;

// Validates the splitter options and fills in defaults for omitted ones.
const tokenTextSplitterSchema = z.object({
  chunkSize: z.number().positive().default(DEFAULT_CHUNK_SIZE),
  chunkOverlap: z.number().nonnegative().default(DEFAULT_CHUNK_OVERLAP),
  separator: z.string().default(" "),
  backupSeparators: z.array(z.string()).default(["\n"]),
});

/**
 * Splits text into chunks whose size is measured in tokens.
 *
 * Text is first broken on `separator`, then on each of `backupSeparators`,
 * and finally on single characters, until every piece fits in the chunk
 * size; the pieces are then merged back into chunks that share up to
 * `chunkOverlap` tokens with their predecessor.
 */
export class TokenTextSplitter extends MetadataAwareTextSplitter {
  chunkSize: number = DEFAULT_CHUNK_SIZE;
  chunkOverlap: number = DEFAULT_CHUNK_OVERLAP;
  separator: string = " ";
  backupSeparators: string[] = ["\n"];
  #tokenizer: Tokenizer;
  #splitFns: Array<(text: string) => string[]> = [];

  /**
   * @param params Optional splitter options; omitted fields fall back to the schema defaults.
   *               `params.tokenizer` falls back to `Settings.tokenizer`.
   * @throws Error when the chunk overlap is larger than the chunk size
   *         (schema validation errors from zod propagate as well).
   */
  constructor(
    params?: SplitterParams & Partial<z.infer<typeof tokenTextSplitterSchema>>,
  ) {
    super();

    if (params) {
      const parsedParams = tokenTextSplitterSchema.parse(params);
      this.chunkSize = parsedParams.chunkSize;
      this.chunkOverlap = parsedParams.chunkOverlap;
      this.separator = parsedParams.separator;
      this.backupSeparators = parsedParams.backupSeparators;
    }

    if (this.chunkOverlap > this.chunkSize) {
      throw new Error(
        `Got a larger chunk overlap (${this.chunkOverlap}) than chunk size (${this.chunkSize}), should be smaller.`,
      );
    }

    this.#tokenizer = params?.tokenizer ?? Settings.tokenizer;

    // Split functions are tried in order: primary separator, backups, then per character.
    const allSeparators = [this.separator, ...this.backupSeparators];
    this.#splitFns = allSeparators.map((sep) => splitBySep(sep));
    this.#splitFns.push(splitByChar());
  }

  /**
   * Split text into chunks, reserving space required for metadata string.
   * @param text The text to split.
   * @param metadata The metadata string.
   * @returns An array of text chunks.
   * @throws Error when the metadata leaves no room for text in a chunk.
   */
  splitTextMetadataAware(text: string, metadata: string): string[] {
    const metadataLength =
      this.tokenSize(metadata) + DEFAULT_METADATA_FORMAT_LEN;
    const effectiveChunkSize = this.chunkSize - metadataLength;

    if (effectiveChunkSize <= 0) {
      throw new Error(
        `Metadata length (${metadataLength}) is longer than chunk size (${this.chunkSize}). ` +
          `Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
      );
    } else if (effectiveChunkSize < 50) {
      console.warn(
        `Metadata length (${metadataLength}) is close to chunk size (${this.chunkSize}). ` +
          `Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.`,
      );
    }

    return this._splitText(text, effectiveChunkSize);
  }

  /**
   * Split text into chunks.
   * @param text The text to split.
   * @returns An array of text chunks.
   */
  splitText(text: string): string[] {
    return this._splitText(text, this.chunkSize);
  }

  /**
   * Internal method to split text into chunks up to a specified size.
   * Dispatches the "chunking-start" and "chunking-end" events around the work.
   * @param text The text to split.
   * @param chunkSize The maximum size of each chunk.
   * @returns An array of text chunks; `[""]` for empty input.
   */
  private _splitText(text: string, chunkSize: number): string[] {
    if (text === "") return [text];

    // Dispatch chunking start event
    Settings.callbackManager.dispatchEvent("chunking-start", { text: [text] });

    const splits = this._split(text, chunkSize);
    const chunks = this._merge(splits, chunkSize);

    Settings.callbackManager.dispatchEvent("chunking-end", { chunks });

    return chunks;
  }

  /**
   * Break text into splits that are smaller than the chunk size.
   * @param text The text to split.
   * @param chunkSize The maximum size of each split.
   * @returns An array of text splits.
   */
  private _split(text: string, chunkSize: number): string[] {
    if (this.tokenSize(text) <= chunkSize) {
      return [text];
    }

    for (const splitFn of this.#splitFns) {
      const splits = splitFn(text);
      // Use the first split function that actually divides the text.
      if (splits.length > 1) {
        const newSplits: string[] = [];
        for (const split of splits) {
          const splitLen = this.tokenSize(split);
          if (splitLen <= chunkSize) {
            newSplits.push(split);
          } else {
            newSplits.push(...this._split(split, chunkSize));
          }
        }
        return newSplits;
      }
    }

    // Nothing could divide the text further; return it even though it is oversized.
    return [text];
  }

  /**
   * Merge splits into chunks with overlap.
   *
   * When adding a split would exceed `chunkSize`, the current chunk is
   * emitted and the next chunk is seeded with the trailing splits of the
   * emitted one: leading splits are dropped until the carried-over part is
   * within `chunkOverlap` tokens and leaves room for the incoming split.
   *
   * @param splits The array of text splits.
   * @param chunkSize The maximum size of each chunk.
   * @returns An array of merged text chunks.
   */
  private _merge(splits: string[], chunkSize: number): string[] {
    const chunks: string[] = [];
    const currentChunk: string[] = [];
    // Token length of each entry in currentChunk, kept in step with it so
    // trimming the front does not need to re-tokenize.
    const currentLengths: number[] = [];
    let currentLength = 0;

    for (const split of splits) {
      const splitLength = this.tokenSize(split);

      if (splitLength > chunkSize) {
        console.warn(
          `Got a split of size ${splitLength}, larger than chunk size ${chunkSize}.`,
        );
      }

      if (currentLength + splitLength > chunkSize) {
        const chunk = currentChunk.join("").trim();
        if (chunk) {
          chunks.push(chunk);
        }

        // Keep only the tail of the emitted chunk as overlap. The length
        // guard stops the loop when a single split is itself oversized.
        while (
          currentChunk.length > 0 &&
          (currentLength > this.chunkOverlap ||
            currentLength + splitLength > chunkSize)
        ) {
          currentChunk.shift();
          currentLength -= currentLengths.shift() ?? 0;
        }
      }

      currentChunk.push(split);
      currentLengths.push(splitLength);
      currentLength += splitLength;
    }

    const finalChunk = currentChunk.join("").trim();
    if (finalChunk) {
      chunks.push(finalChunk);
    }

    return chunks;
  }

  /**
   * Calculate the number of tokens in the text using the tokenizer.
   * @param text The text to tokenize.
   * @returns The number of tokens.
   */
  private tokenSize(text: string): number {
    return this.#tokenizer.encode(text).length;
  }
}
|
||||
@@ -3,7 +3,10 @@ import SentenceTokenizer from "./sentence_tokenizer";
|
||||
|
||||
export type TextSplitterFn = (text: string) => string[];
|
||||
|
||||
const truncateText = (text: string, textSplitter: TextSplitter): string => {
|
||||
export const truncateText = (
|
||||
text: string,
|
||||
textSplitter: TextSplitter,
|
||||
): string => {
|
||||
const chunks = textSplitter.splitText(text);
|
||||
return chunks[0] ?? text;
|
||||
};
|
||||
|
||||
@@ -12,11 +12,15 @@ export {
|
||||
defaultCondenseQuestionPrompt,
|
||||
defaultContextSystemPrompt,
|
||||
defaultKeywordExtractPrompt,
|
||||
defaultNodeTextTemplate,
|
||||
defaultQueryKeywordExtractPrompt,
|
||||
defaultQuestionExtractPrompt,
|
||||
defaultRefinePrompt,
|
||||
defaultSubQuestionPrompt,
|
||||
defaultSummaryPrompt,
|
||||
defaultTextQAPrompt,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
defaultTreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
export type {
|
||||
@@ -25,9 +29,12 @@ export type {
|
||||
ContextSystemPrompt,
|
||||
KeywordExtractPrompt,
|
||||
QueryKeywordExtractPrompt,
|
||||
QuestionExtractPrompt,
|
||||
RefinePrompt,
|
||||
SubQuestionPrompt,
|
||||
SummaryPrompt,
|
||||
TextQAPrompt,
|
||||
TitleCombinePrompt,
|
||||
TitleExtractorPrompt,
|
||||
TreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
|
||||
@@ -13,8 +13,12 @@ export type CondenseQuestionPrompt = PromptTemplate<
|
||||
["chatHistory", "question"]
|
||||
>;
|
||||
export type ContextSystemPrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractPrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractPrompt = PromptTemplate<["context", "maxKeywords"]>;
|
||||
export type QueryKeywordExtractPrompt = PromptTemplate<["question"]>;
|
||||
export type QuestionExtractPrompt = PromptTemplate<["context", "numQuestions"]>;
|
||||
export type TitleExtractorPrompt = PromptTemplate<["context"]>;
|
||||
export type TitleCombinePrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractorPrompt = PromptTemplate<["context", "numKeywords"]>;
|
||||
|
||||
export const defaultTextQAPrompt: TextQAPrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
@@ -64,11 +68,13 @@ export const defaultRefinePrompt: RefinePrompt = new PromptTemplate({
|
||||
templateVars: ["query", "existingAnswer", "context"],
|
||||
template: `The original query is as follows: {query}
|
||||
We have provided an existing answer: {existingAnswer}
|
||||
We have the opportunity to refine the existing answer (only if needed) with some more context below.
|
||||
We have the opportunity to refine the existing answer
|
||||
(only if needed) with some more context below.
|
||||
------------
|
||||
{context}
|
||||
------------
|
||||
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
|
||||
Given the new context, refine the original answer to better answer the query.
|
||||
If the context isn't useful, return the original answer.
|
||||
Refined Answer:`,
|
||||
});
|
||||
|
||||
@@ -251,3 +257,55 @@ export const defaultQueryKeywordExtractPrompt = new PromptTemplate({
|
||||
}).partialFormat({
|
||||
maxKeywords: "10",
|
||||
});
|
||||
|
||||
/**
 * Default prompt asking for `numQuestions` questions that the given
 * `context` can answer; `numQuestions` is preset to "5".
 *
 * NOTE(review): the template text contains the enclosing parentheses and
 * double quotes literally, which looks like a port of a concatenated
 * string literal; confirm whether that is intended before changing it.
 */
export const defaultQuestionExtractPrompt = new PromptTemplate({
  template: `(
"Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
"Try using these summaries to generate better questions that this context can answer."
"---------------------"
"{context}"
"---------------------"
"Provide questions in the following format: 'QUESTIONS: <questions>'"
)`,
  templateVars: ["numQuestions", "context"],
}).partialFormat({
  numQuestions: "5",
});
|
||||
|
||||
/**
 * Default prompt that places `context` first and then asks for a title
 * summarizing the entities, titles or themes found in it.
 */
export const defaultTitleExtractorPromptTemplate = new PromptTemplate({
  template: `{context}
Give a title that summarizes all of the unique entities, titles or themes found in the context.
Title: `,
  templateVars: ["context"],
});
|
||||
|
||||
/**
 * Default prompt that places `context` (candidate titles and contents)
 * first and then asks for one comprehensive document title.
 */
export const defaultTitleCombinePromptTemplate = new PromptTemplate({
  template: `{context}
Based on the above candidate titles and contents, what is the comprehensive title for this document?
Title: `,
  templateVars: ["context"],
});
|
||||
|
||||
/**
 * Default prompt asking for `numKeywords` comma-separated keywords
 * describing `context`.
 *
 * `numKeywords` is preset to "5". The preset key must match the template
 * variable name, otherwise `{numKeywords}` has no default value.
 */
export const defaultKeywordExtractorPromptTemplate = new PromptTemplate({
  templateVars: ["context", "numKeywords"],
  template: `{context}
Give {numKeywords} unique keywords for this document.
Format as comma separated.
Keywords: `,
}).partialFormat({
  numKeywords: "5",
});
|
||||
|
||||
/**
 * Default template for rendering a node as a document excerpt: the
 * metadata string followed by the content between dashed rules.
 * Both variables are preset to the empty string.
 */
export const defaultNodeTextTemplate = new PromptTemplate({
  template: `[Excerpt from document]
{metadataStr}
Excerpt:
-----
{content}
-----
`,
  templateVars: ["metadataStr", "content"],
}).partialFormat({
  metadataStr: "",
  content: "",
});
|
||||
|
||||
@@ -403,27 +403,27 @@ class MultiModal extends BaseSynthesizer {
|
||||
}
|
||||
}
|
||||
|
||||
export function getResponseSynthesizer(
|
||||
mode: ResponseMode,
|
||||
const modeToSynthesizer = {
|
||||
compact: CompactAndRefine,
|
||||
refine: Refine,
|
||||
tree_summarize: TreeSummarize,
|
||||
multi_modal: MultiModal,
|
||||
} as const;
|
||||
|
||||
export function getResponseSynthesizer<Mode extends ResponseMode>(
|
||||
mode: Mode,
|
||||
options: BaseSynthesizerOptions & {
|
||||
textQATemplate?: TextQAPrompt;
|
||||
refineTemplate?: RefinePrompt;
|
||||
summaryTemplate?: TreeSummarizePrompt;
|
||||
metadataMode?: MetadataMode;
|
||||
} = {},
|
||||
) {
|
||||
switch (mode) {
|
||||
case "compact": {
|
||||
return new CompactAndRefine(options);
|
||||
}
|
||||
case "refine": {
|
||||
return new Refine(options);
|
||||
}
|
||||
case "tree_summarize": {
|
||||
return new TreeSummarize(options);
|
||||
}
|
||||
case "multi_modal": {
|
||||
return new MultiModal(options);
|
||||
}
|
||||
): InstanceType<(typeof modeToSynthesizer)[Mode]> {
|
||||
const Synthesizer: (typeof modeToSynthesizer)[Mode] = modeToSynthesizer[mode];
|
||||
if (!Synthesizer) {
|
||||
throw new Error(`Invalid response mode: ${mode}`);
|
||||
}
|
||||
return new Synthesizer(options) as InstanceType<
|
||||
(typeof modeToSynthesizer)[Mode]
|
||||
>;
|
||||
}
|
||||
|
||||
@@ -479,7 +479,7 @@ export function buildNodeFromSplits(
|
||||
) {
|
||||
const imageDoc = doc as ImageNode;
|
||||
const imageNode = new ImageNode({
|
||||
id_: imageDoc.id_ ?? idGenerator(i, imageDoc),
|
||||
id_: idGenerator(i, imageDoc),
|
||||
text: textChunk,
|
||||
image: imageDoc.image,
|
||||
embedding: imageDoc.embedding,
|
||||
@@ -496,7 +496,7 @@ export function buildNodeFromSplits(
|
||||
) {
|
||||
const textDoc = doc as TextNode;
|
||||
const node = new TextNode({
|
||||
id_: textDoc.id_ ?? idGenerator(i, textDoc),
|
||||
id_: idGenerator(i, textDoc),
|
||||
text: textChunk,
|
||||
embedding: textDoc.embedding,
|
||||
excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys],
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
import type { JSONSchemaType } from "ajv";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import type { JSONValue } from "../global";
|
||||
import type { BaseTool, ToolMetadata } from "../llms";
|
||||
|
||||
const kOriginalFn = Symbol("originalFn");

/**
 * Wraps a plain function together with its tool metadata so it can be used
 * wherever a `BaseTool` is expected.
 *
 * @typeParam T Input type accepted by the wrapped function.
 * @typeParam R Result type of the wrapped function (a JSON value or a promise of one).
 */
export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
  implements BaseTool<T>
{
  [kOriginalFn]?: (input: T) => R;

  #fn: (input: T) => R;
  #metadata: ToolMetadata<JSONSchemaType<T>>;
  // todo: for the future, we can use zod to validate the input parameters
  #zodType: z.ZodType<T> | null = null;

  /**
   * @param fn Function invoked by `call`.
   * @param metadata Tool metadata, with parameters described as JSON Schema.
   * @param zodType Optional Zod schema the metadata parameters were derived from.
   */
  constructor(
    fn: (input: T) => R,
    metadata: ToolMetadata<JSONSchemaType<T>>,
    zodType?: z.ZodType<T>,
  ) {
    this.#fn = fn;
    this.#metadata = metadata;
    if (zodType) {
      this.#zodType = zodType;
    }
  }

  /**
   * Build a tool from a function and its metadata.
   *
   * `schema.parameters` may be either a JSON Schema object or a Zod schema;
   * a Zod schema is converted to JSON Schema with `zodToJsonSchema` and the
   * original Zod schema is kept on the tool.
   */
  static from<T>(
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: ToolMetadata<JSONSchemaType<T>>,
  ): FunctionTool<T, JSONValue | Promise<JSONValue>>;
  static from<T, R extends z.ZodType<T>>(
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: Omit<ToolMetadata, "parameters"> & {
      parameters: R;
    },
  ): FunctionTool<T, JSONValue>;
  static from(fn: any, schema: any): any {
    // The metadata key is `parameters` (plural), matching both overloads.
    if (schema.parameters instanceof z.ZodSchema) {
      const jsonSchema = zodToJsonSchema(schema.parameters);
      return new FunctionTool(
        fn,
        {
          ...schema,
          parameters: jsonSchema,
        },
        schema.parameters,
      );
    }
    return new FunctionTool(fn, schema);
  }

  /** Metadata describing this tool. */
  get metadata(): BaseTool<T>["metadata"] {
    return this.#metadata as BaseTool<T>["metadata"];
  }

  /**
   * Invoke the wrapped function with `input` (called with a `null` `this`).
   */
  call(input: T) {
    return this.#fn.call(null, input);
  }
}
|
||||
@@ -0,0 +1 @@
|
||||
export { FunctionTool } from "./function-tool";
|
||||
@@ -2,6 +2,7 @@ import {
|
||||
SentenceSplitter,
|
||||
splitBySentenceTokenizer,
|
||||
} from "@llamaindex/core/node-parser";
|
||||
import { Document } from "@llamaindex/core/schema";
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
describe("sentence splitter", () => {
|
||||
@@ -115,4 +116,26 @@ describe("sentence splitter", () => {
|
||||
const split = splitBySentenceTokenizer();
|
||||
expect(split(text)).toEqual([text]);
|
||||
});
|
||||
|
||||
test("split nodes with UUID IDs and correct relationships", () => {
  const UUID_REGEX =
    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
  const sentenceSplitter = new SentenceSplitter();
  const docId = "test-doc-id";
  const doc = new Document({
    id_: docId,
    text: "This is a test sentence. This is another test sentence.",
  });
  const nodes = sentenceSplitter.getNodesFromDocuments([doc]);

  // Guard against a vacuous pass: the per-node checks below never run on an empty result.
  expect(nodes.length).toBeGreaterThan(0);

  nodes.forEach((node) => {
    // test node id should match uuid regex
    expect(node.id_).toMatch(UUID_REGEX);

    // test source reference to the doc ID
    const source = node.relationships?.SOURCE;
    expect(source).toBeDefined();
    expect(source).toHaveProperty("nodeId");
    expect((source as { nodeId: string }).nodeId).toEqual(docId);
  });
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import { FunctionTool } from "@llamaindex/core/tools";
|
||||
import { describe, test } from "vitest";
|
||||
import { z } from "zod";
|
||||
|
||||
describe("FunctionTool", () => {
  // Compile-time check only: each call must type-check against an overload of `from`.
  test("type system", () => {
    const echoObject = ({ input }: { input: string }) => input;

    // Metadata without a parameters schema.
    FunctionTool.from((input: string) => input, {
      name: "test",
      description: "test",
    });

    // Parameters given as a JSON Schema literal.
    FunctionTool.from(echoObject, {
      name: "test",
      description: "test",
      parameters: {
        type: "object",
        properties: {
          input: {
            type: "string",
          },
        },
        required: ["input"],
      },
    });

    // Parameters given as a Zod schema.
    const inputSchema = z
      .object({
        input: z.string(),
      })
      .required();
    FunctionTool.from(echoObject, {
      name: "test",
      description: "test",
      parameters: inputSchema,
    });
  });
});
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"type": "module",
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": "./dist/index.js",
|
||||
"private": true
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"type": "module",
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": "./dist/index.js",
|
||||
"private": true
|
||||
}
|
||||
Vendored
+7
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/env
|
||||
|
||||
## 0.1.14
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ae49ff4: feat: use `gpt-tokenizer`
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
|
||||
## 0.1.13
|
||||
|
||||
### Patch Changes
|
||||
|
||||
Vendored
+4
-4
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/env",
|
||||
"description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
|
||||
"version": "0.1.13",
|
||||
"version": "0.1.14",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
@@ -75,8 +75,8 @@
|
||||
"@swc/core": "^1.7.22",
|
||||
"@xenova/transformers": "^2.17.2",
|
||||
"concurrently": "^8.2.2",
|
||||
"gpt-tokenizer": "^2.5.0",
|
||||
"pathe": "^1.1.2",
|
||||
"tiktoken": "^1.0.16",
|
||||
"vitest": "^2.0.5"
|
||||
},
|
||||
"dependencies": {
|
||||
@@ -85,9 +85,9 @@
|
||||
"peerDependencies": {
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@xenova/transformers": "^2.17.2",
|
||||
"gpt-tokenizer": "^2.5.0",
|
||||
"js-tiktoken": "^1.0.12",
|
||||
"pathe": "^1.1.2",
|
||||
"tiktoken": "^1.0.15"
|
||||
"pathe": "^1.1.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@aws-crypto/sha256-js": {
|
||||
|
||||
Vendored
+1
-1
@@ -14,7 +14,7 @@ export {
|
||||
type OnLoad,
|
||||
} from "./multi-model/index.browser.js";
|
||||
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
|
||||
|
||||
export { NotSupportCurrentRuntimeClass } from "./utils/shared.js";
|
||||
// @ts-expect-error
|
||||
if (typeof window === "undefined") {
|
||||
console.warn(
|
||||
|
||||
Vendored
+1
@@ -14,3 +14,4 @@ export {
|
||||
type OnLoad,
|
||||
} from "./multi-model/index.non-nodejs.js";
|
||||
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
|
||||
export { NotSupportCurrentRuntimeClass } from "./utils/shared.js";
|
||||
|
||||
Vendored
+1
@@ -47,6 +47,7 @@ export {
|
||||
getEnv,
|
||||
setEnvs,
|
||||
} from "./utils/index.js";
|
||||
export { NotSupportCurrentRuntimeClass } from "./utils/shared.js";
|
||||
export {
|
||||
createWriteStream,
|
||||
EOL,
|
||||
|
||||
Vendored
+2
@@ -7,6 +7,8 @@
|
||||
*/
|
||||
import { INTERNAL_ENV } from "./utils/index.js";
|
||||
|
||||
export { NotSupportCurrentRuntimeClass } from "./utils/shared.js";
|
||||
|
||||
export * from "./node-polyfill.js";
|
||||
|
||||
export function getEnv(name: string): string | undefined {
|
||||
|
||||
Vendored
+7
-10
@@ -2,21 +2,18 @@
|
||||
import type { Tokenizer } from "./types.js";
|
||||
import { Tokenizers } from "./types.js";
|
||||
|
||||
import { get_encoding } from "tiktoken";
|
||||
import cl100kBase from "gpt-tokenizer";
|
||||
|
||||
class TokenizerSingleton {
|
||||
private defaultTokenizer: Tokenizer;
|
||||
#defaultTokenizer: Tokenizer;
|
||||
|
||||
constructor() {
|
||||
const encoding = get_encoding("cl100k_base");
|
||||
|
||||
this.defaultTokenizer = {
|
||||
encode: (text: string) => {
|
||||
return encoding.encode(text);
|
||||
this.#defaultTokenizer = {
|
||||
encode: (text: string): Uint32Array => {
|
||||
return new Uint32Array(cl100kBase.encode(text));
|
||||
},
|
||||
decode: (tokens: Uint32Array) => {
|
||||
const text = encoding.decode(tokens);
|
||||
return new TextDecoder().decode(text);
|
||||
return cl100kBase.decode(tokens);
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -26,7 +23,7 @@ class TokenizerSingleton {
|
||||
throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
|
||||
}
|
||||
|
||||
return this.defaultTokenizer;
|
||||
return this.#defaultTokenizer;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Vendored
+13
@@ -0,0 +1,13 @@
|
||||
export class NotSupportCurrentRuntimeClass {
|
||||
constructor(runtime: string) {
|
||||
throw new Error(`Current environment ${runtime} is not supported`);
|
||||
}
|
||||
|
||||
static bind(runtime: string) {
|
||||
return class extends NotSupportCurrentRuntimeClass {
|
||||
constructor(...args: any[]) {
|
||||
super(runtime);
|
||||
}
|
||||
} as any;
|
||||
}
|
||||
}
|
||||
Vendored
+11
@@ -0,0 +1,11 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { tokenizers } from "../src/tokenizers/node.js";
|
||||
|
||||
describe("tokenizer", () => {
|
||||
it("should tokenize text", () => {
|
||||
const tokenizer = tokenizers.tokenizer();
|
||||
expect(tokenizer.decode(tokenizer.encode("hello world"))).toBe(
|
||||
"hello world",
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,80 @@
|
||||
# @llamaindex/experimental
|
||||
|
||||
## 0.0.102
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.0.101
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.100
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.99
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.98
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.97
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.96
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.95
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.94
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.93
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/experimental",
|
||||
"description": "Experimental package for LlamaIndexTS",
|
||||
"version": "0.0.93",
|
||||
"version": "0.0.102",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,118 @@
|
||||
# llamaindex
|
||||
|
||||
## 0.7.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ae49ff4: feat: use `gpt-tokenizer`
|
||||
- 4c38c1b: fix(cloud): do not detect file type in llama parse
|
||||
- a75af83: feat: allow passing perform setup in pg vector store
|
||||
- a75af83: refactor: move some llm and embedding to single package
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [24d065f]
|
||||
- Updated dependencies [a75af83]
|
||||
- @llamaindex/env@0.1.14
|
||||
- @llamaindex/cloud@1.0.1
|
||||
- @llamaindex/huggingface@0.0.2
|
||||
- @llamaindex/portkey-ai@0.0.2
|
||||
- @llamaindex/anthropic@0.0.2
|
||||
- @llamaindex/deepinfra@0.0.2
|
||||
- @llamaindex/replicate@0.0.2
|
||||
- @llamaindex/ollama@0.0.9
|
||||
- @llamaindex/openai@0.1.18
|
||||
- @llamaindex/clip@0.0.2
|
||||
- @llamaindex/groq@0.0.17
|
||||
- @llamaindex/core@0.3.1
|
||||
|
||||
## 0.7.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 1364e8e: update metadata extractors to use PromptTemplate
|
||||
- 96fc69c: Correct initialization of QuestionsAnsweredExtractor so that it uses the promptTemplate arg when passed in
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 3b7736f: feat: added gemini 002 support
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [96fc69c]
|
||||
- @llamaindex/core@0.3.0
|
||||
- @llamaindex/cloud@1.0.0
|
||||
- @llamaindex/ollama@0.0.8
|
||||
- @llamaindex/openai@0.1.17
|
||||
- @llamaindex/groq@0.0.16
|
||||
|
||||
## 0.6.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5729bd9: Fix LlamaCloud API calls for ensuring an index and for file uploads
|
||||
|
||||
## 0.6.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 6f75306: feat: support metadata filters for AstraDB
|
||||
- 94cb4ad: feat: Add metadata filters to ChromaDb and update to 1.9.2
|
||||
|
||||
## 0.6.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 6a9a7b1: fix: take init api key into account
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- @llamaindex/openai@0.1.16
|
||||
- @llamaindex/groq@0.0.15
|
||||
|
||||
## 0.6.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 62cba52: Add ensureIndex function to LlamaCloudIndex
|
||||
- d265e96: fix: ignore resolving unpdf for nextjs
|
||||
- d30bbf7: Convert undefined values to null in LlamaCloud filters
|
||||
- 53fd00a: Fix getPipelineId in LlamaCloudIndex
|
||||
|
||||
## 0.6.18
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5f67820: Fix that node parsers generate nodes with UUIDs
|
||||
- fe08d04: Fix LlamaCloud retrieval with multiple pipelines
|
||||
- Updated dependencies [5f67820]
|
||||
- @llamaindex/core@0.2.12
|
||||
- @llamaindex/cloud@0.2.14
|
||||
- @llamaindex/ollama@0.0.7
|
||||
- @llamaindex/openai@0.1.15
|
||||
- @llamaindex/groq@0.0.14
|
||||
|
||||
## 0.6.17
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- ee697fb: fix: generate uuid when inserting to Qdrant
|
||||
- Updated dependencies [ee697fb]
|
||||
- @llamaindex/core@0.2.11
|
||||
- @llamaindex/cloud@0.2.13
|
||||
- @llamaindex/ollama@0.0.6
|
||||
- @llamaindex/openai@0.1.14
|
||||
- @llamaindex/groq@0.0.13
|
||||
|
||||
## 0.6.16
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 63e9846: fix: preFilters does not work with asQueryEngine
|
||||
- 6f3a31c: feat: add metadata filters for Qdrant vector store
|
||||
- Updated dependencies [3489e7d]
|
||||
- Updated dependencies [468bda5]
|
||||
- @llamaindex/core@0.2.10
|
||||
- @llamaindex/cloud@0.2.12
|
||||
- @llamaindex/ollama@0.0.5
|
||||
- @llamaindex/openai@0.1.13
|
||||
- @llamaindex/groq@0.0.12
|
||||
|
||||
## 0.6.15
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,79 @@
|
||||
# @llamaindex/cloudflare-worker-agent-test
|
||||
|
||||
## 0.0.86
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloudflare-worker-agent-test",
|
||||
"version": "0.0.77",
|
||||
"version": "0.0.86",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,38 @@
|
||||
# @llamaindex/llama-parse-browser-test
|
||||
|
||||
## 0.0.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [24d065f]
|
||||
- Updated dependencies [a75af83]
|
||||
- @llamaindex/cloud@1.0.1
|
||||
|
||||
## 0.0.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@1.0.0
|
||||
|
||||
## 0.0.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.14
|
||||
|
||||
## 0.0.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.13
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @llamaindex/cloud@0.2.12
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/llama-parse-browser-test",
|
||||
"private": true,
|
||||
"version": "0.0.7",
|
||||
"version": "0.0.12",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
|
||||
@@ -1,5 +1,79 @@
|
||||
# @llamaindex/next-agent-test
|
||||
|
||||
## 0.1.86
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.1.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.1.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.1.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.1.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.1.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.1.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.1.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.1.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.1.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-agent-test",
|
||||
"version": "0.1.77",
|
||||
"version": "0.1.86",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,79 @@
|
||||
# test-edge-runtime
|
||||
|
||||
## 0.1.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.1.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.1.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.1.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.1.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.1.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.1.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.1.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.1.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.1.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/nextjs-edge-runtime-test",
|
||||
"version": "0.1.76",
|
||||
"version": "0.1.85",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,19 +1,12 @@
|
||||
import { tokenizerResultPromise } from "@/utils/llm";
|
||||
import { use } from "react";
|
||||
import "@/utils/llm";
|
||||
|
||||
export const runtime = "edge";
|
||||
|
||||
export default function Home() {
|
||||
const result = use(tokenizerResultPromise);
|
||||
return (
|
||||
<main>
|
||||
<div>
|
||||
<h1>Next.js Edge Runtime</h1>
|
||||
<div>
|
||||
{result.map((value, index) => (
|
||||
<span key={index}>{value}</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
);
|
||||
|
||||
@@ -1,23 +1,8 @@
|
||||
// test runtime
|
||||
import "llamaindex";
|
||||
import { ClipEmbedding } from "llamaindex";
|
||||
import "llamaindex/readers/SimpleDirectoryReader";
|
||||
|
||||
// @ts-expect-error
|
||||
if (typeof EdgeRuntime !== "string") {
|
||||
throw new Error("Expected run in EdgeRuntime");
|
||||
}
|
||||
|
||||
export const tokenizerResultPromise = new Promise<number[]>(
|
||||
(resolve, reject) => {
|
||||
const embedding = new ClipEmbedding();
|
||||
//#region make sure @xenova/transformers is working in edge runtime
|
||||
embedding
|
||||
.getTokenizer()
|
||||
.then((tokenizer) => {
|
||||
resolve(tokenizer.encode("hello world"));
|
||||
})
|
||||
.catch(reject);
|
||||
//#endregion
|
||||
},
|
||||
);
|
||||
|
||||
@@ -1,5 +1,79 @@
|
||||
# @llamaindex/next-node-runtime
|
||||
|
||||
## 0.0.67
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.0.66
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.65
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.64
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.63
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.62
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.61
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.60
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.59
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.58
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-node-runtime-test",
|
||||
"version": "0.0.58",
|
||||
"version": "0.0.67",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,79 @@
|
||||
# @llamaindex/waku-query-engine-test
|
||||
|
||||
## 0.0.86
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae49ff4]
|
||||
- Updated dependencies [4c38c1b]
|
||||
- Updated dependencies [a75af83]
|
||||
- Updated dependencies [a75af83]
|
||||
- llamaindex@0.7.1
|
||||
|
||||
## 0.0.85
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [1364e8e]
|
||||
- Updated dependencies [3b7736f]
|
||||
- Updated dependencies [96fc69c]
|
||||
- llamaindex@0.7.0
|
||||
|
||||
## 0.0.84
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5729bd9]
|
||||
- llamaindex@0.6.22
|
||||
|
||||
## 0.0.83
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6f75306]
|
||||
- Updated dependencies [94cb4ad]
|
||||
- llamaindex@0.6.21
|
||||
|
||||
## 0.0.82
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [6a9a7b1]
|
||||
- llamaindex@0.6.20
|
||||
|
||||
## 0.0.81
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [62cba52]
|
||||
- Updated dependencies [d265e96]
|
||||
- Updated dependencies [d30bbf7]
|
||||
- Updated dependencies [53fd00a]
|
||||
- llamaindex@0.6.19
|
||||
|
||||
## 0.0.80
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5f67820]
|
||||
- Updated dependencies [fe08d04]
|
||||
- llamaindex@0.6.18
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ee697fb]
|
||||
- llamaindex@0.6.17
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [63e9846]
|
||||
- Updated dependencies [6f3a31c]
|
||||
- llamaindex@0.6.16
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/waku-query-engine-test",
|
||||
"version": "0.0.77",
|
||||
"version": "0.0.86",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
@@ -10,17 +10,16 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"llamaindex": "workspace:*",
|
||||
"react": "19.0.0-rc-7771d3a7-20240827",
|
||||
"react-dom": "19.0.0-rc-7771d3a7-20240827",
|
||||
"react-server-dom-webpack": "19.0.0-rc-7771d3a7-20240827",
|
||||
"waku": "0.21.1"
|
||||
"react": "19.0.0-rc-bf7e210c-20241017",
|
||||
"react-dom": "19.0.0-rc-bf7e210c-20241017",
|
||||
"react-server-dom-webpack": "19.0.0-rc-bf7e210c-20241017",
|
||||
"waku": "0.21.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "18.3.5",
|
||||
"@types/react-dom": "18.3.0",
|
||||
"autoprefixer": "10.4.20",
|
||||
"tailwindcss": "3.4.10",
|
||||
"typescript": "5.6.2",
|
||||
"vite-plugin-wasm": "^3.3.0"
|
||||
"@types/react": "18.3.11",
|
||||
"@types/react-dom": "18.3.1",
|
||||
"autoprefixer": "^10.4.20",
|
||||
"tailwindcss": "^3.4.14",
|
||||
"typescript": "5.6.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
import wasm from "vite-plugin-wasm";
|
||||
|
||||
export default {
|
||||
plugins: [wasm()],
|
||||
ssr: {
|
||||
external: ["tiktoken"],
|
||||
},
|
||||
};
|
||||
@@ -105,3 +105,22 @@ await test("simple node", async (t) => {
|
||||
assert.deepStrictEqual(result.nodes, []);
|
||||
}
|
||||
});
|
||||
|
||||
await test("no setup", async (t) => {
|
||||
// @ts-expect-error private method
|
||||
assert.ok(PGVectorStore.prototype.checkSchema);
|
||||
// @ts-expect-error private method
|
||||
const Mock = class extends PGVectorStore {
|
||||
private override async checkSchema(): Promise<any> {
|
||||
throw new Error("should not be called");
|
||||
}
|
||||
};
|
||||
const vectorStore = new Mock({
|
||||
clientConfig: pgConfig,
|
||||
performSetup: false,
|
||||
});
|
||||
const db = await vectorStore.client();
|
||||
t.after(async () => {
|
||||
await db.close();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llamaindex",
|
||||
"version": "0.6.15",
|
||||
"version": "0.7.1",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
"keywords": [
|
||||
@@ -29,13 +29,18 @@
|
||||
"@google-cloud/vertexai": "1.2.0",
|
||||
"@google/generative-ai": "0.12.0",
|
||||
"@grpc/grpc-js": "^1.11.1",
|
||||
"@huggingface/inference": "^2.8.0",
|
||||
"@llamaindex/anthropic": "workspace:*",
|
||||
"@llamaindex/clip": "workspace:*",
|
||||
"@llamaindex/cloud": "workspace:*",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/deepinfra": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"@llamaindex/groq": "workspace:*",
|
||||
"@llamaindex/huggingface": "workspace:*",
|
||||
"@llamaindex/ollama": "workspace:*",
|
||||
"@llamaindex/openai": "workspace:*",
|
||||
"@llamaindex/portkey-ai": "workspace:*",
|
||||
"@llamaindex/replicate": "workspace:^0.0.2",
|
||||
"@mistralai/mistralai": "^1.0.4",
|
||||
"@mixedbread-ai/sdk": "^2.2.11",
|
||||
"@pinecone-database/pinecone": "^3.0.2",
|
||||
@@ -48,9 +53,10 @@
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.6",
|
||||
"ajv": "^8.17.1",
|
||||
"assemblyai": "^4.7.0",
|
||||
"chromadb": "1.8.1",
|
||||
"chromadb": "1.9.2",
|
||||
"cohere-ai": "7.13.0",
|
||||
"discord-api-types": "^0.37.98",
|
||||
"gpt-tokenizer": "^2.5.0",
|
||||
"groq-sdk": "^0.6.1",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"lodash": "^4.17.21",
|
||||
@@ -62,10 +68,8 @@
|
||||
"openai": "^4.60.0",
|
||||
"papaparse": "^5.4.1",
|
||||
"pathe": "^1.1.2",
|
||||
"portkey-ai": "0.1.16",
|
||||
"rake-modified": "^1.0.8",
|
||||
"string-strip-html": "^13.4.8",
|
||||
"tiktoken": "^1.0.15",
|
||||
"unpdf": "^0.11.0",
|
||||
"weaviate-client": "^3.1.4",
|
||||
"wikipedia": "^2.1.2",
|
||||
|
||||
@@ -11,7 +11,6 @@ import {
|
||||
type NodeParser,
|
||||
SentenceSplitter,
|
||||
} from "@llamaindex/core/node-parser";
|
||||
import type { LoadTransformerEvent } from "@llamaindex/env";
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "./ServiceContext.js";
|
||||
import {
|
||||
@@ -20,12 +19,6 @@ import {
|
||||
withEmbeddedModel,
|
||||
} from "./internal/settings/EmbedModel.js";
|
||||
|
||||
declare module "@llamaindex/core/global" {
|
||||
interface LlamaIndexEventMaps {
|
||||
"load-transformers": LoadTransformerEvent;
|
||||
}
|
||||
}
|
||||
|
||||
export type PromptConfig = {
|
||||
llm?: string;
|
||||
lang?: string;
|
||||
|
||||
@@ -1,43 +1 @@
|
||||
import {
|
||||
LLMAgent,
|
||||
LLMAgentWorker,
|
||||
type LLMAgentParams,
|
||||
} from "@llamaindex/core/agent";
|
||||
import type {
|
||||
NonStreamingChatEngineParams,
|
||||
StreamingChatEngineParams,
|
||||
} from "@llamaindex/core/chat-engine";
|
||||
import type { EngineResponse } from "@llamaindex/core/schema";
|
||||
import { Settings } from "../Settings.js";
|
||||
import { Anthropic } from "../llm/anthropic.js";
|
||||
|
||||
export type AnthropicAgentParams = LLMAgentParams;
|
||||
|
||||
export class AnthropicAgentWorker extends LLMAgentWorker {}
|
||||
|
||||
export class AnthropicAgent extends LLMAgent {
|
||||
constructor(params: AnthropicAgentParams) {
|
||||
const llm =
|
||||
params.llm ??
|
||||
(Settings.llm instanceof Anthropic
|
||||
? (Settings.llm as Anthropic)
|
||||
: new Anthropic());
|
||||
super({
|
||||
...params,
|
||||
llm,
|
||||
});
|
||||
}
|
||||
|
||||
async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
|
||||
async chat(params: StreamingChatEngineParams): Promise<never>;
|
||||
override async chat(
|
||||
params: NonStreamingChatEngineParams | StreamingChatEngineParams,
|
||||
) {
|
||||
const { stream } = params;
|
||||
if (stream) {
|
||||
// Anthropic does support this, but looks like it's not supported in the LITS LLM
|
||||
throw new Error("Anthropic does not support streaming");
|
||||
}
|
||||
return super.chat(params);
|
||||
}
|
||||
}
|
||||
export * from "@llamaindex/anthropic";
|
||||
|
||||
@@ -41,7 +41,7 @@ export class LLamaCloudFileService {
|
||||
) {
|
||||
initService();
|
||||
const { data: file } = await FilesService.uploadFileApiV1FilesPost({
|
||||
path: { project_id: projectId },
|
||||
query: { project_id: projectId },
|
||||
body: {
|
||||
upload_file: uploadFile,
|
||||
},
|
||||
@@ -85,7 +85,7 @@ export class LLamaCloudFileService {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100)); // Sleep for 100ms
|
||||
}
|
||||
throw new Error(
|
||||
`File processing did not complete after ${maxAttempts} attempts.`,
|
||||
`File processing did not complete after ${maxAttempts} attempts. Check your LlamaCloud index at https://cloud.llamaindex.ai/project/${projectId}/deploy/${pipelineId} for more details.`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
|
||||
import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
|
||||
import type { Document, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { Document } from "@llamaindex/core/schema";
|
||||
import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
|
||||
import type { BaseNodePostprocessor } from "../postprocessors/types.js";
|
||||
import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
|
||||
import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
|
||||
import { getPipelineCreate } from "./config.js";
|
||||
import type { CloudConstructorParams } from "./type.js";
|
||||
import { getAppBaseUrl, getProjectId, initService } from "./utils.js";
|
||||
import {
|
||||
getAppBaseUrl,
|
||||
getPipelineId,
|
||||
getProjectId,
|
||||
initService,
|
||||
} from "./utils.js";
|
||||
|
||||
import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api";
|
||||
import { SentenceSplitter } from "@llamaindex/core/node-parser";
|
||||
import { PipelinesService, type PipelineCreate } from "@llamaindex/cloud/api";
|
||||
import type { BaseRetriever } from "@llamaindex/core/retriever";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
import { Settings } from "../Settings.js";
|
||||
|
||||
export class LlamaCloudIndex {
|
||||
@@ -28,10 +30,7 @@ export class LlamaCloudIndex {
|
||||
verbose = Settings.debug,
|
||||
raiseOnError = false,
|
||||
): Promise<void> {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
if (verbose) {
|
||||
console.log("Waiting for pipeline ingestion: ");
|
||||
@@ -78,10 +77,7 @@ export class LlamaCloudIndex {
|
||||
verbose = Settings.debug,
|
||||
raiseOnError = false,
|
||||
): Promise<void> {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
if (verbose) {
|
||||
console.log("Loading data: ");
|
||||
@@ -143,17 +139,13 @@ export class LlamaCloudIndex {
|
||||
public async getPipelineId(
|
||||
name?: string,
|
||||
projectName?: string,
|
||||
organizationId?: string,
|
||||
): Promise<string> {
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
path: {
|
||||
project_id: await this.getProjectId(projectName),
|
||||
project_name: name ?? this.params.name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
return pipelines[0]!.id;
|
||||
return await getPipelineId(
|
||||
name ?? this.params.name,
|
||||
projectName ?? this.params.projectName,
|
||||
organizationId ?? this.params.organizationId,
|
||||
);
|
||||
}
|
||||
|
||||
public async getProjectId(
|
||||
@@ -166,75 +158,42 @@ export class LlamaCloudIndex {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds documents to the given index parameters. If the index does not exist, it will be created.
|
||||
*
|
||||
* @param params - An object containing the following properties:
|
||||
* - documents: An array of Document objects to be added to the index.
|
||||
* - verbose: Optional boolean to enable verbose logging.
|
||||
* - Additional properties from CloudConstructorParams.
|
||||
* @returns A Promise that resolves to a new LlamaCloudIndex instance.
|
||||
*/
|
||||
static async fromDocuments(
|
||||
params: {
|
||||
documents: Document[];
|
||||
transformations?: TransformComponent[];
|
||||
verbose?: boolean;
|
||||
} & CloudConstructorParams,
|
||||
config?: {
|
||||
embedding: PipelineCreate["embedding_config"];
|
||||
transform: PipelineCreate["transform_config"];
|
||||
},
|
||||
): Promise<LlamaCloudIndex> {
|
||||
initService(params);
|
||||
const defaultTransformations: TransformComponent[] = [
|
||||
new SentenceSplitter(),
|
||||
new OpenAIEmbedding({
|
||||
apiKey: getEnv("OPENAI_API_KEY"),
|
||||
}),
|
||||
];
|
||||
const index = new LlamaCloudIndex({ ...params });
|
||||
await index.ensureIndex({ ...config, verbose: params.verbose ?? false });
|
||||
await index.addDocuments(params.documents, params.verbose);
|
||||
return index;
|
||||
}
|
||||
|
||||
async addDocuments(documents: Document[], verbose?: boolean): Promise<void> {
|
||||
const apiUrl = getAppBaseUrl();
|
||||
|
||||
const pipelineCreateParams = await getPipelineCreate({
|
||||
pipelineName: params.name,
|
||||
pipelineType: "MANAGED",
|
||||
inputNodes: params.documents,
|
||||
transformations: params.transformations ?? defaultTransformations,
|
||||
});
|
||||
|
||||
const { data: project } =
|
||||
await ProjectsService.upsertProjectApiV1ProjectsPut({
|
||||
path: {
|
||||
organization_id: params.organizationId,
|
||||
},
|
||||
body: {
|
||||
name: params.projectName ?? "default",
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!project.id) {
|
||||
throw new Error("Project ID should be defined");
|
||||
}
|
||||
|
||||
const { data: pipeline } =
|
||||
await PipelinesService.upsertPipelineApiV1PipelinesPut({
|
||||
path: {
|
||||
project_id: project.id,
|
||||
},
|
||||
body: pipelineCreateParams.configured_transformations
|
||||
? {
|
||||
name: params.name,
|
||||
configured_transformations:
|
||||
pipelineCreateParams.configured_transformations,
|
||||
}
|
||||
: {
|
||||
name: params.name,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!pipeline.id) {
|
||||
throw new Error("Pipeline ID must be defined");
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
console.log(`Created pipeline ${pipeline.id} with name ${params.name}`);
|
||||
}
|
||||
const projectId = await this.getProjectId();
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut(
|
||||
{
|
||||
path: {
|
||||
pipeline_id: pipeline.id,
|
||||
pipeline_id: pipelineId,
|
||||
},
|
||||
body: params.documents.map((doc) => ({
|
||||
body: documents.map((doc) => ({
|
||||
metadata: doc.metadata,
|
||||
text: doc.text,
|
||||
excluded_embed_metadata_keys: doc.excludedEmbedMetadataKeys,
|
||||
@@ -248,7 +207,7 @@ export class LlamaCloudIndex {
|
||||
const { data: pipelineStatus } =
|
||||
await PipelinesService.getPipelineStatusApiV1PipelinesPipelineIdStatusGet(
|
||||
{
|
||||
path: { pipeline_id: pipeline.id },
|
||||
path: { pipeline_id: pipelineId },
|
||||
throwOnError: true,
|
||||
},
|
||||
);
|
||||
@@ -262,32 +221,30 @@ export class LlamaCloudIndex {
|
||||
|
||||
if (pipelineStatus.status === "ERROR") {
|
||||
console.error(
|
||||
`Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
throw new Error("Some documents failed to ingest");
|
||||
}
|
||||
|
||||
if (pipelineStatus.status === "PARTIAL_SUCCESS") {
|
||||
console.info(
|
||||
`Documents ingestion partially succeeded, to check a more complete status check your pipeline at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Documents ingestion partially succeeded, to check a more complete status check your pipeline at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
if (verbose) {
|
||||
process.stdout.write(".");
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
if (params.verbose) {
|
||||
if (verbose) {
|
||||
console.info(
|
||||
`Ingestion completed, find your index at ${apiUrl}/project/${project.id}/deploy/${pipeline.id}`,
|
||||
`Ingestion completed, find your index at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
|
||||
);
|
||||
}
|
||||
|
||||
return new LlamaCloudIndex({ ...params });
|
||||
}
|
||||
|
||||
asRetriever(params: CloudRetrieveParams = {}): BaseRetriever {
|
||||
@@ -308,20 +265,12 @@ export class LlamaCloudIndex {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever,
|
||||
params?.responseSynthesizer,
|
||||
params?.preFilters,
|
||||
params?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
async insert(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost(
|
||||
{
|
||||
@@ -344,14 +293,7 @@ export class LlamaCloudIndex {
|
||||
}
|
||||
|
||||
async delete(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete(
|
||||
{
|
||||
@@ -366,14 +308,7 @@ export class LlamaCloudIndex {
|
||||
}
|
||||
|
||||
async refreshDoc(document: Document) {
|
||||
const pipelineId = await this.getPipelineId(
|
||||
this.params.name,
|
||||
this.params.projectName,
|
||||
);
|
||||
|
||||
if (!pipelineId) {
|
||||
throw new Error("We couldn't find the pipeline ID for the given name");
|
||||
}
|
||||
const pipelineId = await this.getPipelineId();
|
||||
|
||||
await PipelinesService.upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut(
|
||||
{
|
||||
@@ -394,4 +329,71 @@ export class LlamaCloudIndex {
|
||||
|
||||
await this.waitForDocumentIngestion([document.id_]);
|
||||
}
|
||||
|
||||
/**
 * Makes sure a pipeline named `this.params.name` exists in the current project.
 *
 * The project is searched for a pipeline with that name; when the search
 * returns no results a new pipeline is upserted. When at least one result is
 * returned nothing else happens.
 *
 * @param config - Optional settings, only consulted when a pipeline is created:
 *   - embedding: embedding configuration. When omitted, an OpenAI embedding
 *     config is built from the OPENAI_API_KEY and EMBEDDING_MODEL environment
 *     variables.
 *   - transform: transform configuration. When omitted, an "auto" mode config
 *     with chunk size 1024 and chunk overlap 200 is used.
 *   - verbose: when truthy, logs the id and name of the created pipeline.
 * @throws Error when no embedding config is supplied and either environment
 *   variable is missing.
 */
public async ensureIndex(config?: {
  embedding?: PipelineCreate["embedding_config"];
  transform?: PipelineCreate["transform_config"];
  verbose?: boolean;
}): Promise<void> {
  const projectId = await this.getProjectId();

  const { data: existing } =
    await PipelinesService.searchPipelinesApiV1PipelinesGet({
      query: {
        project_id: projectId,
        pipeline_name: this.params.name,
      },
      throwOnError: true,
    });

  // A pipeline with this name is already there: nothing to create.
  if (existing.length !== 0) {
    return;
  }

  let embeddingConfig = config?.embedding;
  if (!embeddingConfig) {
    // No embedding config provided, fall back to OpenAI configured via env.
    const apiKey = getEnv("OPENAI_API_KEY");
    const modelName = getEnv("EMBEDDING_MODEL");
    if (!(apiKey && modelName)) {
      throw new Error(
        "No embedding configuration provided. Fallback to OpenAI embedding model. OPENAI_API_KEY and EMBEDDING_MODEL environment variables must be set.",
      );
    }
    embeddingConfig = {
      type: "OPENAI_EMBEDDING",
      component: {
        api_key: apiKey,
        model_name: modelName,
      },
    };
  }

  let transformConfig = config?.transform;
  if (!transformConfig) {
    transformConfig = {
      mode: "auto",
      chunk_size: 1024,
      chunk_overlap: 200,
    };
  }

  const { data: created } =
    await PipelinesService.upsertPipelineApiV1PipelinesPut({
      query: {
        project_id: projectId,
      },
      body: {
        name: this.params.name,
        embedding_config: embeddingConfig,
        transform_config: transformConfig,
      },
      throwOnError: true,
    });

  if (config?.verbose) {
    console.log(`Created pipeline ${created.id} with name ${created.name}`);
  }
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
PipelinesService,
|
||||
type RetrievalParams,
|
||||
@@ -11,7 +12,7 @@ import type { NodeWithScore } from "@llamaindex/core/schema";
|
||||
import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
|
||||
import { extractText } from "@llamaindex/core/utils";
|
||||
import type { ClientParams, CloudConstructorParams } from "./type.js";
|
||||
import { getProjectId, initService } from "./utils.js";
|
||||
import { getPipelineId, initService } from "./utils.js";
|
||||
|
||||
export type CloudRetrieveParams = Omit<
|
||||
RetrievalParams,
|
||||
@@ -42,6 +43,24 @@ export class LlamaCloudRetriever extends BaseRetriever {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * LlamaCloud expects `null` for empty filter values, while LlamaIndexTS uses
 * `undefined`. Returns a copy of `filters` in which every leaf filter has its
 * nullish `value` replaced by `null`; nested filter groups are walked
 * recursively.
 *
 * @param filters - Filters to convert; may be omitted.
 * @returns The converted copy, or `null` when `filters` is falsy.
 */
private convertFilter(filters?: MetadataFilters): MetadataFilters | null {
  if (!filters) {
    return null;
  }

  const toCloudShape = (
    entry: MetadataFilter | MetadataFilters,
  ): MetadataFilter | MetadataFilters =>
    "filters" in entry
      ? // a nested MetadataFilters group: convert its children
        { ...entry, filters: entry.filters.map(toCloudShape) }
      : { ...entry, value: entry.value ?? null };

  return { ...filters, filters: filters.filters.map(toCloudShape) };
}
|
||||
|
||||
constructor(params: CloudConstructorParams & CloudRetrieveParams) {
|
||||
super();
|
||||
this.clientParams = { apiKey: params.apiKey, baseUrl: params.baseUrl };
|
||||
@@ -57,45 +76,24 @@ export class LlamaCloudRetriever extends BaseRetriever {
|
||||
}
|
||||
|
||||
async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
|
||||
const { data: pipelines } =
|
||||
await PipelinesService.searchPipelinesApiV1PipelinesGet({
|
||||
query: {
|
||||
project_id: await getProjectId(this.projectName, this.organizationId),
|
||||
project_name: this.pipelineName,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
const pipelineId = await getPipelineId(
|
||||
this.pipelineName,
|
||||
this.projectName,
|
||||
this.organizationId,
|
||||
);
|
||||
|
||||
if (pipelines.length === 0 || !pipelines[0]!.id) {
|
||||
throw new Error(
|
||||
`No pipeline found with name ${this.pipelineName} in project ${this.projectName}`,
|
||||
);
|
||||
}
|
||||
|
||||
const { data: pipeline } =
|
||||
await PipelinesService.getPipelineApiV1PipelinesPipelineIdGet({
|
||||
path: {
|
||||
pipeline_id: pipelines[0]!.id,
|
||||
},
|
||||
throwOnError: true,
|
||||
});
|
||||
|
||||
if (!pipeline) {
|
||||
throw new Error(
|
||||
`No pipeline found with name ${this.pipelineName} in project ${this.projectName}`,
|
||||
);
|
||||
}
|
||||
const filters = this.convertFilter(this.retrieveParams.filters);
|
||||
|
||||
const { data: results } =
|
||||
await PipelinesService.runSearchApiV1PipelinesPipelineIdRetrievePost({
|
||||
throwOnError: true,
|
||||
path: {
|
||||
pipeline_id: pipeline.id,
|
||||
pipeline_id: pipelineId,
|
||||
},
|
||||
body: {
|
||||
...this.retrieveParams,
|
||||
query: extractText(query),
|
||||
search_filters: this.retrieveParams.filters as MetadataFilters,
|
||||
search_filters: filters,
|
||||
dense_similarity_top_k: this.retrieveParams.similarityTopK!,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
import type {
|
||||
ConfiguredTransformationItem,
|
||||
PipelineCreate,
|
||||
PipelineType,
|
||||
} from "@llamaindex/cloud/api";
|
||||
import { SentenceSplitter } from "@llamaindex/core/node-parser";
|
||||
import { BaseNode, type TransformComponent } from "@llamaindex/core/schema";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
/** Arguments accepted by `getPipelineCreate`. */
export type GetPipelineCreateParams = {
  /** Used as the `name` of the resulting pipeline payload. */
  pipelineName: string;
  /** Used as the `pipeline_type` of the resulting pipeline payload. */
  pipelineType: PipelineType;
  /** Transformations to convert into `configured_transformations`. */
  transformations?: TransformComponent[];
  /** Accepted for callers' convenience; not read when building the payload. */
  inputNodes?: BaseNode[];
};
|
||||
|
||||
/**
 * Converts a local transformation into the configured-transformation item
 * sent to the cloud API.
 *
 * Only `SentenceSplitter` and `OpenAIEmbedding` instances are supported.
 *
 * @param transformation - The transformation to describe.
 * @returns The matching configured-transformation item.
 * @throws Error for any other transformation; the message names its
 *   constructor so the offending component can be identified.
 */
function getTransformationConfig(
  transformation: TransformComponent,
): ConfiguredTransformationItem {
  if (transformation instanceof SentenceSplitter) {
    return {
      configurable_transformation_type: "SENTENCE_AWARE_NODE_PARSER",
      component: {
        chunk_size: transformation.chunkSize, // TODO: set to public in SentenceSplitter
        chunk_overlap: transformation.chunkOverlap, // TODO: set to public in SentenceSplitter
        include_metadata: transformation.includeMetadata,
        include_prev_next_rel: transformation.includePrevNextRel,
      },
    };
  }

  if (transformation instanceof OpenAIEmbedding) {
    return {
      configurable_transformation_type: "OPENAI_EMBEDDING",
      component: {
        model: transformation.model,
        api_key: transformation.apiKey,
        embed_batch_size: transformation.embedBatchSize,
        dimensions: transformation.dimensions,
      },
    };
  }

  // `typeof` yields "object" for every class instance, which says nothing
  // about which transformation was rejected; prefer the constructor name and
  // only fall back to `typeof` when no constructor name is available.
  const described: { constructor?: { name?: string } } | null | undefined =
    transformation;
  const typeName = described?.constructor?.name || typeof transformation;
  throw new Error(`Unsupported transformation: ${typeName}`);
}
|
||||
|
||||
/**
 * Builds the `PipelineCreate` payload for the given parameters.
 *
 * @param params - Pipeline name, pipeline type and optional transformations.
 *   Omitted transformations are treated as an empty list.
 * @returns The payload with `name`, `configured_transformations` and
 *   `pipeline_type` filled in.
 */
export async function getPipelineCreate(
  params: GetPipelineCreateParams,
): Promise<PipelineCreate> {
  const transformations =
    params.transformations === undefined ? [] : params.transformations;
  const configured = transformations.map(getTransformationConfig);

  return {
    name: params.pipelineName,
    configured_transformations: configured,
    pipeline_type: params.pipelineType,
  };
}
|
||||
@@ -1,4 +1,8 @@
|
||||
import { client, ProjectsService } from "@llamaindex/cloud/api";
|
||||
import {
|
||||
client,
|
||||
PipelinesService,
|
||||
ProjectsService,
|
||||
} from "@llamaindex/cloud/api";
|
||||
import { DEFAULT_BASE_URL } from "@llamaindex/core/global";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { ClientParams } from "./type.js";
|
||||
@@ -40,9 +44,9 @@ export async function getProjectId(
|
||||
): Promise<string> {
|
||||
const { data: projects } = await ProjectsService.listProjectsApiV1ProjectsGet(
|
||||
{
|
||||
path: {
|
||||
query: {
|
||||
project_name: projectName,
|
||||
organization_id: organizationId,
|
||||
organization_id: organizationId ?? null,
|
||||
},
|
||||
throwOnError: true,
|
||||
},
|
||||
@@ -66,3 +70,26 @@ export async function getProjectId(
|
||||
|
||||
return project.id;
|
||||
}
|
||||
|
||||
/**
 * Looks up the id of the pipeline called `name` inside the given project.
 *
 * The project id is resolved with `getProjectId`, then pipelines are searched
 * by project id and pipeline name; the id of the first result is returned.
 *
 * @param name - Pipeline name to search for.
 * @param projectName - Name of the project that should contain the pipeline.
 * @param organizationId - Optional organization id forwarded to `getProjectId`.
 * @returns The id of the first matching pipeline.
 * @throws Error when the search returns no pipeline, or the first result has
 *   no id.
 */
export async function getPipelineId(
  name: string,
  projectName: string,
  organizationId?: string,
): Promise<string> {
  const projectId = await getProjectId(projectName, organizationId);

  const { data: matches } =
    await PipelinesService.searchPipelinesApiV1PipelinesGet({
      query: {
        project_id: projectId,
        pipeline_name: name,
      },
      throwOnError: true,
    });

  const foundId = matches.length > 0 ? matches[0]!.id : undefined;
  if (!foundId) {
    throw new Error(
      `No pipeline found with name ${name} in project ${projectName}`,
    );
  }

  return foundId;
}
|
||||
|
||||
@@ -1,139 +1 @@
|
||||
import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { ImageType } from "@llamaindex/core/schema";
|
||||
import _ from "lodash";
|
||||
// only import type, to avoid bundling error
|
||||
import { loadTransformers } from "@llamaindex/env";
|
||||
import type {
|
||||
CLIPTextModelWithProjection,
|
||||
CLIPVisionModelWithProjection,
|
||||
PreTrainedTokenizer,
|
||||
Processor,
|
||||
} from "@xenova/transformers";
|
||||
import { Settings } from "../Settings.js";
|
||||
|
||||
/**
 * Loads an image through the transformers library's `RawImage`.
 *
 * @param input - A Blob, a string, or a URL. Blobs are read with
 *   `RawImage.fromBlob`; strings and URLs with `RawImage.fromURL`.
 * @returns The loaded image.
 * @throws Error when the input is none of the supported kinds.
 */
async function readImage(input: ImageType) {
  const transformers = await loadTransformers((transformer) => {
    // Announce that the transformers library has been loaded.
    Settings.callbackManager.dispatchEvent(
      "load-transformers",
      { transformer },
      true,
    );
  });
  const { RawImage } = transformers;

  if (input instanceof Blob) {
    return await RawImage.fromBlob(input);
  }
  if (_.isString(input) || input instanceof URL) {
    return await RawImage.fromURL(input);
  }
  throw new Error(`Unsupported input type: ${typeof input}`);
}
|
||||
|
||||
/** Model identifiers that can be assigned to `ClipEmbedding.modelType`. */
export enum ClipEmbeddingModelType {
  /** "Xenova/clip-vit-base-patch32" */
  XENOVA_CLIP_VIT_BASE_PATCH32 = "Xenova/clip-vit-base-patch32",
  /** "Xenova/clip-vit-base-patch16" */
  XENOVA_CLIP_VIT_BASE_PATCH16 = "Xenova/clip-vit-base-patch16",
}
|
||||
|
||||
/**
 * Multi-modal embedding backed by CLIP models loaded through the transformers
 * library. The tokenizer, processor, vision model and text model are each
 * created on first use and cached on the instance.
 */
export class ClipEmbedding extends MultiModalEmbedding {
  /** Model identifier passed to every `from_pretrained` call. */
  modelType: ClipEmbeddingModelType =
    ClipEmbeddingModelType.XENOVA_CLIP_VIT_BASE_PATCH16;

  private tokenizer: PreTrainedTokenizer | null = null;
  private processor: Processor | null = null;
  private visionModel: CLIPVisionModelWithProjection | null = null;
  private textModel: CLIPTextModelWithProjection | null = null;

  constructor() {
    super();
  }

  /**
   * Loads the transformers library, dispatching the "load-transformers"
   * event from the loader callback. Shared by all lazy getters below.
   */
  private loadTransformersLibrary() {
    return loadTransformers((transformer) => {
      Settings.callbackManager.dispatchEvent(
        "load-transformers",
        {
          transformer,
        },
        true,
      );
    });
  }

  /** Returns the tokenizer for `modelType`, creating it on first call. */
  async getTokenizer() {
    const { AutoTokenizer } = await this.loadTransformersLibrary();
    if (!this.tokenizer) {
      this.tokenizer = await AutoTokenizer.from_pretrained(this.modelType);
    }
    return this.tokenizer;
  }

  /** Returns the image processor for `modelType`, creating it on first call. */
  async getProcessor() {
    const { AutoProcessor } = await this.loadTransformersLibrary();
    if (!this.processor) {
      this.processor = await AutoProcessor.from_pretrained(this.modelType);
    }
    return this.processor;
  }

  /** Returns the vision model for `modelType`, creating it on first call. */
  async getVisionModel() {
    const { CLIPVisionModelWithProjection } =
      await this.loadTransformersLibrary();
    if (!this.visionModel) {
      this.visionModel = await CLIPVisionModelWithProjection.from_pretrained(
        this.modelType,
      );
    }

    return this.visionModel;
  }

  /** Returns the text model for `modelType`, creating it on first call. */
  async getTextModel() {
    const { CLIPTextModelWithProjection } =
      await this.loadTransformersLibrary();
    if (!this.textModel) {
      this.textModel = await CLIPTextModelWithProjection.from_pretrained(
        this.modelType,
      );
    }

    return this.textModel;
  }

  /**
   * Embeds a single image.
   *
   * @param image - Image to embed; read with `readImage`.
   * @returns The image embedding as a plain number array.
   */
  async getImageEmbedding(image: ImageType): Promise<number[]> {
    const loadedImage = await readImage(image);
    const imageInputs = await (await this.getProcessor())(loadedImage);
    const { image_embeds } = await (await this.getVisionModel())(imageInputs);
    return Array.from(image_embeds.data);
  }

  /**
   * Embeds a single text.
   *
   * @param text - Text to embed; tokenized with padding and truncation enabled.
   * @returns The text embedding as a plain number array.
   */
  async getTextEmbedding(text: string): Promise<number[]> {
    const textInputs = await (
      await this.getTokenizer()
    )([text], { padding: true, truncation: true });
    const { text_embeds } = await (await this.getTextModel())(textInputs);
    // Copy into a plain array, as getImageEmbedding does, so the result
    // matches the declared number[] instead of leaking the tensor's raw data.
    return Array.from(text_embeds.data);
  }
}
|
||||
export * from "@llamaindex/clip";
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { ImageType } from "@llamaindex/core/schema";
|
||||
|
||||
/**
 * Multi-modal embedding stand-in for Cloudflare workers, which don't support
 * image embeddings for now. Both embedding methods throw when called.
 */
export class CloudflareWorkerMultiModalEmbedding extends MultiModalEmbedding {
  constructor() {
    super();
  }

  /**
   * Not implemented.
   * @throws Error always.
   */
  getTextEmbedding(text: string): Promise<number[]> {
    throw new Error("Method not implemented.");
  }

  /**
   * Not implemented.
   * @throws Error always.
   */
  getImageEmbedding(images: ImageType): Promise<number[]> {
    throw new Error("Method not implemented.");
  }
}
|
||||
@@ -1,152 +1 @@
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { MessageContentDetail } from "@llamaindex/core/llms";
|
||||
import { extractSingleText } from "@llamaindex/core/utils";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
|
||||
/** Model used when the constructor is not given one. */
const DEFAULT_MODEL = "sentence-transformers/clip-ViT-B-32";

/** Environment variable consulted for the API token. */
const API_TOKEN_ENV_VARIABLE_NAME = "DEEPINFRA_API_TOKEN";

/** Base URL; the model name is appended to it to form the request URL. */
const API_ROOT = "https://api.deepinfra.com/v1/inference";

/** Default per-request timeout in milliseconds (one minute). */
const DEFAULT_TIMEOUT = 1000 * 60;

/** Default number of request attempts. */
const DEFAULT_MAX_RETRIES = 5;
|
||||
|
||||
/** JSON body expected back from a DeepInfra embedding request. */
export interface DeepInfraEmbeddingResponse {
  /** Embedding vectors; this is the part returned to callers. */
  embeddings: number[][];
  request_id: string;
  inference_status: InferenceStatus;
}
|
||||
|
||||
/** Shape of the `inference_status` field of a DeepInfra embedding response. */
export interface InferenceStatus {
  status: string;
  // NOTE(review): presumably a duration in milliseconds, going by the name — confirm.
  runtime_ms: number;
  cost: number;
  tokens_input: number;
}
|
||||
|
||||
/**
 * Prepends `prefix` and a single space to every input.
 *
 * @param prefix - Text to prepend; when empty the inputs are left unchanged.
 * @param inputs - Strings to prefix.
 * @returns A new array; `inputs` itself is never returned or modified.
 */
const mapPrefixWithInputs = (prefix: string, inputs: string[]): string[] => {
  if (!prefix) {
    return inputs.map((input) => input);
  }
  return inputs.map((input) => `${prefix} ${input}`);
};
|
||||
|
||||
/**
 * DeepInfraEmbedding is an alias for DeepInfra that implements the BaseEmbedding interface.
 *
 * Embeddings are requested over HTTP from `${API_ROOT}/${model}`; each request
 * is attempted up to `maxRetries` times and aborted after `timeout`
 * milliseconds.
 */
export class DeepInfraEmbedding extends BaseEmbedding {
  /**
   * DeepInfra model to use
   * @default "sentence-transformers/clip-ViT-B-32"
   * @see https://deepinfra.com/models/embeddings
   */
  model: string;

  /**
   * DeepInfra API token
   * @see https://deepinfra.com/dash/api_keys
   * If not provided, it will try to get the token from the environment variable `DEEPINFRA_API_TOKEN`
   */
  apiToken: string;

  /**
   * Prefix to add to the query
   * @default ""
   */
  queryPrefix: string;

  /**
   * Prefix to add to the text
   * @default ""
   */
  textPrefix: string;

  /**
   * Maximum number of request attempts
   * @default 5
   */
  maxRetries: number;

  /**
   * Per-request timeout in milliseconds
   * @default 60 * 1000
   */
  timeout: number;

  /**
   * @param init - Optional overrides for any of the public fields; anything
   *   omitted falls back to the documented default.
   */
  constructor(init?: Partial<DeepInfraEmbedding>) {
    super();

    this.model = init?.model ?? DEFAULT_MODEL;
    this.apiToken = init?.apiToken ?? getEnv(API_TOKEN_ENV_VARIABLE_NAME) ?? "";
    this.queryPrefix = init?.queryPrefix ?? "";
    this.textPrefix = init?.textPrefix ?? "";
    this.maxRetries = init?.maxRetries ?? DEFAULT_MAX_RETRIES;
    this.timeout = init?.timeout ?? DEFAULT_TIMEOUT;
  }

  /** Embeds one text, with `textPrefix` applied. */
  async getTextEmbedding(text: string): Promise<number[]> {
    const texts = mapPrefixWithInputs(this.textPrefix, [text]);
    const embeddings = await this.getDeepInfraEmbedding(texts);
    return embeddings[0]!;
  }

  /**
   * Embeds one query, with `queryPrefix` applied.
   *
   * @returns The embedding, or `null` when no text can be extracted from `query`.
   */
  async getQueryEmbedding(
    query: MessageContentDetail,
  ): Promise<number[] | null> {
    const text = extractSingleText(query);
    if (text) {
      const queries = mapPrefixWithInputs(this.queryPrefix, [text]);
      const embeddings = await this.getDeepInfraEmbedding(queries);
      return embeddings[0]!;
    } else {
      return null;
    }
  }

  /** Embeds several texts in one request, with `textPrefix` applied to each. */
  getTextEmbeddings = async (texts: string[]): Promise<number[][]> => {
    const textsWithPrefix = mapPrefixWithInputs(this.textPrefix, texts);
    return this.getDeepInfraEmbedding(textsWithPrefix);
  };

  /** Embeds several queries in one request, with `queryPrefix` applied to each. */
  async getQueryEmbeddings(queries: string[]): Promise<number[][]> {
    const queriesWithPrefix = mapPrefixWithInputs(this.queryPrefix, queries);
    return await this.getDeepInfraEmbedding(queriesWithPrefix);
  }

  /**
   * POSTs `inputs` to the model endpoint and returns the `embeddings` field
   * of the JSON response.
   *
   * Every failed attempt (non-OK status, network error, timeout abort) is
   * logged and retried until `maxRetries` attempts have been made.
   *
   * @throws Error "Exceeded maximum retries" once all attempts have failed;
   *   the last underlying failure is attached as `cause`.
   */
  private async getDeepInfraEmbedding(inputs: string[]): Promise<number[][]> {
    const url = this.getUrl(this.model);
    // Remember the most recent failure so the final error is diagnosable.
    let lastError: unknown;

    for (let attempt = 0; attempt < this.maxRetries; attempt++) {
      const controller = new AbortController();
      // Abort the request if it runs longer than the configured timeout.
      const id = setTimeout(() => controller.abort(), this.timeout);

      try {
        const response = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiToken}`,
          },
          body: JSON.stringify({ inputs }),
          signal: controller.signal,
        });
        if (!response.ok) {
          throw new Error(`Request failed with status ${response.status}`);
        }

        const responseJson: DeepInfraEmbeddingResponse = await response.json();
        return responseJson.embeddings;
      } catch (error) {
        lastError = error;
        console.error(`Attempt ${attempt + 1} failed: ${error}`);
      } finally {
        // Always release the timer, whether the attempt succeeded or failed.
        clearTimeout(id);
      }
    }

    throw Object.assign(new Error("Exceeded maximum retries"), {
      cause: lastError,
    });
  }

  /** Builds the request URL for `model`. */
  private getUrl(model: string): string {
    return `${API_ROOT}/${model}`;
  }
}
|
||||
export * from "@llamaindex/deepinfra";
|
||||
|
||||
@@ -1,110 +1 @@
|
||||
import { HfInference } from "@huggingface/inference";
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { loadTransformers } from "@llamaindex/env";
|
||||
import { Settings } from "../Settings.js";
|
||||
|
||||
/** Model identifiers usable as `HuggingFaceEmbedding.modelType`. */
export enum HuggingFaceEmbeddingModelType {
  /** "Xenova/all-MiniLM-L6-v2" */
  XENOVA_ALL_MINILM_L6_V2 = "Xenova/all-MiniLM-L6-v2",
  /** "Xenova/all-mpnet-base-v2" */
  XENOVA_ALL_MPNET_BASE_V2 = "Xenova/all-mpnet-base-v2",
}
|
||||
|
||||
/**
 * Generates embeddings with the feature-extraction pipeline of
 * '@xenova/transformers'.
 * The default model is [XENOVA_ALL_MINILM_L6_V2](https://huggingface.co/Xenova/all-MiniLM-L6-v2).
 *
 * Pass `modelType` to the constructor to use another model, e.g.:
 * ```
 * new HuggingFaceEmbedding({
 *  modelType: HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2,
 * });
 * ```
 *
 * @extends BaseEmbedding
 */
export class HuggingFaceEmbedding extends BaseEmbedding {
  modelType: string = HuggingFaceEmbeddingModelType.XENOVA_ALL_MINILM_L6_V2;
  quantized: boolean = true;

  // Feature-extraction pipeline, created on the first getExtractor() call.
  private extractor: any;

  /**
   * @param init - Optional field overrides, copied onto the instance.
   */
  constructor(init?: Partial<HuggingFaceEmbedding>) {
    super();
    Object.assign(this, init);
  }

  /**
   * Returns the feature-extraction pipeline for `modelType`, loading the
   * transformers library and building the pipeline the first time.
   */
  async getExtractor() {
    if (this.extractor) {
      return this.extractor;
    }

    const { pipeline } = await loadTransformers((transformer) => {
      Settings.callbackManager.dispatchEvent(
        "load-transformers",
        { transformer },
        true,
      );
    });
    this.extractor = await pipeline("feature-extraction", this.modelType, {
      quantized: this.quantized,
    });
    return this.extractor;
  }

  /**
   * Embeds one text using mean pooling and normalization.
   *
   * @returns The embedding as a plain number array.
   */
  override async getTextEmbedding(text: string): Promise<number[]> {
    const extract = await this.getExtractor();
    const result = await extract(text, { pooling: "mean", normalize: true });
    return Array.from(result.data);
  }
}
|
||||
|
||||
// Workaround to get the Options type from @huggingface/inference@2.7.0:
// it is taken from the second constructor parameter of HfInference.
type HfInferenceOptions = ConstructorParameters<typeof HfInference>[1];

/** Constructor options of `HuggingFaceInferenceAPIEmbedding`. */
export type HFConfig = HfInferenceOptions & {
  /** Model passed to every feature-extraction request. */
  model: string;
  /** Access token handed to the `HfInference` client. */
  accessToken: string;
  /** Optional endpoint URL. */
  endpoint?: string;
};
|
||||
|
||||
/**
 * Generates embeddings through feature extraction on Hugging Face's
 * Inference API.
 *
 * Provide `model` and `accessToken` in the constructor, e.g.:
 * ```
 * new HuggingFaceInferenceAPIEmbedding({
 *  model: HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2,
 *  accessToken: "<your-access-token>"
 * });
 * ```
 *
 * @extends BaseEmbedding
 */
export class HuggingFaceInferenceAPIEmbedding extends BaseEmbedding {
  model: string;
  hf: HfInference;

  /**
   * @param init - `model`, `accessToken` and optional `endpoint`; all other
   *   properties are forwarded to the `HfInference` constructor as options.
   */
  constructor(init: HFConfig) {
    super();
    const { model, accessToken, endpoint, ...clientOptions } = init;

    this.model = model;
    this.hf = new HfInference(accessToken, clientOptions);
    // NOTE(review): the value returned by endpoint() is discarded here —
    // confirm whether the library configures `this.hf` in place or returns a
    // separate endpoint client that should be kept.
    if (endpoint) {
      this.hf.endpoint(endpoint);
    }
  }

  /** Embeds a single text with one feature-extraction request. */
  async getTextEmbedding(text: string): Promise<number[]> {
    const extracted = await this.hf.featureExtraction({
      model: this.model,
      inputs: text,
    });
    return extracted as number[];
  }

  /** Embeds several texts with one feature-extraction request. */
  getTextEmbeddings = async (texts: string[]): Promise<Array<number[]>> => {
    const extracted = await this.hf.featureExtraction({
      model: this.model,
      inputs: texts,
    });
    return extracted as number[][];
  };
}
|
||||
export * from "@llamaindex/huggingface";
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export * from "@llamaindex/core/embeddings";
|
||||
export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
|
||||
export { DeepInfraEmbedding } from "./DeepInfraEmbedding.js";
|
||||
export { FireworksEmbedding } from "./fireworks.js";
|
||||
export * from "./GeminiEmbedding.js";
|
||||
@@ -9,5 +10,3 @@ export * from "./MixedbreadAIEmbeddings.js";
|
||||
export { OllamaEmbedding } from "./OllamaEmbedding.js";
|
||||
export * from "./OpenAIEmbedding.js";
|
||||
export { TogetherEmbedding } from "./together.js";
|
||||
// ClipEmbedding might not work in non-node.js runtime, but it doesn't have side effects
|
||||
export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
|
||||
|
||||
@@ -14,12 +14,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
|
||||
retriever: BaseRetriever;
|
||||
responseSynthesizer: BaseSynthesizer;
|
||||
nodePostprocessors: BaseNodePostprocessor[];
|
||||
preFilters?: unknown;
|
||||
|
||||
constructor(
|
||||
retriever: BaseRetriever,
|
||||
responseSynthesizer?: BaseSynthesizer,
|
||||
preFilters?: unknown,
|
||||
nodePostprocessors?: BaseNodePostprocessor[],
|
||||
) {
|
||||
super(async (strOrQueryBundle, stream) => {
|
||||
@@ -52,7 +50,6 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
|
||||
this.retriever = retriever;
|
||||
this.responseSynthesizer =
|
||||
responseSynthesizer || getResponseSynthesizer("compact");
|
||||
this.preFilters = preFilters;
|
||||
this.nodePostprocessors = nodePostprocessors || [];
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
import type { LLM } from "@llamaindex/core/llms";
|
||||
import {
|
||||
PromptTemplate,
|
||||
defaultKeywordExtractPrompt,
|
||||
defaultQuestionExtractPrompt,
|
||||
defaultSummaryPrompt,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
type KeywordExtractPrompt,
|
||||
type QuestionExtractPrompt,
|
||||
type SummaryPrompt,
|
||||
type TitleCombinePrompt,
|
||||
type TitleExtractorPrompt,
|
||||
} from "@llamaindex/core/prompts";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode, TextNode } from "@llamaindex/core/schema";
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
import {
|
||||
defaultKeywordExtractorPromptTemplate,
|
||||
defaultQuestionAnswerPromptTemplate,
|
||||
defaultSummaryExtractorPromptTemplate,
|
||||
defaultTitleCombinePromptTemplate,
|
||||
defaultTitleExtractorPromptTemplate,
|
||||
} from "./prompts.js";
|
||||
import { BaseExtractor } from "./types.js";
|
||||
|
||||
const STRIP_REGEX = /(\r\n|\n|\r)/gm;
|
||||
@@ -16,6 +22,7 @@ const STRIP_REGEX = /(\r\n|\n|\r)/gm;
|
||||
type KeywordExtractArgs = {
|
||||
llm?: LLM;
|
||||
keywords?: number;
|
||||
promptTemplate?: KeywordExtractPrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractKeyword = {
|
||||
@@ -39,6 +46,12 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
*/
|
||||
keywords: number = 5;
|
||||
|
||||
/**
|
||||
* The prompt template to use for the question extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: KeywordExtractPrompt;
|
||||
|
||||
/**
|
||||
* Constructor for the KeywordExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
@@ -53,6 +66,12 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.keywords = options?.keywords ?? 5;
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context", "maxKeywords"],
|
||||
template: options.promptTemplate,
|
||||
})
|
||||
: defaultKeywordExtractPrompt;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -66,9 +85,9 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
}
|
||||
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultKeywordExtractorPromptTemplate({
|
||||
contextStr: node.getContent(MetadataMode.ALL),
|
||||
keywords: this.keywords,
|
||||
prompt: this.promptTemplate.format({
|
||||
context: node.getContent(MetadataMode.ALL),
|
||||
maxKeywords: this.keywords.toString(),
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -93,8 +112,8 @@ export class KeywordExtractor extends BaseExtractor {
|
||||
type TitleExtractorsArgs = {
|
||||
llm?: LLM;
|
||||
nodes?: number;
|
||||
nodeTemplate?: string;
|
||||
combineTemplate?: string;
|
||||
nodeTemplate?: TitleExtractorPrompt["template"];
|
||||
combineTemplate?: TitleCombinePrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractTitle = {
|
||||
@@ -129,19 +148,19 @@ export class TitleExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the title extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
nodeTemplate: string;
|
||||
nodeTemplate: TitleExtractorPrompt;
|
||||
|
||||
/**
|
||||
* The prompt template to merge title with..
|
||||
* @type {string}
|
||||
*/
|
||||
combineTemplate: string;
|
||||
combineTemplate: TitleCombinePrompt;
|
||||
|
||||
/**
|
||||
* Constructor for the TitleExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
* @param {number} nodes Number of nodes to extract titles from.
|
||||
* @param {string} nodeTemplate The prompt template to use for the title extractor.
|
||||
* @param {TitleExtractorPrompt} nodeTemplate The prompt template to use for the title extractor.
|
||||
* @param {string} combineTemplate The prompt template to merge title with..
|
||||
*/
|
||||
constructor(options?: TitleExtractorsArgs) {
|
||||
@@ -150,10 +169,19 @@ export class TitleExtractor extends BaseExtractor {
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.nodes = options?.nodes ?? 5;
|
||||
|
||||
this.nodeTemplate =
|
||||
options?.nodeTemplate ?? defaultTitleExtractorPromptTemplate();
|
||||
this.combineTemplate =
|
||||
options?.combineTemplate ?? defaultTitleCombinePromptTemplate();
|
||||
this.nodeTemplate = options?.nodeTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.nodeTemplate,
|
||||
})
|
||||
: defaultTitleExtractorPromptTemplate;
|
||||
|
||||
this.combineTemplate = options?.combineTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.combineTemplate,
|
||||
})
|
||||
: defaultTitleCombinePromptTemplate;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -218,8 +246,8 @@ export class TitleExtractor extends BaseExtractor {
|
||||
const titleCandidates = await this.getTitlesCandidates(nodes);
|
||||
const combinedTitles = titleCandidates.join(", ");
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultTitleCombinePromptTemplate({
|
||||
contextStr: combinedTitles,
|
||||
prompt: this.combineTemplate.format({
|
||||
context: combinedTitles,
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -232,8 +260,8 @@ export class TitleExtractor extends BaseExtractor {
|
||||
private async getTitlesCandidates(nodes: BaseNode[]): Promise<string[]> {
|
||||
const titleJobs = nodes.map(async (node) => {
|
||||
const completion = await this.llm.complete({
|
||||
prompt: defaultTitleExtractorPromptTemplate({
|
||||
contextStr: node.getContent(MetadataMode.ALL),
|
||||
prompt: this.nodeTemplate.format({
|
||||
context: node.getContent(MetadataMode.ALL),
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -247,7 +275,7 @@ export class TitleExtractor extends BaseExtractor {
|
||||
type QuestionAnswerExtractArgs = {
|
||||
llm?: LLM;
|
||||
questions?: number;
|
||||
promptTemplate?: string;
|
||||
promptTemplate?: QuestionExtractPrompt["template"];
|
||||
embeddingOnly?: boolean;
|
||||
};
|
||||
|
||||
@@ -276,7 +304,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the question extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: string;
|
||||
promptTemplate: QuestionExtractPrompt;
|
||||
|
||||
/**
|
||||
* Wheter to use metadata for embeddings only
|
||||
@@ -289,7 +317,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
* Constructor for the QuestionsAnsweredExtractor class.
|
||||
* @param {LLM} llm LLM instance.
|
||||
* @param {number} questions Number of questions to generate.
|
||||
* @param {string} promptTemplate The prompt template to use for the question extractor.
|
||||
* @param {TextQAPrompt} promptTemplate The prompt template to use for the question extractor.
|
||||
* @param {boolean} embeddingOnly Wheter to use metadata for embeddings only.
|
||||
*/
|
||||
constructor(options?: QuestionAnswerExtractArgs) {
|
||||
@@ -300,12 +328,14 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.questions = options?.questions ?? 5;
|
||||
this.promptTemplate =
|
||||
options?.promptTemplate ??
|
||||
defaultQuestionAnswerPromptTemplate({
|
||||
numQuestions: this.questions,
|
||||
contextStr: "",
|
||||
});
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["numQuestions", "context"],
|
||||
template: options.promptTemplate,
|
||||
}).partialFormat({
|
||||
numQuestions: "5",
|
||||
})
|
||||
: defaultQuestionExtractPrompt;
|
||||
this.embeddingOnly = options?.embeddingOnly ?? false;
|
||||
}
|
||||
|
||||
@@ -323,9 +353,9 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
|
||||
const contextStr = node.getContent(this.metadataMode);
|
||||
|
||||
const prompt = defaultQuestionAnswerPromptTemplate({
|
||||
contextStr,
|
||||
numQuestions: this.questions,
|
||||
const prompt = this.promptTemplate.format({
|
||||
context: contextStr,
|
||||
numQuestions: this.questions.toString(),
|
||||
});
|
||||
|
||||
const questions = await this.llm.complete({
|
||||
@@ -356,7 +386,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
|
||||
type SummaryExtractArgs = {
|
||||
llm?: LLM;
|
||||
summaries?: string[];
|
||||
promptTemplate?: string;
|
||||
promptTemplate?: SummaryPrompt["template"];
|
||||
};
|
||||
|
||||
type ExtractSummary = {
|
||||
@@ -385,7 +415,7 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
* The prompt template to use for the summary extractor.
|
||||
* @type {string}
|
||||
*/
|
||||
promptTemplate: string;
|
||||
promptTemplate: SummaryPrompt;
|
||||
|
||||
private selfSummary: boolean;
|
||||
private prevSummary: boolean;
|
||||
@@ -404,8 +434,12 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
|
||||
this.llm = options?.llm ?? new OpenAI();
|
||||
this.summaries = summaries;
|
||||
this.promptTemplate =
|
||||
options?.promptTemplate ?? defaultSummaryExtractorPromptTemplate();
|
||||
this.promptTemplate = options?.promptTemplate
|
||||
? new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: options.promptTemplate,
|
||||
})
|
||||
: defaultSummaryPrompt;
|
||||
|
||||
this.selfSummary = summaries?.includes("self") ?? false;
|
||||
this.prevSummary = summaries?.includes("prev") ?? false;
|
||||
@@ -422,10 +456,10 @@ export class SummaryExtractor extends BaseExtractor {
|
||||
return "";
|
||||
}
|
||||
|
||||
const contextStr = node.getContent(this.metadataMode);
|
||||
const context = node.getContent(this.metadataMode);
|
||||
|
||||
const prompt = defaultSummaryExtractorPromptTemplate({
|
||||
contextStr,
|
||||
const prompt = this.promptTemplate.format({
|
||||
context,
|
||||
});
|
||||
|
||||
const summary = await this.llm.complete({
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
export interface DefaultPromptTemplate {
|
||||
contextStr: string;
|
||||
}
|
||||
|
||||
export interface DefaultKeywordExtractorPromptTemplate
|
||||
extends DefaultPromptTemplate {
|
||||
keywords: number;
|
||||
}
|
||||
|
||||
export interface DefaultQuestionAnswerPromptTemplate
|
||||
extends DefaultPromptTemplate {
|
||||
numQuestions: number;
|
||||
}
|
||||
|
||||
export interface DefaultNodeTextTemplate {
|
||||
metadataStr: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
export const defaultKeywordExtractorPromptTemplate = ({
|
||||
contextStr = "",
|
||||
keywords = 5,
|
||||
}: DefaultKeywordExtractorPromptTemplate) => `${contextStr}
|
||||
Give ${keywords} unique keywords for this document.
|
||||
Format as comma separated.
|
||||
Keywords: `;
|
||||
|
||||
export const defaultTitleExtractorPromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Give a title that summarizes all of the unique entities, titles or themes found in the context.
|
||||
Title: `;
|
||||
|
||||
export const defaultTitleCombinePromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Based on the above candidate titles and contents, what is the comprehensive title for this document?
|
||||
Title: `;
|
||||
|
||||
export const defaultQuestionAnswerPromptTemplate = (
|
||||
{ contextStr = "", numQuestions = 5 }: DefaultQuestionAnswerPromptTemplate = {
|
||||
contextStr: "",
|
||||
numQuestions: 5,
|
||||
},
|
||||
) => `${contextStr}
|
||||
Given the contextual informations, generate ${numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provideds as well.
|
||||
Try using these summaries to generate better questions that this context can answer.
|
||||
`;
|
||||
|
||||
export const defaultSummaryExtractorPromptTemplate = (
|
||||
{ contextStr = "" }: DefaultPromptTemplate = {
|
||||
contextStr: "",
|
||||
},
|
||||
) => `${contextStr}
|
||||
Summarize the key topics and entities of the sections.
|
||||
Summary: `;
|
||||
|
||||
export const defaultNodeTextTemplate = ({
|
||||
metadataStr = "",
|
||||
content = "",
|
||||
}: {
|
||||
metadataStr?: string;
|
||||
content?: string;
|
||||
} = {}) => `[Excerpt from document]
|
||||
${metadataStr}
|
||||
Excerpt:
|
||||
-----
|
||||
${content}
|
||||
-----
|
||||
`;
|
||||
@@ -1,10 +1,10 @@
|
||||
import { defaultNodeTextTemplate } from "@llamaindex/core/prompts";
|
||||
import {
|
||||
BaseNode,
|
||||
MetadataMode,
|
||||
TextNode,
|
||||
TransformComponent,
|
||||
} from "@llamaindex/core/schema";
|
||||
import { defaultNodeTextTemplate } from "./prompts.js";
|
||||
|
||||
/*
|
||||
* Abstract class for all extractors.
|
||||
@@ -71,7 +71,7 @@ export abstract class BaseExtractor extends TransformComponent {
|
||||
if (newNodes[idx] instanceof TextNode) {
|
||||
newNodes[idx] = new TextNode({
|
||||
...newNodes[idx],
|
||||
textTemplate: defaultNodeTextTemplate(),
|
||||
textTemplate: defaultNodeTextTemplate.format(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,39 +8,6 @@ import { runTransformations } from "../ingestion/IngestionPipeline.js";
|
||||
import type { StorageContext } from "../storage/StorageContext.js";
|
||||
import type { BaseDocumentStore } from "../storage/docStore/types.js";
|
||||
import type { BaseIndexStore } from "../storage/indexStore/types.js";
|
||||
import { IndexStruct } from "./IndexStruct.js";
|
||||
import { IndexStructType } from "./json-to-index-struct.js";
|
||||
|
||||
// A table of keywords mapping keywords to text chunks.
|
||||
export class KeywordTable extends IndexStruct {
|
||||
table: Map<string, Set<string>> = new Map();
|
||||
type: IndexStructType = IndexStructType.KEYWORD_TABLE;
|
||||
|
||||
addNode(keywords: string[], nodeId: string): void {
|
||||
keywords.forEach((keyword) => {
|
||||
if (!this.table.has(keyword)) {
|
||||
this.table.set(keyword, new Set());
|
||||
}
|
||||
this.table.get(keyword)!.add(nodeId);
|
||||
});
|
||||
}
|
||||
|
||||
deleteNode(keywords: string[], nodeId: string) {
|
||||
keywords.forEach((keyword) => {
|
||||
if (this.table.has(keyword)) {
|
||||
this.table.get(keyword)!.delete(nodeId);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
toJson(): Record<string, unknown> {
|
||||
return {
|
||||
...super.toJson(),
|
||||
table: this.table,
|
||||
type: this.type,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export interface BaseIndexInit<T> {
|
||||
serviceContext?: ServiceContext | undefined;
|
||||
|
||||
@@ -13,7 +13,7 @@ import type { StorageContext } from "../../storage/StorageContext.js";
|
||||
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { BaseIndexInit } from "../BaseIndex.js";
|
||||
import { BaseIndex, KeywordTable } from "../BaseIndex.js";
|
||||
import { BaseIndex } from "../BaseIndex.js";
|
||||
import { IndexStructType } from "../json-to-index-struct.js";
|
||||
import {
|
||||
extractKeywordsGivenResponse,
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
simpleExtractKeywords,
|
||||
} from "./utils.js";
|
||||
|
||||
import { KeywordTable } from "@llamaindex/core/data-structs";
|
||||
import type { LLM } from "@llamaindex/core/llms";
|
||||
import {
|
||||
defaultKeywordExtractPrompt,
|
||||
@@ -246,7 +247,6 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever ?? this.asRetriever(),
|
||||
responseSynthesizer,
|
||||
options?.preFilters,
|
||||
options?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -189,7 +189,6 @@ export class SummaryIndex extends BaseIndex<IndexList> {
|
||||
return new RetrieverQueryEngine(
|
||||
retriever,
|
||||
responseSynthesizer,
|
||||
options?.preFilters,
|
||||
options?.nodePostprocessors,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -31,8 +31,8 @@ import type { StorageContext } from "../../storage/StorageContext.js";
|
||||
import { storageContextFromDefaults } from "../../storage/StorageContext.js";
|
||||
import type { BaseIndexStore } from "../../storage/indexStore/types.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
MetadataFilters,
|
||||
VectorStore,
|
||||
VectorStoreByType,
|
||||
VectorStoreQueryResult,
|
||||
} from "../../vector-store/index.js";
|
||||
@@ -264,7 +264,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
}
|
||||
|
||||
static async fromVectorStore(
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
serviceContext?: ServiceContext,
|
||||
) {
|
||||
return this.fromVectorStores(
|
||||
@@ -298,9 +298,8 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
similarityTopK,
|
||||
} = options ?? {};
|
||||
return new RetrieverQueryEngine(
|
||||
retriever ?? this.asRetriever({ similarityTopK }),
|
||||
retriever ?? this.asRetriever({ similarityTopK, filters: preFilters }),
|
||||
responseSynthesizer,
|
||||
preFilters,
|
||||
nodePostprocessors,
|
||||
);
|
||||
}
|
||||
@@ -308,7 +307,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
protected async insertNodesToStore(
|
||||
newIds: string[],
|
||||
nodes: BaseNode[],
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
): Promise<void> {
|
||||
// NOTE: if the vector store doesn't store text,
|
||||
// we need to add the nodes to the index struct and document store
|
||||
@@ -358,7 +357,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
}
|
||||
|
||||
protected async deleteRefDocFromStore(
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
refDocId: string,
|
||||
): Promise<void> {
|
||||
await vectorStore.delete(refDocId);
|
||||
@@ -387,7 +386,7 @@ export type VectorIndexRetrieverOptions = {
|
||||
index: VectorStoreIndex;
|
||||
similarityTopK?: number | undefined;
|
||||
topK?: TopKMap | undefined;
|
||||
filters?: MetadataFilters;
|
||||
filters?: MetadataFilters | undefined;
|
||||
};
|
||||
|
||||
export class VectorIndexRetriever extends BaseRetriever {
|
||||
@@ -426,7 +425,7 @@ export class VectorIndexRetriever extends BaseRetriever {
|
||||
let nodesWithScores: NodeWithScore[] = [];
|
||||
|
||||
for (const type in vectorStores) {
|
||||
const vectorStore: VectorStore = vectorStores[type as ModalityType]!;
|
||||
const vectorStore: BaseVectorStore = vectorStores[type as ModalityType]!;
|
||||
nodesWithScores = nodesWithScores.concat(
|
||||
await this.retrieveQuery(query, type as ModalityType, vectorStore),
|
||||
);
|
||||
@@ -437,7 +436,7 @@ export class VectorIndexRetriever extends BaseRetriever {
|
||||
protected async retrieveQuery(
|
||||
query: MessageContent,
|
||||
type: ModalityType,
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
filters?: MetadataFilters,
|
||||
): Promise<NodeWithScore[]> {
|
||||
// convert string message to multi-modal format
|
||||
|
||||
@@ -7,7 +7,10 @@ import {
|
||||
type Metadata,
|
||||
} from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../storage/docStore/types.js";
|
||||
import type { VectorStore, VectorStoreByType } from "../vector-store/types.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
|
||||
import {
|
||||
DocStoreStrategy,
|
||||
@@ -59,7 +62,7 @@ export class IngestionPipeline {
|
||||
transformations: TransformComponent[] = [];
|
||||
documents?: Document[] | undefined;
|
||||
reader?: BaseReader | undefined;
|
||||
vectorStore?: VectorStore | undefined;
|
||||
vectorStore?: BaseVectorStore | undefined;
|
||||
vectorStores?: VectorStoreByType | undefined;
|
||||
docStore?: BaseDocumentStore;
|
||||
docStoreStrategy: DocStoreStrategy = DocStoreStrategy.UPSERTS;
|
||||
@@ -133,7 +136,7 @@ export async function addNodesToVectorStores(
|
||||
nodesAdded?: (
|
||||
newIds: string[],
|
||||
nodes: BaseNode<Metadata>[],
|
||||
vectorStore: VectorStore,
|
||||
vectorStore: BaseVectorStore,
|
||||
) => Promise<void>,
|
||||
) {
|
||||
const nodeMap = splitNodesByType(nodes);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { classify } from "./classify.js";
|
||||
|
||||
/**
|
||||
@@ -9,9 +9,9 @@ import { classify } from "./classify.js";
|
||||
*/
|
||||
export class UpsertsAndDeleteStrategy extends TransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: VectorStore[] | undefined;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: VectorStore[]) {
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: BaseVectorStore[]) {
|
||||
super(async (nodes: BaseNode[]): Promise<BaseNode[]> => {
|
||||
const { dedupedNodes, missingDocs, unusedDocs } = await classify(
|
||||
this.docStore,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { classify } from "./classify.js";
|
||||
|
||||
/**
|
||||
@@ -8,9 +8,9 @@ import { classify } from "./classify.js";
|
||||
*/
|
||||
export class UpsertsStrategy extends TransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: VectorStore[] | undefined;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: VectorStore[]) {
|
||||
constructor(docStore: BaseDocumentStore, vectorStores?: BaseVectorStore[]) {
|
||||
super(async (nodes: BaseNode[]): Promise<BaseNode[]> => {
|
||||
const { dedupedNodes, unusedDocs } = await classify(this.docStore, nodes);
|
||||
// remove unused docs
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../storage/docStore/types.js";
|
||||
import type { VectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
|
||||
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
|
||||
import { UpsertsStrategy } from "./UpsertsStrategy.js";
|
||||
@@ -28,7 +28,7 @@ class NoOpStrategy extends TransformComponent {
|
||||
export function createDocStoreStrategy(
|
||||
docStoreStrategy: DocStoreStrategy,
|
||||
docStore?: BaseDocumentStore,
|
||||
vectorStores: VectorStore[] = [],
|
||||
vectorStores: BaseVectorStore[] = [],
|
||||
): TransformComponent {
|
||||
if (docStoreStrategy === DocStoreStrategy.NONE) {
|
||||
return new NoOpStrategy();
|
||||
|
||||
@@ -1,305 +0,0 @@
|
||||
type Status = "starting" | "processing" | "succeeded" | "failed" | "canceled";
|
||||
type Visibility = "public" | "private";
|
||||
type WebhookEventType = "start" | "output" | "logs" | "completed";
|
||||
|
||||
export interface ApiError extends Error {
|
||||
request: Request;
|
||||
response: Response;
|
||||
}
|
||||
|
||||
export interface Account {
|
||||
type: "user" | "organization";
|
||||
username: string;
|
||||
name: string;
|
||||
github_url?: string;
|
||||
}
|
||||
|
||||
export interface Collection {
|
||||
name: string;
|
||||
slug: string;
|
||||
description: string;
|
||||
models?: Model[];
|
||||
}
|
||||
|
||||
export interface Deployment {
|
||||
owner: string;
|
||||
name: string;
|
||||
current_release: {
|
||||
number: number;
|
||||
model: string;
|
||||
version: string;
|
||||
created_at: string;
|
||||
created_by: Account;
|
||||
configuration: {
|
||||
hardware: string;
|
||||
min_instances: number;
|
||||
max_instances: number;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
export interface Hardware {
|
||||
sku: string;
|
||||
name: string;
|
||||
}
|
||||
|
||||
export interface Model {
|
||||
url: string;
|
||||
owner: string;
|
||||
name: string;
|
||||
description?: string;
|
||||
visibility: "public" | "private";
|
||||
github_url?: string;
|
||||
paper_url?: string;
|
||||
license_url?: string;
|
||||
run_count: number;
|
||||
cover_image_url?: string;
|
||||
default_example?: Prediction;
|
||||
latest_version?: ModelVersion;
|
||||
}
|
||||
|
||||
export interface ModelVersion {
|
||||
id: string;
|
||||
created_at: string;
|
||||
cog_version: string;
|
||||
openapi_schema: object;
|
||||
}
|
||||
|
||||
export interface Prediction {
|
||||
id: string;
|
||||
status: Status;
|
||||
model: string;
|
||||
version: string;
|
||||
input: object;
|
||||
output?: any;
|
||||
source: "api" | "web";
|
||||
error?: any;
|
||||
logs?: string;
|
||||
metrics?: {
|
||||
predict_time?: number;
|
||||
};
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
created_at: string;
|
||||
started_at?: string;
|
||||
completed_at?: string;
|
||||
urls: {
|
||||
get: string;
|
||||
cancel: string;
|
||||
stream?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export type Training = Prediction;
|
||||
|
||||
export interface Page<T> {
|
||||
previous?: string;
|
||||
next?: string;
|
||||
results: T[];
|
||||
}
|
||||
|
||||
export interface ServerSentEvent {
|
||||
event: string;
|
||||
data: string;
|
||||
id?: string;
|
||||
retry?: number;
|
||||
}
|
||||
|
||||
export interface WebhookSecret {
|
||||
key: string;
|
||||
}
|
||||
|
||||
export default class Replicate {
|
||||
constructor(options?: {
|
||||
auth?: string;
|
||||
userAgent?: string;
|
||||
baseUrl?: string;
|
||||
fetch?: (input: Request | string, init?: RequestInit) => Promise<Response>;
|
||||
});
|
||||
|
||||
auth: string;
|
||||
userAgent?: string;
|
||||
baseUrl?: string;
|
||||
fetch: (input: Request | string, init?: RequestInit) => Promise<Response>;
|
||||
|
||||
run(
|
||||
identifier: `${string}/${string}` | `${string}/${string}:${string}`,
|
||||
options: {
|
||||
input: object;
|
||||
wait?: { interval?: number };
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
signal?: AbortSignal;
|
||||
},
|
||||
progress?: (prediction: Prediction) => void,
|
||||
): Promise<object>;
|
||||
|
||||
stream(
|
||||
identifier: `${string}/${string}` | `${string}/${string}:${string}`,
|
||||
options: {
|
||||
input: object;
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
signal?: AbortSignal;
|
||||
},
|
||||
): AsyncGenerator<ServerSentEvent>;
|
||||
|
||||
request(
|
||||
route: string | URL,
|
||||
options: {
|
||||
method?: string;
|
||||
headers?: object | Headers;
|
||||
params?: object;
|
||||
data?: object;
|
||||
},
|
||||
): Promise<Response>;
|
||||
|
||||
paginate<T>(endpoint: () => Promise<Page<T>>): AsyncGenerator<[T]>;
|
||||
|
||||
wait(
|
||||
prediction: Prediction,
|
||||
options?: {
|
||||
interval?: number;
|
||||
},
|
||||
stop?: (prediction: Prediction) => Promise<boolean>,
|
||||
): Promise<Prediction>;
|
||||
|
||||
accounts: {
|
||||
current(): Promise<Account>;
|
||||
};
|
||||
|
||||
collections: {
|
||||
list(): Promise<Page<Collection>>;
|
||||
get(collection_slug: string): Promise<Collection>;
|
||||
};
|
||||
|
||||
deployments: {
|
||||
predictions: {
|
||||
create(
|
||||
deployment_owner: string,
|
||||
deployment_name: string,
|
||||
options: {
|
||||
input: object;
|
||||
stream?: boolean;
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
},
|
||||
): Promise<Prediction>;
|
||||
};
|
||||
get(deployment_owner: string, deployment_name: string): Promise<Deployment>;
|
||||
create(deployment_config: {
|
||||
name: string;
|
||||
model: string;
|
||||
version: string;
|
||||
hardware: string;
|
||||
min_instances: number;
|
||||
max_instances: number;
|
||||
}): Promise<Deployment>;
|
||||
update(
|
||||
deployment_owner: string,
|
||||
deployment_name: string,
|
||||
deployment_config: {
|
||||
version?: string;
|
||||
hardware?: string;
|
||||
min_instances?: number;
|
||||
max_instances?: number;
|
||||
} & (
|
||||
| { version: string }
|
||||
| { hardware: string }
|
||||
| { min_instances: number }
|
||||
| { max_instances: number }
|
||||
),
|
||||
): Promise<Deployment>;
|
||||
list(): Promise<Page<Deployment>>;
|
||||
};
|
||||
|
||||
hardware: {
|
||||
list(): Promise<Hardware[]>;
|
||||
};
|
||||
|
||||
models: {
|
||||
get(model_owner: string, model_name: string): Promise<Model>;
|
||||
list(): Promise<Page<Model>>;
|
||||
create(
|
||||
model_owner: string,
|
||||
model_name: string,
|
||||
options: {
|
||||
visibility: Visibility;
|
||||
hardware: string;
|
||||
description?: string;
|
||||
github_url?: string;
|
||||
paper_url?: string;
|
||||
license_url?: string;
|
||||
cover_image_url?: string;
|
||||
},
|
||||
): Promise<Model>;
|
||||
versions: {
|
||||
list(model_owner: string, model_name: string): Promise<ModelVersion[]>;
|
||||
get(
|
||||
model_owner: string,
|
||||
model_name: string,
|
||||
version_id: string,
|
||||
): Promise<ModelVersion>;
|
||||
};
|
||||
};
|
||||
|
||||
predictions: {
|
||||
create(
|
||||
options: {
|
||||
model?: string;
|
||||
version?: string;
|
||||
input: object;
|
||||
stream?: boolean;
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
} & ({ version: string } | { model: string }),
|
||||
): Promise<Prediction>;
|
||||
get(prediction_id: string): Promise<Prediction>;
|
||||
cancel(prediction_id: string): Promise<Prediction>;
|
||||
list(): Promise<Page<Prediction>>;
|
||||
};
|
||||
|
||||
trainings: {
|
||||
create(
|
||||
model_owner: string,
|
||||
model_name: string,
|
||||
version_id: string,
|
||||
options: {
|
||||
destination: `${string}/${string}`;
|
||||
input: object;
|
||||
webhook?: string;
|
||||
webhook_events_filter?: WebhookEventType[];
|
||||
},
|
||||
): Promise<Training>;
|
||||
get(training_id: string): Promise<Training>;
|
||||
cancel(training_id: string): Promise<Training>;
|
||||
list(): Promise<Page<Training>>;
|
||||
};
|
||||
|
||||
webhooks: {
|
||||
default: {
|
||||
secret: {
|
||||
get(): Promise<WebhookSecret>;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
export function validateWebhook(
|
||||
requestData:
|
||||
| Request
|
||||
| {
|
||||
id?: string;
|
||||
timestamp?: string;
|
||||
body: string;
|
||||
secret?: string;
|
||||
signature?: string;
|
||||
},
|
||||
secret: string,
|
||||
): Promise<boolean>;
|
||||
|
||||
export function parseProgressFromLogs(logs: Prediction | string): {
|
||||
percentage: number;
|
||||
current: number;
|
||||
total: number;
|
||||
} | null;
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,201 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2023 Replicate, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@@ -1,8 +0,0 @@
|
||||
export async function getImageEmbedModel() {
|
||||
if (globalThis.navigator?.userAgent === "Cloudflare-Workers") {
|
||||
return (await import("../../embeddings/CloudflareWorkerEmbedding.js"))
|
||||
.CloudflareWorkerMultiModalEmbedding;
|
||||
} else {
|
||||
return (await import("../../embeddings/ClipEmbedding.js")).ClipEmbedding;
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user