mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-04 03:40:26 -04:00
Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bd940d1d43 | |||
| 9f8ad37b79 | |||
| 7265f74c24 | |||
| e3f1b85846 | |||
| e38e474f86 | |||
| 2019a041f7 | |||
| 067a4894fe | |||
| 21769c8ad9 | |||
| 89ea1e1d31 | |||
| d9bbaf95f3 | |||
| 8744796c06 | |||
| f02621e379 | |||
| 1892e1ce1d | |||
| d90d8959a5 | |||
| 4df1fe6cca | |||
| 34faf4821a | |||
| b24ffc6174 | |||
| 82e25c924c | |||
| 1931bbca74 | |||
| 94566169fb | |||
| d6c270ec7a | |||
| e3a77044d5 | |||
| fd9c8294e1 | |||
| 0ebbfc1031 | |||
| 5dec9f912a | |||
| 1f53819b64 | |||
| d211b7ab13 | |||
| 057ee146bd |
@@ -150,7 +150,7 @@ jobs:
|
||||
done
|
||||
- name: Pack provider packages
|
||||
run: |
|
||||
for dir in packages/providers/*; do
|
||||
for dir in packages/providers/* packages/providers/storage/*; do
|
||||
if [ -d "$dir" ] && [ -f "$dir/package.json" ]; then
|
||||
echo "Packing $dir"
|
||||
pnpm pack --pack-destination ${{ runner.temp }} -C $dir
|
||||
|
||||
@@ -76,7 +76,7 @@ If you need any of those classes, you have to import them instead directly thoug
|
||||
Here's an example for importing the `PineconeVectorStore` class:
|
||||
|
||||
```typescript
|
||||
import { PineconeVectorStore } from "llamaindex/storage/vectorStore/PineconeVectorStore";
|
||||
import { PineconeVectorStore } from "llamaindex/vector-store/PineconeVectorStore";
|
||||
```
|
||||
|
||||
As the `PDFReader` is not working with the Edge runtime, here's how to use the `SimpleDirectoryReader` with the `LlamaParseReader` to load PDFs:
|
||||
|
||||
@@ -1,5 +1,55 @@
|
||||
# @llamaindex/doc
|
||||
|
||||
## 0.0.38
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- Updated dependencies [7265f74]
|
||||
- llamaindex@0.8.34
|
||||
- @llamaindex/openai@0.1.48
|
||||
|
||||
## 0.0.37
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2019a04]
|
||||
- @llamaindex/openai@0.1.47
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.36
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- f02621e: Fix internal links between chapters
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [d6c270e]
|
||||
- Updated dependencies [1892e1c]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/cloud@2.0.22
|
||||
- @llamaindex/openai@0.1.46
|
||||
- @llamaindex/node-parser@0.0.22
|
||||
- @llamaindex/readers@1.0.23
|
||||
|
||||
## 0.0.35
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5dec9f9]
|
||||
- Updated dependencies [fd9c829]
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- @llamaindex/cloud@2.0.21
|
||||
- llamaindex@0.8.31
|
||||
- @llamaindex/core@0.4.20
|
||||
- @llamaindex/node-parser@0.0.21
|
||||
- @llamaindex/openai@0.1.45
|
||||
- @llamaindex/readers@1.0.22
|
||||
|
||||
## 0.0.34
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/doc",
|
||||
"version": "0.0.34",
|
||||
"version": "0.0.38",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"build": "pnpm run build:docs && next build",
|
||||
|
||||
@@ -20,7 +20,7 @@ npm install llamaindex
|
||||
|
||||
## Choose your model
|
||||
|
||||
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](local_model).
|
||||
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](3_local_model).
|
||||
|
||||
## Get an OpenAI API key
|
||||
|
||||
@@ -36,4 +36,4 @@ We'll use `dotenv` to pull the API key out of that .env file, so also run:
|
||||
npm install dotenv
|
||||
```
|
||||
|
||||
Now you're ready to [create your agent](create_agent).
|
||||
Now you're ready to [create your agent](2_create_agent).
|
||||
|
||||
@@ -177,5 +177,5 @@ The second piece of output is the response from the LLM itself, where the `messa
|
||||
Great! We've built an agent with tool use! Next you can:
|
||||
|
||||
- [See the full code](https://github.com/run-llama/ts-agents/blob/main/1_agent/agent.ts)
|
||||
- [Switch to a local LLM](local_model)
|
||||
- Move on to [add Retrieval-Augmented Generation to your agent](agentic_rag)
|
||||
- [Switch to a local LLM](3_local_model)
|
||||
- Move on to [add Retrieval-Augmented Generation to your agent](4_agentic_rag)
|
||||
|
||||
@@ -89,4 +89,4 @@ You can use a ReActAgent instead of an OpenAIAgent in any of the further example
|
||||
|
||||
### Next steps
|
||||
|
||||
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](agentic_rag).
|
||||
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](4_agentic_rag).
|
||||
|
||||
@@ -153,4 +153,4 @@ The `OpenAIContextAwareAgent` approach simplifies the setup by allowing you to d
|
||||
|
||||
On the other hand, using the `QueryEngineTool` offers more flexibility and power. This method allows for customization in how queries are constructed and executed, enabling you to query data from various storages and process them in different ways. However, this added flexibility comes with increased complexity and response time, because it requires a separate tool call in which the query engine uses the LLM to generate the tool output that is then passed to the agent.
|
||||
|
||||
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](rag_and_tools)!
|
||||
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](5_rag_and_tools)!
|
||||
|
||||
@@ -127,4 +127,4 @@ In the final tool call, it used the `sumNumbers` function to add the two budgets
|
||||
}
|
||||
```
|
||||
|
||||
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](llamaparse).
|
||||
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](6_llamaparse).
|
||||
|
||||
@@ -17,4 +17,4 @@ const documents = await reader.loadData("../data/sf_budget_2023_2024.pdf");
|
||||
|
||||
Now you will be able to ask more complicated questions of the same PDF and get better results. You can find this code [in our repo](https://github.com/run-llama/ts-agents/blob/main/4_llamaparse/agent.ts).
|
||||
|
||||
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](qdrant).
|
||||
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](7_qdrant).
|
||||
|
||||
@@ -65,13 +65,13 @@ Since parsing a PDF can be slow, especially a large one, using the pre-parsed ch
|
||||
|
||||
In this guide you've learned how to
|
||||
|
||||
- [Create an agent](create_agent)
|
||||
- [Create an agent](2_create_agent)
|
||||
- Use remote LLMs like GPT-4
|
||||
- [Use local LLMs like Mixtral](local_model)
|
||||
- [Create a RAG query engine](agentic_rag)
|
||||
- [Turn functions and query engines into agent tools](rag_and_tools)
|
||||
- [Use local LLMs like Mixtral](3_local_model)
|
||||
- [Create a RAG query engine](4_agentic_rag)
|
||||
- [Turn functions and query engines into agent tools](5_rag_and_tools)
|
||||
- Combine those tools
|
||||
- [Enhance your parsing with LlamaParse](llamaparse)
|
||||
- [Enhance your parsing with LlamaParse](6_llamaparse)
|
||||
- Persist your data in a vector store
|
||||
|
||||
The next steps are up to you! Try creating more complex functions and query engines, and set your agent loose on the world.
|
||||
|
||||
@@ -21,4 +21,4 @@ Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for t
|
||||
|
||||
## API Reference
|
||||
|
||||
- [VectorStoreBase](/docs/api/classes/VectorStoreBase)
|
||||
- [BaseVectorStore](/docs/api/classes/BaseVectorStore)
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/core-e2e
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 34faf48: chore: move vector stores to their own packages
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/cloudflare-worker-agent-test
|
||||
|
||||
## 0.0.130
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.0.129
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.0.126
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloudflare-worker-agent-test",
|
||||
"version": "0.0.126",
|
||||
"version": "0.0.130",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,20 @@
|
||||
# @llamaindex/llama-parse-browser-test
|
||||
|
||||
## 0.0.42
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d6c270e]
|
||||
- @llamaindex/cloud@2.0.22
|
||||
|
||||
## 0.0.41
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [5dec9f9]
|
||||
- Updated dependencies [fd9c829]
|
||||
- @llamaindex/cloud@2.0.21
|
||||
|
||||
## 0.0.40
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/llama-parse-browser-test",
|
||||
"private": true,
|
||||
"version": "0.0.40",
|
||||
"version": "0.0.42",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.7.2",
|
||||
"vite": "^5.4.11",
|
||||
"vite": "^5.4.12",
|
||||
"vite-plugin-wasm": "^3.3.0"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/next-agent-test
|
||||
|
||||
## 0.1.130
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.1.129
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.1.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.1.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.1.126
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-agent-test",
|
||||
"version": "0.1.126",
|
||||
"version": "0.1.130",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# test-edge-runtime
|
||||
|
||||
## 0.1.129
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.1.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.1.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.1.126
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.1.125
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/nextjs-edge-runtime-test",
|
||||
"version": "0.1.125",
|
||||
"version": "0.1.129",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/next-node-runtime
|
||||
|
||||
## 0.0.111
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.0.110
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.109
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.108
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.0.107
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-node-runtime-test",
|
||||
"version": "0.0.107",
|
||||
"version": "0.0.111",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/waku-query-engine-test
|
||||
|
||||
## 0.0.130
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.0.129
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.0.126
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/waku-query-engine-test",
|
||||
"version": "0.0.126",
|
||||
"version": "0.0.130",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/e2e",
|
||||
"private": true,
|
||||
"version": "0.0.7",
|
||||
"version": "0.0.8",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"e2e": "node --import tsx --import ./mock-register.js --test ./node/**/*.e2e.ts",
|
||||
|
||||
@@ -1,5 +1,40 @@
|
||||
# examples
|
||||
|
||||
## 0.1.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 21769c8: Update deprecated response property of query engine to message.content property
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.23
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 8744796: Update the chromadb npm client to support the latest chromadb image (0.6.3)
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/vercel@0.0.8
|
||||
- @llamaindex/readers@1.0.23
|
||||
|
||||
## 0.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
- @llamaindex/core@0.4.20
|
||||
- @llamaindex/vercel@0.0.7
|
||||
- @llamaindex/readers@1.0.22
|
||||
|
||||
## 0.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -33,12 +33,12 @@ async function main() {
|
||||
});
|
||||
|
||||
// Chat with the agent
|
||||
const response = await agent.chat({
|
||||
const { message } = await agent.chat({
|
||||
message: "What was his first salary?",
|
||||
});
|
||||
|
||||
// Print the response
|
||||
console.log(response.response);
|
||||
console.log(message.content);
|
||||
}
|
||||
|
||||
void main().then(() => {
|
||||
|
||||
@@ -52,12 +52,12 @@ async function main() {
|
||||
});
|
||||
|
||||
// Chat with the agent
|
||||
const response = await agent.chat({
|
||||
const { message } = await agent.chat({
|
||||
message: "What was his first salary?",
|
||||
});
|
||||
|
||||
// Print the response
|
||||
console.log(response.response);
|
||||
console.log(message.content);
|
||||
}
|
||||
|
||||
void main().then(() => {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { OpenAI, OpenAIAgent, WikipediaTool } from "llamaindex";
|
||||
import { OpenAI, OpenAIAgent } from "llamaindex";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
async function main() {
|
||||
const llm = new OpenAI({ model: "gpt-4-turbo" });
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Anthropic, FunctionTool, Settings, WikipediaTool } from "llamaindex";
|
||||
import { Anthropic, FunctionTool, Settings } from "llamaindex";
|
||||
import { AnthropicAgent } from "llamaindex/agent/anthropic";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
Settings.callbackManager.on("llm-tool-call", (event) => {
|
||||
console.log("llm-tool-call", event.detail.toolCall);
|
||||
@@ -37,12 +38,12 @@ const agent = new AnthropicAgent({
|
||||
});
|
||||
|
||||
async function main() {
|
||||
const { response } = await agent.chat({
|
||||
const { message } = await agent.chat({
|
||||
message:
|
||||
"What is the weather in New York? What's the history of New York from Wikipedia in 3 sentences?",
|
||||
});
|
||||
|
||||
console.log(response);
|
||||
console.log(message.content);
|
||||
}
|
||||
|
||||
void main();
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
const collectionName = "movie_reviews";
|
||||
|
||||
async function main() {
|
||||
const sourceFile: string = "./data/movie_reviews.csv";
|
||||
const sourceFile: string = "../data/movie_reviews.csv";
|
||||
|
||||
try {
|
||||
console.log(`Loading data from ${sourceFile}`);
|
||||
|
||||
+2
-2
@@ -38,12 +38,12 @@ async function main() {
|
||||
const query = "What is the meaning of life?";
|
||||
|
||||
// Query
|
||||
const response = await queryEngine.query({
|
||||
const { message } = await queryEngine.query({
|
||||
query,
|
||||
});
|
||||
|
||||
// Log the response
|
||||
console.log(response.response);
|
||||
console.log(message.content);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/examples",
|
||||
"private": true,
|
||||
"version": "0.0.21",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.0.5",
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
@@ -9,9 +9,9 @@
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@azure/search-documents": "^12.1.0",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@llamaindex/core": "^0.4.19",
|
||||
"@llamaindex/readers": "^1.0.21",
|
||||
"@llamaindex/vercel": "^0.0.6",
|
||||
"@llamaindex/core": "^0.4.21",
|
||||
"@llamaindex/readers": "^1.0.23",
|
||||
"@llamaindex/vercel": "^0.0.8",
|
||||
"@llamaindex/workflow": "^0.0.8",
|
||||
"@notionhq/client": "^2.2.15",
|
||||
"@pinecone-database/pinecone": "^4.0.0",
|
||||
@@ -22,10 +22,12 @@
|
||||
"commander": "^12.1.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"llamaindex": "^0.8.28",
|
||||
"mongodb": "^6.7.0",
|
||||
"llamaindex": "^0.8.33",
|
||||
"mongodb": "6.7.0",
|
||||
"pathe": "^1.1.2",
|
||||
"postgres": "^3.4.4"
|
||||
"postgres": "^3.4.4",
|
||||
"ajv": "^8.17.1",
|
||||
"wikipedia": "^2.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.9.0",
|
||||
|
||||
@@ -33,19 +33,19 @@ async function main() {
|
||||
retriever,
|
||||
});
|
||||
|
||||
const response = await queryEngine.query({
|
||||
const { message } = await queryEngine.query({
|
||||
query: "What did the author do growing up?",
|
||||
});
|
||||
|
||||
// cohere response
|
||||
console.log(response.response);
|
||||
console.log(message.content);
|
||||
|
||||
const baseResponse = await baseQueryEngine.query({
|
||||
const { message: baseMessage } = await baseQueryEngine.query({
|
||||
query: "What did the author do growing up?",
|
||||
});
|
||||
|
||||
// response without cohere
|
||||
console.log(baseResponse.response);
|
||||
console.log(baseMessage.content);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
|
||||
@@ -37,7 +37,7 @@ Read and follow the instructions in the README.md file located one directory up
|
||||
|
||||
To import documents and save the embedding vectors to your database:
|
||||
|
||||
> `npx tsx pg-vector-store/load-docs.ts data`
|
||||
> `npx tsx vector-store/pg/load-docs.ts data`
|
||||
|
||||
where `data` is the directory containing your input files. Using the `data` directory in the example above will read all of the files in that directory using the LlamaIndexTS default readers for each file type.
|
||||
|
||||
@@ -45,6 +45,23 @@ where data is the directory containing your input files. Using the `data` direct
|
||||
|
||||
To query using the resulting vector store:
|
||||
|
||||
> `npx tsx pg-vector-store/query.ts`
|
||||
> `npx tsx vector-store/pg/query.ts`
|
||||
|
||||
The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query.
|
||||
|
||||
## Supabase
|
||||
|
||||
You can try the supabase example by running:
|
||||
|
||||
> `npx tsx vector-store/pg/supabase.ts`
|
||||
|
||||
This will use the `POSTGRES_URL` environment variable to connect to your Supabase database.
|
||||
Get the connection string from the Supabase project settings page. See more details here: https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
|
||||
|
||||
## Vercel
|
||||
|
||||
You can try the vercel example by running:
|
||||
|
||||
> `npx tsx vector-store/pg/vercel.ts`
|
||||
|
||||
For more information on Vercel Postgres, see: https://vercel.com/docs/storage/vercel-postgres/sdk
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
// load-docs.ts
|
||||
import {
|
||||
PGVectorStore,
|
||||
SimpleDirectoryReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
import fs from "node:fs/promises";
|
||||
|
||||
async function getSourceFilenames(sourceDir: string) {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import dotenv from "dotenv";
|
||||
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
|
||||
import { Document, VectorStoreQueryMode } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
import postgres from "postgres";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { PGVectorStore, VectorStoreIndex } from "llamaindex";
|
||||
import { VectorStoreIndex } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
async function main() {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import dotenv from "dotenv";
|
||||
import {
|
||||
SimpleDirectoryReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// Get direct connection string from Supabase and set it as POSTGRES_URL environment variable
|
||||
// https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
|
||||
|
||||
const sourceDir = "../data";
|
||||
const connectionString = process.env.POSTGRES_URL;
|
||||
|
||||
const rdr = new SimpleDirectoryReader();
|
||||
const docs = await rdr.loadData({ directoryPath: sourceDir });
|
||||
const pgvs = new PGVectorStore({ clientConfig: { connectionString } });
|
||||
pgvs.setCollection(sourceDir);
|
||||
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: pgvs });
|
||||
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
|
||||
const queryEngine = index.asQueryEngine();
|
||||
|
||||
const results = await queryEngine.query({
|
||||
query: "Information about the planet",
|
||||
});
|
||||
|
||||
console.log(results);
|
||||
@@ -1,7 +1,8 @@
|
||||
// https://vercel.com/docs/storage/vercel-postgres/sdk
|
||||
import { sql } from "@vercel/postgres";
|
||||
import dotenv from "dotenv";
|
||||
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
|
||||
import { Document, VectorStoreQueryMode } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
|
||||
@@ -21,12 +21,12 @@ async function main() {
|
||||
|
||||
// Query the index
|
||||
const queryEngine = index.asQueryEngine();
|
||||
const { response, sourceNodes } = await queryEngine.query({
|
||||
const { message, sourceNodes } = await queryEngine.query({
|
||||
query: "What did the author do in college?",
|
||||
});
|
||||
|
||||
// Output response with sources
|
||||
console.log(response);
|
||||
console.log(message.content);
|
||||
|
||||
if (sourceNodes) {
|
||||
sourceNodes.forEach((source: NodeWithScore, index: number) => {
|
||||
|
||||
@@ -29,10 +29,10 @@ async function main() {
|
||||
nodePostprocessor,
|
||||
]);
|
||||
|
||||
const response = await queryEngine.query({
|
||||
const { message } = await queryEngine.query({
|
||||
query: "What did the author do growing up?",
|
||||
});
|
||||
console.log(response.response);
|
||||
console.log(message.content);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { openai } from "@ai-sdk/openai";
|
||||
import { VercelLLM } from "@llamaindex/vercel";
|
||||
import { LLMAgent, WikipediaTool } from "llamaindex";
|
||||
import { LLMAgent } from "llamaindex";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
async function main() {
|
||||
// Create an instance of VercelLLM with the OpenAI model
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/** Example of a tool that uses Wikipedia */
|
||||
|
||||
import type { BaseTool, ToolMetadata } from "@llamaindex/core/llms";
|
||||
import type { JSONSchemaType } from "ajv";
|
||||
import { default as wiki } from "wikipedia";
|
||||
@@ -7,7 +9,7 @@ type WikipediaParameter = {
|
||||
lang?: string;
|
||||
};
|
||||
|
||||
export type WikipediaToolParams = {
|
||||
type WikipediaToolParams = {
|
||||
metadata?: ToolMetadata<JSONSchemaType<WikipediaParameter>>;
|
||||
};
|
||||
|
||||
@@ -43,8 +45,8 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
|
||||
page: string,
|
||||
lang: string = this.DEFAULT_LANG,
|
||||
): Promise<string> {
|
||||
wiki.default.setLang(lang);
|
||||
const pageResult = await wiki.default.page(page, { autoSuggest: false });
|
||||
wiki.setLang(lang);
|
||||
const pageResult = await wiki.page(page, { autoSuggest: false });
|
||||
const content = await pageResult.content();
|
||||
return content;
|
||||
}
|
||||
@@ -53,7 +55,7 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
|
||||
query,
|
||||
lang = this.DEFAULT_LANG,
|
||||
}: WikipediaParameter): Promise<string> {
|
||||
const searchResult = await wiki.default.search(query);
|
||||
const searchResult = await wiki.search(query);
|
||||
if (searchResult.results.length === 0) return "No search results.";
|
||||
return await this.loadData(searchResult.results[0].title, lang);
|
||||
}
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/autotool
|
||||
|
||||
## 5.0.34
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 5.0.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 5.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 5.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 5.0.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,40 @@
|
||||
# @llamaindex/autotool-01-node-example
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
- @llamaindex/autotool@5.0.34
|
||||
|
||||
## 0.0.76
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
- @llamaindex/autotool@5.0.33
|
||||
|
||||
## 0.0.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/autotool@5.0.32
|
||||
|
||||
## 0.0.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
- @llamaindex/autotool@5.0.31
|
||||
|
||||
## 0.0.73
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
"scripts": {
|
||||
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
|
||||
},
|
||||
"version": "0.0.73"
|
||||
"version": "0.0.77"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,40 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.121
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
- @llamaindex/autotool@5.0.34
|
||||
|
||||
## 0.1.120
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
- @llamaindex/autotool@5.0.33
|
||||
|
||||
## 0.1.119
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/autotool@5.0.32
|
||||
|
||||
## 0.1.118
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
- @llamaindex/autotool@5.0.31
|
||||
|
||||
## 0.1.117
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.117",
|
||||
"version": "0.1.121",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool",
|
||||
"type": "module",
|
||||
"version": "5.0.30",
|
||||
"version": "5.0.34",
|
||||
"description": "auto transpile your JS function to LLM Agent compatible",
|
||||
"files": [
|
||||
"dist",
|
||||
@@ -70,7 +70,7 @@
|
||||
"@swc/types": "^0.1.12",
|
||||
"@types/json-schema": "^7.0.15",
|
||||
"@types/node": "^22.9.0",
|
||||
"bunchee": "6.0.3",
|
||||
"bunchee": "6.2.0",
|
||||
"llamaindex": "workspace:*",
|
||||
"next": "15.0.3",
|
||||
"rollup": "^4.28.1",
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 2.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- d6c270e: feat: support passing project and org id to llama parse reader
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- @llamaindex/core@0.4.21
|
||||
|
||||
## 2.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 5dec9f9: chore: bump sdk deps version
|
||||
- fd9c829: chore: bump llamacloud openapi
|
||||
- Updated dependencies [d211b7a]
|
||||
- @llamaindex/core@0.4.20
|
||||
|
||||
## 2.0.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -10,7 +10,13 @@ export default defineConfig({
|
||||
format: "prettier",
|
||||
lint: "eslint",
|
||||
},
|
||||
types: {
|
||||
enums: "javascript",
|
||||
},
|
||||
plugins: [
|
||||
"@hey-api/schemas",
|
||||
"@hey-api/sdk",
|
||||
{
|
||||
enums: "javascript",
|
||||
identifierCase: "preserve",
|
||||
name: "@hey-api/typescript",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
+4694
-190
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "2.0.20",
|
||||
"version": "2.0.22",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
@@ -50,11 +50,11 @@
|
||||
"directory": "packages/cloud"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@hey-api/client-fetch": "^0.4.4",
|
||||
"@hey-api/openapi-ts": "^0.56.0",
|
||||
"@hey-api/client-fetch": "^0.6.0",
|
||||
"@hey-api/openapi-ts": "^0.61.0",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"bunchee": "6.0.3"
|
||||
"bunchee": "6.2.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@llamaindex/core": "workspace:*",
|
||||
|
||||
@@ -31,6 +31,8 @@ var process: any;
|
||||
* See https://github.com/run-llama/llama_parse
|
||||
*/
|
||||
export class LlamaParseReader extends FileReader {
|
||||
project_id?: string | undefined;
|
||||
organization_id?: string | undefined;
|
||||
// The API key for the LlamaParse API. Can be set as an environment variable: LLAMA_CLOUD_API_KEY
|
||||
apiKey: string;
|
||||
// The base URL of the Llama Cloud Platform.
|
||||
@@ -118,6 +120,7 @@ export class LlamaParseReader extends FileReader {
|
||||
structured_output?: boolean | undefined;
|
||||
structured_output_json_schema?: string | undefined;
|
||||
structured_output_json_schema_name?: string | undefined;
|
||||
extract_layout?: boolean | undefined;
|
||||
|
||||
// numWorkers is implemented in SimpleDirectoryReader
|
||||
stdout?: WriteStream | undefined;
|
||||
@@ -248,6 +251,7 @@ export class LlamaParseReader extends FileReader {
|
||||
structured_output_json_schema: this.structured_output_json_schema,
|
||||
structured_output_json_schema_name:
|
||||
this.structured_output_json_schema_name,
|
||||
extract_layout: this.extract_layout,
|
||||
} satisfies {
|
||||
[Key in keyof Body_upload_file_api_v1_parsing_upload_post]-?:
|
||||
| Body_upload_file_api_v1_parsing_upload_post[Key]
|
||||
@@ -257,6 +261,10 @@ export class LlamaParseReader extends FileReader {
|
||||
const response = await uploadFileApiV1ParsingUploadPost({
|
||||
client: this.#client,
|
||||
throwOnError: true,
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal: AbortSignal.timeout(this.maxTimeout * 1000),
|
||||
body,
|
||||
});
|
||||
@@ -282,6 +290,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
const { data } = result;
|
||||
@@ -298,6 +310,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -309,6 +325,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -320,6 +340,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -509,6 +533,10 @@ export class LlamaParseReader extends FileReader {
|
||||
job_id: jobId,
|
||||
name: imageName,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
});
|
||||
if (response.error) {
|
||||
throw new Error(`Failed to download image: ${response.error.detail}`);
|
||||
|
||||
@@ -1,5 +1,20 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- @llamaindex/core@0.4.21
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- @llamaindex/core@0.4.20
|
||||
|
||||
## 0.0.77
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.77",
|
||||
"version": "0.0.79",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
@@ -43,7 +43,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.9.0",
|
||||
"bunchee": "6.0.3"
|
||||
"bunchee": "6.2.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-bedrock-agent-runtime": "^3.706.0",
|
||||
|
||||
@@ -1,5 +1,18 @@
|
||||
# @llamaindex/core
|
||||
|
||||
## 0.4.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
- 1931bbc: refactor: @llamaindex/azure
|
||||
|
||||
## 0.4.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- d211b7a: added support for tool calls with results in message history for the Anthropic agent
|
||||
|
||||
## 0.4.19
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/core",
|
||||
"type": "module",
|
||||
"version": "0.4.19",
|
||||
"version": "0.4.21",
|
||||
"description": "LlamaIndex Core Module",
|
||||
"exports": {
|
||||
"./agent": {
|
||||
@@ -391,7 +391,7 @@
|
||||
"devDependencies": {
|
||||
"@edge-runtime/vm": "^4.0.4",
|
||||
"ajv": "^8.17.1",
|
||||
"bunchee": "6.0.3",
|
||||
"bunchee": "6.2.0",
|
||||
"happy-dom": "^15.11.6",
|
||||
"natural": "^8.0.1"
|
||||
},
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { Tokenizer } from "@llamaindex/env/tokenizers";
|
||||
import type { BaseEmbedding } from "../embeddings";
|
||||
import type { LLM } from "../llms";
|
||||
import {
|
||||
type CallbackManager,
|
||||
@@ -12,6 +13,11 @@ import {
|
||||
setChunkSize,
|
||||
withChunkSize,
|
||||
} from "./settings/chunk-size";
|
||||
import {
|
||||
getEmbeddedModel,
|
||||
setEmbeddedModel,
|
||||
withEmbeddedModel,
|
||||
} from "./settings/embedModel";
|
||||
import { getLLM, setLLM, withLLM } from "./settings/llm";
|
||||
import {
|
||||
getTokenizer,
|
||||
@@ -29,6 +35,15 @@ export const Settings = {
|
||||
withLLM<Result>(llm: LLM, fn: () => Result): Result {
|
||||
return withLLM(llm, fn);
|
||||
},
|
||||
get embedModel() {
|
||||
return getEmbeddedModel();
|
||||
},
|
||||
set embedModel(embedModel) {
|
||||
setEmbeddedModel(embedModel);
|
||||
},
|
||||
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
|
||||
return withEmbeddedModel(embedModel, fn);
|
||||
},
|
||||
get tokenizer() {
|
||||
return getTokenizer();
|
||||
},
|
||||
|
||||
+7
-4
@@ -1,15 +1,18 @@
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
const embeddedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>();
|
||||
let globalEmbeddedModel: BaseEmbedding | null = null;
|
||||
|
||||
export function getEmbeddedModel(): BaseEmbedding {
|
||||
if (globalEmbeddedModel === null) {
|
||||
globalEmbeddedModel = new OpenAIEmbedding();
|
||||
const currentEmbeddedModel =
|
||||
embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
|
||||
if (!currentEmbeddedModel) {
|
||||
throw new Error(
|
||||
"Cannot find Embedding, please set `Settings.embedModel = ...` on the top of your code",
|
||||
);
|
||||
}
|
||||
return embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
|
||||
return currentEmbeddedModel;
|
||||
}
|
||||
|
||||
export function setEmbeddedModel(embeddedModel: BaseEmbedding) {
|
||||
@@ -0,0 +1,167 @@
|
||||
import { path } from "@llamaindex/env";
|
||||
import {
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../../global";
|
||||
import type { StoredValue } from "../../schema";
|
||||
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
|
||||
|
||||
const TYPE_KEY = "__type__";
|
||||
const DATA_KEY = "__data__";
|
||||
|
||||
export interface Serializer<T> {
|
||||
toPersistence(data: Record<string, unknown>): T;
|
||||
|
||||
fromPersistence(data: T): Record<string, unknown>;
|
||||
}
|
||||
|
||||
export const jsonSerializer: Serializer<string> = {
|
||||
toPersistence(data) {
|
||||
return JSON.stringify(data);
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return JSON.parse(data);
|
||||
},
|
||||
};
|
||||
|
||||
export const noneSerializer: Serializer<Record<string, unknown>> = {
|
||||
toPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
};
|
||||
|
||||
type DocJson<Data> = {
|
||||
[TYPE_KEY]: ObjectType;
|
||||
[DATA_KEY]: Data;
|
||||
};
|
||||
|
||||
export function isValidDocJson(
|
||||
docJson: StoredValue | null | undefined,
|
||||
): docJson is DocJson<unknown> {
|
||||
return (
|
||||
typeof docJson === "object" &&
|
||||
docJson !== null &&
|
||||
docJson[TYPE_KEY] !== undefined &&
|
||||
docJson[DATA_KEY] !== undefined
|
||||
);
|
||||
}
|
||||
|
||||
export function docToJson(
|
||||
doc: BaseNode,
|
||||
serializer: Serializer<unknown>,
|
||||
): DocJson<unknown> {
|
||||
return {
|
||||
[DATA_KEY]: serializer.toPersistence(doc.toJSON()),
|
||||
[TYPE_KEY]: doc.type,
|
||||
};
|
||||
}
|
||||
|
||||
export function jsonToDoc<Data>(
|
||||
docDict: DocJson<Data>,
|
||||
serializer: Serializer<Data>,
|
||||
): BaseNode {
|
||||
const docType = docDict[TYPE_KEY];
|
||||
// fixme: zod type check this
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]);
|
||||
let doc: BaseNode;
|
||||
|
||||
if (docType === ObjectType.DOCUMENT) {
|
||||
doc = new Document({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
embedding: dataDict.embedding,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
});
|
||||
} else if (docType === ObjectType.TEXT) {
|
||||
doc = new TextNode({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
relationships: dataDict.relationships,
|
||||
});
|
||||
} else {
|
||||
throw new Error(`Unknown doc type: ${docType}`);
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
const DEFAULT_PERSIST_PATH = path.join(
|
||||
DEFAULT_PERSIST_DIR,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
|
||||
export interface RefDocInfo {
|
||||
nodeIds: string[];
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
extraInfo: Record<string, any>;
|
||||
}
|
||||
|
||||
export abstract class BaseDocumentStore {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
serializer: Serializer<any> = jsonSerializer;
|
||||
|
||||
// Save/load
|
||||
persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
|
||||
// Persist the docstore to a file.
|
||||
}
|
||||
|
||||
// Main interface
|
||||
abstract docs(): Promise<Record<string, BaseNode>>;
|
||||
|
||||
abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;
|
||||
|
||||
abstract getDocument(
|
||||
docId: string,
|
||||
raiseError: boolean,
|
||||
): Promise<BaseNode | undefined>;
|
||||
|
||||
abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;
|
||||
|
||||
abstract documentExists(docId: string): Promise<boolean>;
|
||||
|
||||
// Hash
|
||||
abstract setDocumentHash(docId: string, docHash: string): Promise<void>;
|
||||
|
||||
abstract getDocumentHash(docId: string): Promise<string | undefined>;
|
||||
|
||||
abstract getAllDocumentHashes(): Promise<Record<string, string>>;
|
||||
|
||||
// Ref Docs
|
||||
abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;
|
||||
|
||||
abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;
|
||||
|
||||
abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;
|
||||
|
||||
// Nodes
|
||||
getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
|
||||
return Promise.all(
|
||||
nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)),
|
||||
);
|
||||
}
|
||||
|
||||
async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
|
||||
const doc = await this.getDocument(nodeId, raiseError);
|
||||
if (!(doc instanceof BaseNode)) {
|
||||
throw new Error(`Document ${nodeId} is not a Node.`);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
async getNodeDict(nodeIdDict: {
|
||||
[index: number]: string;
|
||||
}): Promise<Record<number, BaseNode>> {
|
||||
const result: Record<number, BaseNode> = {};
|
||||
for (const index in nodeIdDict) {
|
||||
result[index] = await this.getNode(nodeIdDict[index]!);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,167 +1,2 @@
|
||||
import { path } from "@llamaindex/env";
|
||||
import {
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../../global";
|
||||
import type { StoredValue } from "../../schema";
|
||||
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
|
||||
|
||||
const TYPE_KEY = "__type__";
|
||||
const DATA_KEY = "__data__";
|
||||
|
||||
export interface Serializer<T> {
|
||||
toPersistence(data: Record<string, unknown>): T;
|
||||
|
||||
fromPersistence(data: T): Record<string, unknown>;
|
||||
}
|
||||
|
||||
export const jsonSerializer: Serializer<string> = {
|
||||
toPersistence(data) {
|
||||
return JSON.stringify(data);
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return JSON.parse(data);
|
||||
},
|
||||
};
|
||||
|
||||
export const noneSerializer: Serializer<Record<string, unknown>> = {
|
||||
toPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
};
|
||||
|
||||
type DocJson<Data> = {
|
||||
[TYPE_KEY]: ObjectType;
|
||||
[DATA_KEY]: Data;
|
||||
};
|
||||
|
||||
export function isValidDocJson(
|
||||
docJson: StoredValue | null | undefined,
|
||||
): docJson is DocJson<unknown> {
|
||||
return (
|
||||
typeof docJson === "object" &&
|
||||
docJson !== null &&
|
||||
docJson[TYPE_KEY] !== undefined &&
|
||||
docJson[DATA_KEY] !== undefined
|
||||
);
|
||||
}
|
||||
|
||||
export function docToJson(
|
||||
doc: BaseNode,
|
||||
serializer: Serializer<unknown>,
|
||||
): DocJson<unknown> {
|
||||
return {
|
||||
[DATA_KEY]: serializer.toPersistence(doc.toJSON()),
|
||||
[TYPE_KEY]: doc.type,
|
||||
};
|
||||
}
|
||||
|
||||
export function jsonToDoc<Data>(
|
||||
docDict: DocJson<Data>,
|
||||
serializer: Serializer<Data>,
|
||||
): BaseNode {
|
||||
const docType = docDict[TYPE_KEY];
|
||||
// fixme: zod type check this
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]);
|
||||
let doc: BaseNode;
|
||||
|
||||
if (docType === ObjectType.DOCUMENT) {
|
||||
doc = new Document({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
embedding: dataDict.embedding,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
});
|
||||
} else if (docType === ObjectType.TEXT) {
|
||||
doc = new TextNode({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
relationships: dataDict.relationships,
|
||||
});
|
||||
} else {
|
||||
throw new Error(`Unknown doc type: ${docType}`);
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
const DEFAULT_PERSIST_PATH = path.join(
|
||||
DEFAULT_PERSIST_DIR,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
|
||||
export interface RefDocInfo {
|
||||
nodeIds: string[];
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
extraInfo: Record<string, any>;
|
||||
}
|
||||
|
||||
export abstract class BaseDocumentStore {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
serializer: Serializer<any> = jsonSerializer;
|
||||
|
||||
// Save/load
|
||||
persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
|
||||
// Persist the docstore to a file.
|
||||
}
|
||||
|
||||
// Main interface
|
||||
abstract docs(): Promise<Record<string, BaseNode>>;
|
||||
|
||||
abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;
|
||||
|
||||
abstract getDocument(
|
||||
docId: string,
|
||||
raiseError: boolean,
|
||||
): Promise<BaseNode | undefined>;
|
||||
|
||||
abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;
|
||||
|
||||
abstract documentExists(docId: string): Promise<boolean>;
|
||||
|
||||
// Hash
|
||||
abstract setDocumentHash(docId: string, docHash: string): Promise<void>;
|
||||
|
||||
abstract getDocumentHash(docId: string): Promise<string | undefined>;
|
||||
|
||||
abstract getAllDocumentHashes(): Promise<Record<string, string>>;
|
||||
|
||||
// Ref Docs
|
||||
abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;
|
||||
|
||||
abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;
|
||||
|
||||
abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;
|
||||
|
||||
// Nodes
|
||||
getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
|
||||
return Promise.all(
|
||||
nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)),
|
||||
);
|
||||
}
|
||||
|
||||
async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
|
||||
const doc = await this.getDocument(nodeId, raiseError);
|
||||
if (!(doc instanceof BaseNode)) {
|
||||
throw new Error(`Document ${nodeId} is not a Node.`);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
async getNodeDict(nodeIdDict: {
|
||||
[index: number]: string;
|
||||
}): Promise<Record<number, BaseNode>> {
|
||||
const result: Record<number, BaseNode> = {};
|
||||
for (const index in nodeIdDict) {
|
||||
result[index] = await this.getNode(nodeIdDict[index]!);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
export * from "./base-document-store";
|
||||
export * from "./kv-document-store";
|
||||
|
||||
+21
-19
@@ -1,15 +1,13 @@
|
||||
import { DEFAULT_NAMESPACE } from "@llamaindex/core/global";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { ObjectType } from "@llamaindex/core/schema";
|
||||
import type { RefDocInfo } from "@llamaindex/core/storage/doc-store";
|
||||
import { DEFAULT_NAMESPACE } from "../../global";
|
||||
import { BaseNode, ObjectType, type StoredValue } from "../../schema";
|
||||
import type { BaseKVStore } from "../kv-store";
|
||||
import {
|
||||
BaseDocumentStore,
|
||||
docToJson,
|
||||
isValidDocJson,
|
||||
jsonToDoc,
|
||||
} from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseKVStore } from "@llamaindex/core/storage/kv-store";
|
||||
import _ from "lodash";
|
||||
type RefDocInfo,
|
||||
} from "./base-document-store";
|
||||
|
||||
type DocMetaData = { docHash: string; refDocId?: string };
|
||||
|
||||
@@ -68,7 +66,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
extraInfo: {},
|
||||
};
|
||||
refDocInfo.nodeIds.push(doc.id_);
|
||||
if (_.isEmpty(refDocInfo.extraInfo)) {
|
||||
if (Object.keys(refDocInfo.extraInfo).length === 0) {
|
||||
refDocInfo.extraInfo = {};
|
||||
}
|
||||
await this.kvstore.put(
|
||||
@@ -88,7 +86,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
raiseError: boolean = true,
|
||||
): Promise<BaseNode | undefined> {
|
||||
const json = await this.kvstore.get(docId, this.nodeCollection);
|
||||
if (_.isNil(json)) {
|
||||
if (this.isNil(json)) {
|
||||
if (raiseError) {
|
||||
throw new Error(`docId ${docId} not found.`);
|
||||
} else {
|
||||
@@ -103,23 +101,23 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
|
||||
async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> {
|
||||
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
|
||||
return refDocInfo ? (_.clone(refDocInfo) as RefDocInfo) : undefined;
|
||||
return refDocInfo ? (structuredClone(refDocInfo) as RefDocInfo) : undefined;
|
||||
}
|
||||
|
||||
async getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined> {
|
||||
const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
|
||||
if (_.isNil(refDocInfos)) {
|
||||
if (this.isNil(refDocInfos)) {
|
||||
return;
|
||||
}
|
||||
return refDocInfos as Record<string, RefDocInfo>;
|
||||
}
|
||||
|
||||
async refDocExists(refDocId: string): Promise<boolean> {
|
||||
return !_.isNil(await this.getRefDocInfo(refDocId));
|
||||
return !this.isNil(await this.getRefDocInfo(refDocId));
|
||||
}
|
||||
|
||||
async documentExists(docId: string): Promise<boolean> {
|
||||
return !_.isNil(await this.kvstore.get(docId, this.nodeCollection));
|
||||
return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
|
||||
}
|
||||
|
||||
private async removeRefDocNode(docId: string): Promise<void> {
|
||||
@@ -129,13 +127,13 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
|
||||
const refDocId = metadata.refDocId;
|
||||
if (_.isNil(refDocId)) {
|
||||
if (this.isNil(refDocId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
|
||||
if (!_.isNil(refDocInfo)) {
|
||||
if (refDocInfo.nodeIds.length > 0) {
|
||||
if (!this.isNil(refDocInfo)) {
|
||||
if (refDocInfo!.nodeIds.length > 0) {
|
||||
await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
|
||||
}
|
||||
await this.kvstore.delete(refDocId, this.metadataCollection);
|
||||
@@ -164,7 +162,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
raiseError: boolean = true,
|
||||
): Promise<void> {
|
||||
const refDocInfo = await this.getRefDocInfo(refDocId);
|
||||
if (_.isNil(refDocInfo)) {
|
||||
if (this.isNil(refDocInfo)) {
|
||||
if (raiseError) {
|
||||
throw new Error(`ref_doc_id ${refDocId} not found.`);
|
||||
} else {
|
||||
@@ -172,7 +170,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
}
|
||||
|
||||
for (const docId of refDocInfo.nodeIds) {
|
||||
for (const docId of refDocInfo!.nodeIds) {
|
||||
await this.deleteDocument(docId, false, false);
|
||||
}
|
||||
|
||||
@@ -187,7 +185,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
|
||||
async getDocumentHash(docId: string): Promise<string | undefined> {
|
||||
const metadata = await this.kvstore.get(docId, this.metadataCollection);
|
||||
return _.get(metadata, "docHash");
|
||||
return metadata?.docHash;
|
||||
}
|
||||
|
||||
async getAllDocumentHashes(): Promise<Record<string, string>> {
|
||||
@@ -201,4 +199,8 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
return hashes;
|
||||
}
|
||||
|
||||
private isNil(value: RefDocInfo | StoredValue | undefined): boolean {
|
||||
return value === null || value === undefined;
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,7 @@
|
||||
import type { BaseEmbedding } from "../embeddings/base.js";
|
||||
import { Settings } from "../global";
|
||||
import type { BaseNode, ModalityType } from "../schema/node.js";
|
||||
|
||||
/**
|
||||
* Should be compatible with npm:pg and npm:postgres
|
||||
*/
|
||||
@@ -12,3 +16,134 @@ export interface IsomorphicDB {
|
||||
close: () => Promise<void>;
|
||||
onCloseEvent: (listener: () => void) => void;
|
||||
}
|
||||
|
||||
export interface VectorStoreQueryResult {
|
||||
nodes?: BaseNode[];
|
||||
similarities: number[];
|
||||
ids: string[];
|
||||
}
|
||||
|
||||
export enum VectorStoreQueryMode {
|
||||
DEFAULT = "default",
|
||||
SPARSE = "sparse",
|
||||
HYBRID = "hybrid",
|
||||
// fit learners
|
||||
SVM = "svm",
|
||||
LOGISTIC_REGRESSION = "logistic_regression",
|
||||
LINEAR_REGRESSION = "linear_regression",
|
||||
// maximum marginal relevance
|
||||
MMR = "mmr",
|
||||
|
||||
// for Azure AI Search
|
||||
SEMANTIC_HYBRID = "semantic_hybrid",
|
||||
}
|
||||
|
||||
export enum FilterOperator {
|
||||
EQ = "==", // default operator (string, number)
|
||||
IN = "in", // In array (string or number)
|
||||
GT = ">", // greater than (number)
|
||||
LT = "<", // less than (number)
|
||||
NE = "!=", // not equal to (string, number)
|
||||
GTE = ">=", // greater than or equal to (number)
|
||||
LTE = "<=", // less than or equal to (number)
|
||||
NIN = "nin", // Not in array (string or number)
|
||||
ANY = "any", // Contains any (array of strings)
|
||||
ALL = "all", // Contains all (array of strings)
|
||||
TEXT_MATCH = "text_match", // full text match (allows you to search for a specific substring, token or phrase within the text field)
|
||||
CONTAINS = "contains", // metadata array contains value (string or number)
|
||||
IS_EMPTY = "is_empty", // the field is not exist or empty (null or empty array)
|
||||
}
|
||||
|
||||
export enum FilterCondition {
|
||||
AND = "and",
|
||||
OR = "or",
|
||||
}
|
||||
|
||||
export type MetadataFilterValue = string | number | string[] | number[];
|
||||
|
||||
export interface MetadataFilter {
|
||||
key: string;
|
||||
value?: MetadataFilterValue;
|
||||
operator: `${FilterOperator}`; // ==, any, all,...
|
||||
}
|
||||
|
||||
export interface MetadataFilters {
|
||||
filters: Array<MetadataFilter>;
|
||||
condition?: `${FilterCondition}`; // and, or
|
||||
}
|
||||
|
||||
export interface MetadataInfo {
|
||||
name: string;
|
||||
type: string;
|
||||
description: string;
|
||||
}
|
||||
|
||||
export interface VectorStoreInfo {
|
||||
metadataInfo: MetadataInfo[];
|
||||
contentInfo: string;
|
||||
}
|
||||
|
||||
export interface VectorStoreQuery {
|
||||
queryEmbedding?: number[];
|
||||
similarityTopK: number;
|
||||
docIds?: string[];
|
||||
queryStr?: string;
|
||||
mode: VectorStoreQueryMode;
|
||||
alpha?: number;
|
||||
filters?: MetadataFilters | undefined;
|
||||
mmrThreshold?: number;
|
||||
}
|
||||
|
||||
// Supported types of vector stores (for each modality)
|
||||
export type VectorStoreByType = {
|
||||
[P in ModalityType]?: BaseVectorStore;
|
||||
};
|
||||
|
||||
export type VectorStoreBaseParams = {
|
||||
embeddingModel?: BaseEmbedding | undefined;
|
||||
};
|
||||
|
||||
export abstract class BaseVectorStore<Client = unknown> {
|
||||
embedModel: BaseEmbedding;
|
||||
abstract storesText: boolean;
|
||||
isEmbeddingQuery?: boolean;
|
||||
abstract client(): Client;
|
||||
abstract add(embeddingResults: BaseNode[]): Promise<string[]>;
|
||||
abstract delete(refDocId: string, deleteOptions?: object): Promise<void>;
|
||||
abstract query(
|
||||
query: VectorStoreQuery,
|
||||
options?: object,
|
||||
): Promise<VectorStoreQueryResult>;
|
||||
|
||||
protected constructor(params?: VectorStoreBaseParams) {
|
||||
this.embedModel = params?.embeddingModel ?? Settings.embedModel;
|
||||
}
|
||||
}
|
||||
|
||||
export const parsePrimitiveValue = (
|
||||
value?: MetadataFilterValue,
|
||||
): string | number => {
|
||||
if (typeof value !== "number" && typeof value !== "string") {
|
||||
throw new Error("Value must be a string or number");
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
export const parseArrayValue = (
|
||||
value?: MetadataFilterValue,
|
||||
): string[] | number[] => {
|
||||
const isPrimitiveArray =
|
||||
Array.isArray(value) &&
|
||||
value.every((v) => typeof v === "string" || typeof v === "number");
|
||||
if (!isPrimitiveArray) {
|
||||
throw new Error("Value must be an array of strings or numbers");
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
export const parseNumberValue = (value?: MetadataFilterValue): number => {
|
||||
if (typeof value !== "number") throw new Error("Value must be a number");
|
||||
return value;
|
||||
};
|
||||
|
||||
export * from "./utils.js";
|
||||
|
||||
+6
-29
@@ -1,6 +1,9 @@
|
||||
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
|
||||
import { ObjectType, jsonToNode } from "@llamaindex/core/schema";
|
||||
import type { MetadataFilterValue } from "./types.js";
|
||||
import {
|
||||
ObjectType,
|
||||
jsonToNode,
|
||||
type BaseNode,
|
||||
type Metadata,
|
||||
} from "../schema";
|
||||
|
||||
const DEFAULT_TEXT_KEY = "text";
|
||||
|
||||
@@ -91,32 +94,6 @@ export function metadataDictToNode(
|
||||
}
|
||||
}
|
||||
|
||||
export const parsePrimitiveValue = (
|
||||
value?: MetadataFilterValue,
|
||||
): string | number => {
|
||||
if (typeof value !== "number" && typeof value !== "string") {
|
||||
throw new Error("Value must be a string or number");
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
export const parseArrayValue = (
|
||||
value?: MetadataFilterValue,
|
||||
): string[] | number[] => {
|
||||
const isPrimitiveArray =
|
||||
Array.isArray(value) &&
|
||||
value.every((v) => typeof v === "string" || typeof v === "number");
|
||||
if (!isPrimitiveArray) {
|
||||
throw new Error("Value must be an array of strings or numbers");
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
export const parseNumberValue = (value?: MetadataFilterValue): number => {
|
||||
if (typeof value !== "number") throw new Error("Value must be a number");
|
||||
return value;
|
||||
};
|
||||
|
||||
export const escapeLikeString = (value: string) => {
|
||||
return value.replace(/[%_\\]/g, "\\$&");
|
||||
};
|
||||
Vendored
+1
-1
@@ -125,7 +125,7 @@
|
||||
"@huggingface/transformers": "^3.0.2",
|
||||
"@types/node": "^22.9.0",
|
||||
"@types/readable-stream": "^4.0.15",
|
||||
"bunchee": "6.0.3",
|
||||
"bunchee": "6.2.0",
|
||||
"gpt-tokenizer": "^2.6.2",
|
||||
"pathe": "^1.1.2",
|
||||
"vitest": "^2.1.5"
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
# @llamaindex/experimental
|
||||
|
||||
## 0.0.146
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9f8ad37]
|
||||
- llamaindex@0.8.34
|
||||
|
||||
## 0.0.145
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.8.33
|
||||
|
||||
## 0.0.144
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.143
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d211b7a]
|
||||
- Updated dependencies [0ebbfc1]
|
||||
- llamaindex@0.8.31
|
||||
|
||||
## 0.0.142
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/experimental",
|
||||
"description": "Experimental package for LlamaIndexTS",
|
||||
"version": "0.0.142",
|
||||
"version": "0.0.146",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,102 @@
|
||||
# llamaindex
|
||||
|
||||
## 0.8.34
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 9f8ad37: fix: missing peer deps in llamaindex
|
||||
- Updated dependencies [7265f74]
|
||||
- @llamaindex/openai@0.1.48
|
||||
- @llamaindex/clip@0.0.32
|
||||
- @llamaindex/deepinfra@0.0.32
|
||||
- @llamaindex/groq@0.0.47
|
||||
- @llamaindex/huggingface@0.0.32
|
||||
- @llamaindex/vllm@0.0.18
|
||||
|
||||
## 0.8.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [2019a04]
|
||||
- Updated dependencies [e38e474]
|
||||
- Updated dependencies [067a489]
|
||||
- @llamaindex/openai@0.1.47
|
||||
- @llamaindex/google@0.0.3
|
||||
- @llamaindex/anthropic@0.0.31
|
||||
- @llamaindex/clip@0.0.31
|
||||
- @llamaindex/deepinfra@0.0.31
|
||||
- @llamaindex/groq@0.0.46
|
||||
- @llamaindex/huggingface@0.0.31
|
||||
- @llamaindex/vllm@0.0.17
|
||||
|
||||
## 0.8.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 34faf48: chore: move vector stores to their own packages
|
||||
- 4df1fe6: chore: migrate llamaindex llms and embeddings to their own packages
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
- 1931bbc: refactor: @llamaindex/azure
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [d6c270e]
|
||||
- Updated dependencies [1892e1c]
|
||||
- Updated dependencies [1931bbc]
|
||||
- Updated dependencies [8744796]
|
||||
- @llamaindex/astra@0.0.2
|
||||
- @llamaindex/chroma@0.0.2
|
||||
- @llamaindex/milvus@0.0.2
|
||||
- @llamaindex/mongodb@0.0.2
|
||||
- @llamaindex/pinecone@0.0.2
|
||||
- @llamaindex/qdrant@0.0.2
|
||||
- @llamaindex/upstash@0.0.2
|
||||
- @llamaindex/weaviate@0.0.2
|
||||
- @llamaindex/google@0.0.2
|
||||
- @llamaindex/mistral@0.0.2
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/cloud@2.0.22
|
||||
- @llamaindex/openai@0.1.46
|
||||
- @llamaindex/azure@0.0.2
|
||||
- @llamaindex/node-parser@0.0.22
|
||||
- @llamaindex/anthropic@0.0.30
|
||||
- @llamaindex/clip@0.0.30
|
||||
- @llamaindex/cohere@0.0.2
|
||||
- @llamaindex/deepinfra@0.0.30
|
||||
- @llamaindex/huggingface@0.0.30
|
||||
- @llamaindex/mixedbread@0.0.2
|
||||
- @llamaindex/ollama@0.0.37
|
||||
- @llamaindex/portkey-ai@0.0.30
|
||||
- @llamaindex/replicate@0.0.30
|
||||
- @llamaindex/postgres@0.0.30
|
||||
- @llamaindex/readers@1.0.23
|
||||
- @llamaindex/groq@0.0.45
|
||||
- @llamaindex/vllm@0.0.16
|
||||
|
||||
## 0.8.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- d211b7a: added support for tool calls with results in message history for Anthropic agent
|
||||
- 0ebbfc1: fix: clean up docstore when generating embedding fail
|
||||
- Updated dependencies [5dec9f9]
|
||||
- Updated dependencies [fd9c829]
|
||||
- Updated dependencies [d211b7a]
|
||||
- @llamaindex/cloud@2.0.21
|
||||
- @llamaindex/anthropic@0.0.29
|
||||
- @llamaindex/core@0.4.20
|
||||
- @llamaindex/node-parser@0.0.21
|
||||
- @llamaindex/clip@0.0.29
|
||||
- @llamaindex/deepinfra@0.0.29
|
||||
- @llamaindex/huggingface@0.0.29
|
||||
- @llamaindex/ollama@0.0.36
|
||||
- @llamaindex/openai@0.1.45
|
||||
- @llamaindex/portkey-ai@0.0.29
|
||||
- @llamaindex/replicate@0.0.29
|
||||
- @llamaindex/readers@1.0.22
|
||||
- @llamaindex/groq@0.0.44
|
||||
- @llamaindex/vllm@0.0.15
|
||||
|
||||
## 0.8.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llamaindex",
|
||||
"version": "0.8.30",
|
||||
"version": "0.8.34",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
"keywords": [
|
||||
@@ -20,17 +20,6 @@
|
||||
"llamaindex"
|
||||
],
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "0.32.1",
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@aws-sdk/client-sso-oidc": "^3.693.0",
|
||||
"@azure/cosmos": "^4.1.1",
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@azure/search-documents": "^12.1.0",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@discoveryjs/json-ext": "^0.6.1",
|
||||
"@google-cloud/vertexai": "1.9.0",
|
||||
"@google/generative-ai": "0.21.0",
|
||||
"@grpc/grpc-js": "^1.12.2",
|
||||
"@llamaindex/anthropic": "workspace:*",
|
||||
"@llamaindex/clip": "workspace:*",
|
||||
"@llamaindex/cloud": "workspace:*",
|
||||
@@ -46,55 +35,32 @@
|
||||
"@llamaindex/readers": "workspace:*",
|
||||
"@llamaindex/replicate": "workspace:*",
|
||||
"@llamaindex/vllm": "workspace:*",
|
||||
"@mistralai/mistralai": "^1.3.4",
|
||||
"@mixedbread-ai/sdk": "^2.2.11",
|
||||
"@pinecone-database/pinecone": "^4.0.0",
|
||||
"@qdrant/js-client-rest": "^1.11.0",
|
||||
"@llamaindex/postgres": "workspace:*",
|
||||
"@llamaindex/azure": "workspace:*",
|
||||
"@llamaindex/astra": "workspace:*",
|
||||
"@llamaindex/milvus": "workspace:*",
|
||||
"@llamaindex/chroma": "workspace:*",
|
||||
"@llamaindex/mongodb": "workspace:*",
|
||||
"@llamaindex/pinecone": "workspace:*",
|
||||
"@llamaindex/qdrant": "workspace:*",
|
||||
"@llamaindex/upstash": "workspace:*",
|
||||
"@llamaindex/weaviate": "workspace:*",
|
||||
"@llamaindex/google": "workspace:*",
|
||||
"@llamaindex/mistral": "workspace:*",
|
||||
"@llamaindex/mixedbread": "workspace:*",
|
||||
"@llamaindex/cohere": "workspace:*",
|
||||
"@types/lodash": "^4.17.7",
|
||||
"@types/node": "^22.9.0",
|
||||
"@types/pg": "^8.11.8",
|
||||
"@upstash/vector": "^1.1.5",
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.6",
|
||||
"ajv": "^8.17.1",
|
||||
"assemblyai": "^4.8.0",
|
||||
"chromadb": "1.9.2",
|
||||
"chromadb-default-embed": "^2.13.2",
|
||||
"cohere-ai": "7.14.0",
|
||||
"gpt-tokenizer": "^2.6.2",
|
||||
"groq-sdk": "^0.8.0",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"lodash": "^4.17.21",
|
||||
"magic-bytes.js": "^1.10.0",
|
||||
"mongodb": "^6.7.0",
|
||||
"openai": "^4.73.1",
|
||||
"pathe": "^1.1.2",
|
||||
"rake-modified": "^1.0.8",
|
||||
"weaviate-client": "^3.2.3",
|
||||
"wikipedia": "^2.1.2",
|
||||
"wink-nlp": "^2.3.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"pg": "^8.12.0",
|
||||
"pgvector": "0.2.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"pg": {
|
||||
"optional": true
|
||||
},
|
||||
"pgvector": {
|
||||
"optional": true
|
||||
}
|
||||
"gpt-tokenizer": "^2.6.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@swc/cli": "^0.5.0",
|
||||
"@swc/core": "^1.9.2",
|
||||
"@vercel/postgres": "^0.10.0",
|
||||
"concurrently": "^9.1.0",
|
||||
"glob": "^11.0.0",
|
||||
"pg": "^8.12.0",
|
||||
"pgvector": "0.2.0",
|
||||
"postgres": "^3.4.4",
|
||||
"typescript": "^5.7.2"
|
||||
},
|
||||
"engines": {
|
||||
|
||||
@@ -13,11 +13,6 @@ import {
|
||||
} from "@llamaindex/core/node-parser";
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "./ServiceContext.js";
|
||||
import {
|
||||
getEmbeddedModel,
|
||||
setEmbeddedModel,
|
||||
withEmbeddedModel,
|
||||
} from "./internal/settings/EmbedModel.js";
|
||||
|
||||
export type PromptConfig = {
|
||||
llm?: string;
|
||||
@@ -84,15 +79,15 @@ class GlobalSettings implements Config {
|
||||
}
|
||||
|
||||
get embedModel(): BaseEmbedding {
|
||||
return getEmbeddedModel();
|
||||
return CoreSettings.embedModel;
|
||||
}
|
||||
|
||||
set embedModel(embedModel: BaseEmbedding) {
|
||||
setEmbeddedModel(embedModel);
|
||||
CoreSettings.embedModel = embedModel;
|
||||
}
|
||||
|
||||
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
|
||||
return withEmbeddedModel(embedModel, fn);
|
||||
return CoreSettings.withEmbedModel(embedModel, fn);
|
||||
}
|
||||
|
||||
get nodeParser(): NodeParser {
|
||||
|
||||
@@ -1,39 +1 @@
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { GeminiSession, GeminiSessionStore } from "../llm/gemini/base.js";
|
||||
import { GEMINI_BACKENDS } from "../llm/gemini/types.js";
|
||||
|
||||
/** Model identifiers accepted by `GeminiEmbedding.model`. */
export enum GEMINI_EMBEDDING_MODEL {
  EMBEDDING_001 = "embedding-001",
  TEXT_EMBEDDING_004 = "text-embedding-004",
}
|
||||
|
||||
/**
 * Embedding model backed by Gemini, exposed through the BaseEmbedding interface.
 * Note: Vertex SDK currently does not support embeddings, so the fallback
 * session is requested for the Google backend.
 */
export class GeminiEmbedding extends BaseEmbedding {
  model: GEMINI_EMBEDDING_MODEL;
  session: GeminiSession;

  /**
   * @param init - optional overrides. `model` falls back to
   *   `GEMINI_EMBEDDING_MODEL.EMBEDDING_001`; `session` falls back to the one
   *   returned by `GeminiSessionStore.get` for the Google backend.
   */
  constructor(init?: Partial<GeminiEmbedding>) {
    super();
    this.model = init?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
    if (init?.session != null) {
      this.session = init.session;
    } else {
      // Only ask the session store when the caller did not supply a session.
      this.session = GeminiSessionStore.get({
        backend: GEMINI_BACKENDS.GOOGLE,
      }) as GeminiSession;
    }
  }

  /**
   * Requests the embedding of a single prompt from the configured model.
   *
   * @param prompt - text to embed
   * @returns the embedding values reported by the client
   */
  private async getEmbedding(prompt: string): Promise<number[]> {
    const generativeModel = this.session.getGenerativeModel({
      model: this.model,
    });
    const { embedding } = await generativeModel.embedContent(prompt);
    return embedding.values;
  }

  /**
   * Embeds one text.
   *
   * @param text - text to embed
   * @returns a promise resolving to the embedding vector
   */
  getTextEmbedding(text: string): Promise<number[]> {
    return this.getEmbedding(text);
  }
}
|
||||
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
|
||||
|
||||
@@ -1,33 +1,4 @@
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { MistralAISession } from "../llm/mistral.js";
|
||||
|
||||
/** Model identifiers accepted by `MistralAIEmbedding.model`. */
export enum MistralAIEmbeddingModelType {
  MISTRAL_EMBED = "mistral-embed",
}
|
||||
|
||||
/**
 * Embedding model that delegates to a Mistral client obtained from a
 * `MistralAISession`.
 */
export class MistralAIEmbedding extends BaseEmbedding {
  model: MistralAIEmbeddingModelType;
  apiKey?: string;

  private session: MistralAISession;

  /**
   * @param init - forwarded to `MistralAISession`. The model is always set to
   *   `MistralAIEmbeddingModelType.MISTRAL_EMBED`, regardless of `init.model`.
   */
  constructor(init?: Partial<MistralAIEmbedding>) {
    super();
    this.session = new MistralAISession(init);
    this.model = MistralAIEmbeddingModelType.MISTRAL_EMBED;
  }

  /**
   * Sends a single input to the client's embeddings endpoint.
   * NOTE(review): the client is untyped here, so the `client.embeddings(...)`
   * call shape is not compiler-checked — confirm it matches the installed SDK.
   *
   * @param input - text to embed
   * @returns the embedding of the first (only) returned item
   */
  private async getMistralAIEmbedding(input: string) {
    const client = await this.session.getClient();
    const response = await client.embeddings({
      model: this.model,
      input: [input],
    });
    const [first] = [response.data[0]];
    return first.embedding;
  }

  /**
   * Embeds one text.
   *
   * @param text - text to embed
   * @returns the embedding vector
   */
  async getTextEmbedding(text: string): Promise<number[]> {
    const embedding = await this.getMistralAIEmbedding(text);
    return embedding;
  }
}
|
||||
export {
|
||||
MistralAIEmbedding,
|
||||
MistralAIEmbeddingModelType,
|
||||
} from "@llamaindex/mistral";
|
||||
|
||||
@@ -1,178 +1,4 @@
|
||||
import { BaseEmbedding, type EmbeddingInfo } from "@llamaindex/core/embeddings";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import { MixedbreadAI, MixedbreadAIClient } from "@mixedbread-ai/sdk";
|
||||
|
||||
/** Embeddings request payload minus `input`, which is supplied per call. */
type EmbeddingsRequestWithoutInput = Omit<MixedbreadAI.EmbeddingsRequest, "input">;
|
||||
|
||||
/**
 * Constructor options for the MixedbreadAIEmbeddings class: every embeddings
 * request field except `input` and `model`, plus the client/batching settings
 * declared below.
 */
export interface MixedbreadAIEmbeddingsParams
  extends Omit<EmbeddingsRequestWithoutInput, "model"> {
  /**
   * Name of the embedding model to call.
   * @default {"mixedbread-ai/mxbai-embed-large-v1"}
   */
  model?: string;

  /**
   * API key used to authenticate requests.
   * @default {process.env.MXBAI_API_KEY}
   */
  apiKey?: string;

  /**
   * Base URL of the API; when set it is passed to the client as its environment.
   */
  baseUrl?: string;

  /**
   * Upper bound on the number of documents embedded per request.
   * @default {128}
   */
  embedBatchSize?: number;

  /**
   * Embed info describing the model.
   */
  embedInfo?: EmbeddingInfo;

  /**
   * How many times a request may be retried.
   * @default {3}
   */
  maxRetries?: number;

  /**
   * Request timeout, in seconds.
   */
  timeoutInSeconds?: number;
}
|
||||
|
||||
/**
 * Generates embeddings through the mixedbread ai API.
 *
 * Unless another model is configured, requests use
 * "mixedbread-ai/mxbai-embed-large-v1". The resulting vectors can be used for
 * NLP tasks such as similarity comparison, clustering, or as features in
 * machine learning models.
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({ apiKey: 'your-api-key' });
 * const texts = ["Baking bread is fun", "I love baking"];
 * const result = await mxbai.getTextEmbeddings(texts);
 * console.log(result);
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({
 *   apiKey: 'your-api-key',
 *   model: 'mixedbread-ai/mxbai-embed-large-v1',
 *   encodingFormat: MixedbreadAI.EncodingFormat.Binary,
 *   dimensions: 512,
 *   normalized: true,
 * });
 * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
 * const result = await mxbai.getTextEmbedding(query);
 * console.log(result);
 */
export class MixedbreadAIEmbeddings extends BaseEmbedding {
  requestParams: EmbeddingsRequestWithoutInput;
  requestOptions: MixedbreadAIClient.RequestOptions;
  private client: MixedbreadAIClient;

  /**
   * Builds the request defaults and the API client.
   *
   * @param params - optional configuration for the instance.
   * @throws Error if no API key is passed and `MXBAI_API_KEY` is not set.
   * @throws Error if the requested batch size exceeds 256.
   */
  constructor(params?: Partial<MixedbreadAIEmbeddingsParams>) {
    super();

    const apiKey = params?.apiKey ?? getEnv("MXBAI_API_KEY");
    if (!apiKey) {
      throw new Error(
        "mixedbread ai API key not found. Either provide it in the constructor or set the 'MXBAI_API_KEY' environment variable.",
      );
    }

    const requestedBatchSize = params?.embedBatchSize;
    if (requestedBatchSize && requestedBatchSize > 256) {
      throw new Error(
        "The maximum batch size for mixedbread ai embeddings API is 256.",
      );
    }
    this.embedBatchSize = requestedBatchSize ?? 128;

    if (params?.embedInfo) {
      this.embedInfo = params.embedInfo;
    }

    this.requestParams = {
      model: params?.model ?? "mixedbread-ai/mxbai-embed-large-v1",
      normalized: params?.normalized,
      dimensions: params?.dimensions,
      encodingFormat: params?.encodingFormat,
      truncationStrategy: params?.truncationStrategy,
      prompt: params?.prompt,
    } as EmbeddingsRequestWithoutInput;

    this.requestOptions = {
      timeoutInSeconds: params?.timeoutInSeconds,
      maxRetries: params?.maxRetries ?? 3,
      // Support for this already exists in the python sdk and will be added to the js sdk soon
      // @ts-expect-error fixme
      additionalHeaders: {
        "user-agent": "@mixedbread-ai/llamaindex-ts-sdk",
      },
    };

    // A custom base URL is handed to the client as its `environment`.
    this.client = new MixedbreadAIClient(
      params?.baseUrl
        ? { apiKey, environment: params.baseUrl }
        : { apiKey },
    );
  }

  /**
   * Generates an embedding for a single text.
   *
   * @param text - the string to embed.
   * @returns a promise resolving to the embedding vector.
   *
   * @example
   * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
   * const result = await mxbai.getTextEmbedding(query);
   * console.log(result);
   */
  async getTextEmbedding(text: string): Promise<number[]> {
    const [embedding] = await this.getTextEmbeddings([text]);
    return embedding!;
  }

  /**
   * Generates embeddings for an array of texts in one request.
   *
   * @param texts - the strings to embed; an empty array returns `[]` without
   *   calling the API.
   * @returns a promise resolving to one embedding per returned item.
   *
   * @example
   * const texts = ["Baking bread is fun", "I love baking"];
   * const result = await mxbai.getTextEmbeddings(texts);
   * console.log(result);
   */
  getTextEmbeddings = async (texts: string[]): Promise<Array<number[]>> => {
    if (texts.length === 0) {
      return [];
    }

    const { data } = await this.client.embeddings(
      { ...this.requestParams, input: texts },
      this.requestOptions,
    );
    return data.map((item) => item.embedding as number[]);
  };
}
|
||||
export {
|
||||
MixedbreadAIEmbeddings,
|
||||
type MixedbreadAIEmbeddingsParams,
|
||||
} from "@llamaindex/mixedbread";
|
||||
|
||||
@@ -2,7 +2,7 @@ export * from "@llamaindex/core/embeddings";
|
||||
export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
|
||||
export { DeepInfraEmbedding } from "./DeepInfraEmbedding.js";
|
||||
export { FireworksEmbedding } from "./fireworks.js";
|
||||
export * from "./GeminiEmbedding.js";
|
||||
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "./GeminiEmbedding.js";
|
||||
export * from "./HuggingFaceEmbedding.js";
|
||||
export * from "./JinaAIEmbedding.js";
|
||||
export * from "./MistralAIEmbedding.js";
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
//#region initial setup for OpenAI
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
|
||||
import { Settings } from "./Settings.js";
|
||||
|
||||
// Install OpenAI defaults for whichever setting is still unconfigured.
// The getters are expected to throw when nothing has been set. Each setting is
// probed in its own try/catch so that a value configured before this module
// loads is not overwritten merely because the other one is missing.
try {
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  Settings.llm;
} catch {
  Settings.llm = new OpenAI();
}

try {
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  Settings.embedModel;
} catch {
  Settings.embedModel = new OpenAIEmbedding();
}
|
||||
|
||||
//#endregion
|
||||
|
||||
@@ -7,12 +7,14 @@ export {
|
||||
HuggingFaceEmbeddingModelType,
|
||||
} from "./embeddings/HuggingFaceEmbedding.js";
|
||||
|
||||
export { type VertexGeminiSessionOptions } from "./llm/gemini/types.js";
|
||||
export { GeminiVertexSession } from "./llm/gemini/vertex.js";
|
||||
export {
|
||||
GeminiVertexSession,
|
||||
type VertexGeminiSessionOptions,
|
||||
} from "@llamaindex/google";
|
||||
|
||||
// Expose AzureDynamicSessionTool for node.js runtime only
|
||||
export { AzureDynamicSessionTool } from "@llamaindex/azure";
|
||||
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
|
||||
export { AzureDynamicSessionTool } from "./tools/AzureDynamicSessionTool.node.js";
|
||||
|
||||
// Don't export vector store modules for non-node.js runtime on top level,
|
||||
// as we cannot guarantee that they will work in other environments
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
} from "@llamaindex/core/schema";
|
||||
import type { BaseIndexStore } from "@llamaindex/core/storage/index-store";
|
||||
import { extractText } from "@llamaindex/core/utils";
|
||||
import { VectorStoreQueryMode } from "@llamaindex/core/vector-store";
|
||||
import type { ServiceContext } from "../../ServiceContext.js";
|
||||
import { nodeParserFromSettingsOrContext } from "../../Settings.js";
|
||||
import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js";
|
||||
@@ -38,7 +39,6 @@ import type {
|
||||
VectorStoreByType,
|
||||
VectorStoreQueryResult,
|
||||
} from "../../vector-store/index.js";
|
||||
import { VectorStoreQueryMode } from "../../vector-store/types.js";
|
||||
import type { BaseIndexInit } from "../BaseIndex.js";
|
||||
import { BaseIndex } from "../BaseIndex.js";
|
||||
|
||||
@@ -237,7 +237,12 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
|
||||
if (args.logProgress) {
|
||||
console.log("Finished parsing documents.");
|
||||
}
|
||||
return await this.init(args);
|
||||
try {
|
||||
return await this.init(args);
|
||||
} catch (error) {
|
||||
await docStoreStrategy.rollback(args.storageContext.docStore, args.nodes);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async fromVectorStores(
|
||||
|
||||
@@ -10,7 +10,7 @@ import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
|
||||
import {
|
||||
DocStoreStrategy,
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import { BaseNode } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
/**
|
||||
* Handle doc store duplicates by checking all hashes.
|
||||
*/
|
||||
export class DuplicatesStrategy extends TransformComponent {
|
||||
export class DuplicatesStrategy extends RollbackableTransformComponent {
|
||||
private docStore: BaseDocumentStore;
|
||||
|
||||
constructor(docStore: BaseDocumentStore) {
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import { BaseNode } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { classify } from "./classify.js";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
/**
|
||||
* Handle docstore upserts by checking hashes and ids.
|
||||
* Identify missing docs and delete them from docstore and vector store
|
||||
*/
|
||||
export class UpsertsAndDeleteStrategy extends TransformComponent {
|
||||
export class UpsertsAndDeleteStrategy extends RollbackableTransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import { BaseNode } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { classify } from "./classify.js";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
/**
|
||||
* Handles doc store upserts by checking hashes and ids.
|
||||
*/
|
||||
export class UpsertsStrategy extends TransformComponent {
|
||||
export class UpsertsStrategy extends RollbackableTransformComponent {
|
||||
protected docStore: BaseDocumentStore;
|
||||
protected vectorStores: BaseVectorStore[] | undefined;
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import { TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
|
||||
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
|
||||
import { UpsertsStrategy } from "./UpsertsStrategy.js";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
/**
|
||||
 * Document de-duplication strategies work by comparing the hashes or ids stored in the document store.
|
||||
@@ -19,7 +19,7 @@ export enum DocStoreStrategy {
|
||||
NONE = "none", // no-op strategy
|
||||
}
|
||||
|
||||
class NoOpStrategy extends TransformComponent {
|
||||
class NoOpStrategy extends RollbackableTransformComponent {
|
||||
constructor() {
|
||||
super(async (nodes) => nodes);
|
||||
}
|
||||
@@ -29,7 +29,7 @@ export function createDocStoreStrategy(
|
||||
docStoreStrategy: DocStoreStrategy,
|
||||
docStore?: BaseDocumentStore,
|
||||
vectorStores: BaseVectorStore[] = [],
|
||||
): TransformComponent {
|
||||
): RollbackableTransformComponent {
|
||||
if (docStoreStrategy === DocStoreStrategy.NONE) {
|
||||
return new NoOpStrategy();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "../../index.edge.js";
|
||||
import { classify } from "./classify.js";
|
||||
|
||||
/**
 * Transform component whose doc store changes can be cleaned up afterwards
 * via `rollback`.
 */
export class RollbackableTransformComponent extends TransformComponent {
  /**
   * Remove unused docs from the doc store. It is useful in case
   * generating embeddings fails and we want to remove the unused docs.
   *
   * TODO: override this in UpsertsStrategy if we want to revert removed docs also
   *
   * @param docStore - store to clean up
   * @param nodes - nodes passed to `classify` to determine which stored docs
   *   are unused
   * @returns a promise that settles once the deletions and the persist call
   *   have completed
   */
  public async rollback(
    docStore: BaseDocumentStore,
    nodes: BaseNode[],
  ): Promise<void> {
    const { unusedDocs } = await classify(docStore, nodes);
    for (const docId of unusedDocs) {
      // Second argument is presumably "raise if missing" = false — confirm
      // against BaseDocumentStore.deleteDocument.
      await docStore.deleteDocument(docId, false);
    }
    // Await so an async persist finishes (and its failure surfaces) before the
    // caller continues; awaiting a synchronous implementation is harmless.
    await docStore.persist();
  }
}
|
||||
@@ -0,0 +1 @@
|
||||
export * from "@llamaindex/google";
|
||||
@@ -6,11 +6,12 @@ export {
|
||||
Anthropic,
|
||||
} from "./anthropic.js";
|
||||
export { FireworksLLM } from "./fireworks.js";
|
||||
export { Gemini, GeminiSession } from "./gemini/base.js";
|
||||
export {
|
||||
GEMINI_MODEL,
|
||||
Gemini,
|
||||
GeminiSession,
|
||||
type GoogleGeminiSessionOptions,
|
||||
} from "./gemini/types.js";
|
||||
} from "./google.js";
|
||||
export * from "./groq.js";
|
||||
export { HuggingFaceInferenceAPI, HuggingFaceLLM } from "./huggingface.js";
|
||||
export {
|
||||
|
||||
@@ -1,138 +1 @@
|
||||
import {
|
||||
BaseLLM,
|
||||
type ChatMessage,
|
||||
type ChatResponse,
|
||||
type ChatResponseChunk,
|
||||
type LLMChatParamsNonStreaming,
|
||||
type LLMChatParamsStreaming,
|
||||
} from "@llamaindex/core/llms";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
|
||||
/**
 * Supported Mistral model names mapped to their context window size.
 * The keys double as the allowed values of `MistralAI.model`.
 */
export const ALL_AVAILABLE_MISTRAL_MODELS = {
  "mistral-tiny": {
    contextWindow: 32000,
  },
  "mistral-small": {
    contextWindow: 32000,
  },
  "mistral-medium": {
    contextWindow: 32000,
  },
};
|
||||
|
||||
/**
 * Holds the Mistral API key and lazily creates the SDK client on first use.
 */
export class MistralAISession {
  apiKey: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private client: any;

  /**
   * @param init - optional object carrying `apiKey`; when it is missing or
   *   empty, the `MISTRAL_API_KEY` environment variable is used instead.
   * @throws Error when no API key can be resolved from either source.
   */
  constructor(init?: { apiKey?: string | undefined }) {
    this.apiKey = init?.apiKey || getEnv("MISTRAL_API_KEY")!;
    if (!this.apiKey) {
      // Overriding MistralAI package's error message
      throw new Error("Set Mistral API key in MISTRAL_API_KEY env variable");
    }
  }

  /**
   * Returns the client, constructing it on the first call and reusing it
   * afterwards. The SDK module is imported dynamically on every call.
   */
  async getClient() {
    const sdk = await import("@mistralai/mistralai");
    if (!this.client) {
      this.client = new sdk.Mistral({ apiKey: this.apiKey });
    }
    return this.client;
  }
}
|
||||
|
||||
/**
 * MistralAI LLM implementation.
 *
 * NOTE(review): the client returned by the session is untyped, so the
 * `client.chat(...)` / `client.chatStream(...)` call shapes used below are not
 * compiler-checked — confirm they match the installed SDK version.
 */
export class MistralAI extends BaseLLM {
  // Per completion MistralAI params
  model: keyof typeof ALL_AVAILABLE_MISTRAL_MODELS;
  temperature: number;
  topP: number;
  maxTokens?: number | undefined;
  apiKey?: string;
  safeMode: boolean;
  randomSeed?: number | undefined;

  private session: MistralAISession;

  /**
   * @param init - optional overrides. Defaults: model "mistral-small",
   *   temperature 0.1, topP 1, safeMode false; maxTokens and randomSeed stay
   *   undefined. `init` is also forwarded to `MistralAISession`.
   */
  constructor(init?: Partial<MistralAI>) {
    super();
    this.session = new MistralAISession(init);
    this.model = init?.model ?? "mistral-small";
    this.temperature = init?.temperature ?? 0.1;
    this.topP = init?.topP ?? 1;
    this.safeMode = init?.safeMode ?? false;
    this.maxTokens = init?.maxTokens ?? undefined;
    this.randomSeed = init?.randomSeed ?? undefined;
  }

  /** Current sampling settings plus the context window of the selected model. */
  get metadata() {
    const { contextWindow } = ALL_AVAILABLE_MISTRAL_MODELS[this.model];
    return {
      model: this.model,
      temperature: this.temperature,
      topP: this.topP,
      maxTokens: this.maxTokens,
      contextWindow,
      tokenizer: undefined,
    };
  }

  /**
   * Assembles the request payload shared by the streaming and non-streaming
   * calls.
   *
   * @param messages - chat history to send
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private buildParams(messages: ChatMessage[]): any {
    const { model, temperature, maxTokens, topP, safeMode, randomSeed } = this;
    return {
      model,
      temperature,
      maxTokens,
      topP,
      safeMode,
      randomSeed,
      messages,
    };
  }

  /**
   * Sends a chat request.
   *
   * @param params - chat parameters; when `stream` is truthy an async iterable
   *   of chunks is returned, otherwise a single response.
   */
  chat(
    params: LLMChatParamsStreaming,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
    // Streaming
    if (params.stream) {
      return this.streamChat(params);
    }

    // Non-streaming
    const client = await this.session.getClient();
    const response = await client.chat(this.buildParams(params.messages));
    return {
      raw: response,
      message: response.choices[0].message,
    };
  }

  /**
   * Streams a chat completion, yielding one chunk per received part that
   * contains at least one choice. The first choice of each yielded part has
   * its `index` overwritten with a running counter.
   */
  protected async *streamChat({
    messages,
  }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
    const client = await this.session.getClient();
    const chunkStream = await client.chatStream(this.buildParams(messages));

    let nextIndex = 0;
    for await (const part of chunkStream) {
      // Parts without choices are skipped and do not advance the counter.
      if (part.choices.length) {
        const firstChoice = part.choices[0];
        firstChoice.index = nextIndex;
        nextIndex += 1;

        yield {
          raw: part,
          delta: firstChoice.delta.content ?? "",
        };
      }
    }
  }
}
|
||||
export * from "@llamaindex/mistral";
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
export * from "./CohereRerank.js";
|
||||
export * from "@llamaindex/cohere";
|
||||
export {
|
||||
MixedbreadAIReranker,
|
||||
type MixedbreadAIRerankerParams,
|
||||
} from "@llamaindex/mixedbread";
|
||||
export * from "./JinaAIReranker.js";
|
||||
export * from "./MixedbreadAIReranker.js";
|
||||
|
||||
@@ -9,13 +9,13 @@ import {
|
||||
BaseIndexStore,
|
||||
SimpleIndexStore,
|
||||
} from "@llamaindex/core/storage/index-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "../ServiceContext.js";
|
||||
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "../ServiceContext.js";
|
||||
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
|
||||
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
|
||||
|
||||
export interface StorageContext {
|
||||
|
||||
@@ -3,13 +3,13 @@ import {
|
||||
DEFAULT_NAMESPACE,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "@llamaindex/core/global";
|
||||
import { KVDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import {
|
||||
BaseInMemoryKVStore,
|
||||
SimpleKVStore,
|
||||
} from "@llamaindex/core/storage/kv-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import _ from "lodash";
|
||||
import { KVDocumentStore } from "./KVDocumentStore.js";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
type SaveDict = Record<string, any>;
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
export * from "@llamaindex/azure/storage";
|
||||
export * from "@llamaindex/core/storage/chat-store";
|
||||
export * from "@llamaindex/core/storage/doc-store";
|
||||
export * from "@llamaindex/core/storage/index-store";
|
||||
export * from "@llamaindex/core/storage/kv-store";
|
||||
export * from "./chatStore/AzureCosmosMongovCoreChatStore.js";
|
||||
export * from "./chatStore/AzureCosmosNoSqlChatStore.js";
|
||||
export * from "./docStore/AzureCosmosMongovCoreDocumentStore.js";
|
||||
export * from "./docStore/AzureCosmosNoSqlDocumentStore.js";
|
||||
export { PostgresDocumentStore } from "./docStore/PostgresDocumentStore.js";
|
||||
export {
|
||||
PostgresDocumentStore,
|
||||
PostgresIndexStore,
|
||||
PostgresKVStore,
|
||||
} from "@llamaindex/postgres";
|
||||
export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
|
||||
export * from "./FileSystem.js";
|
||||
export * from "./indexStore/AzureCosmosMongovCoreIndexStore.js";
|
||||
export * from "./indexStore/AzureCosmosNoSqlIndexStore.js";
|
||||
export { PostgresIndexStore } from "./indexStore/PostgresIndexStore.js";
|
||||
export * from "./kvStore/AzureCosmosMongovCoreKVStore.js";
|
||||
export * from "./kvStore/AzureCosmosNoSqlKVStore.js";
|
||||
export { PostgresKVStore } from "./kvStore/PostgresKVStore.js";
|
||||
|
||||
export * from "./StorageContext.js";
|
||||
|
||||
@@ -1,18 +1,15 @@
|
||||
import {
|
||||
AzureDynamicSessionTool,
|
||||
type AzureDynamicSessionToolParams,
|
||||
} from "./AzureDynamicSessionTool.node.js";
|
||||
import { WikipediaTool, type WikipediaToolParams } from "./WikipediaTool.js";
|
||||
} from "@llamaindex/azure";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-namespace
|
||||
export namespace ToolsFactory {
|
||||
type ToolsMap = {
|
||||
[Tools.Wikipedia]: typeof WikipediaTool;
|
||||
[Tools.AzureCodeInterpreter]: typeof AzureDynamicSessionTool;
|
||||
};
|
||||
|
||||
export enum Tools {
|
||||
Wikipedia = "wikipedia.WikipediaToolSpec",
|
||||
AzureCodeInterpreter = "azure_code_interpreter.AzureCodeInterpreterToolSpec",
|
||||
}
|
||||
|
||||
@@ -20,12 +17,6 @@ export namespace ToolsFactory {
|
||||
key: Tool,
|
||||
...params: ConstructorParameters<ToolsMap[Tool]>
|
||||
): Promise<InstanceType<ToolsMap[Tool]>> {
|
||||
if (key === Tools.Wikipedia) {
|
||||
return new WikipediaTool(
|
||||
...(params as WikipediaToolParams[]),
|
||||
) as InstanceType<ToolsMap[Tool]>;
|
||||
}
|
||||
|
||||
if (key === Tools.AzureCodeInterpreter) {
|
||||
return new AzureDynamicSessionTool(
|
||||
...(params as AzureDynamicSessionToolParams[]),
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
export * from "@llamaindex/core/tools";
|
||||
export * from "./QueryEngineTool.js";
|
||||
export * from "./WikipediaTool.js";
|
||||
|
||||
@@ -1,270 +1 @@
|
||||
import {
|
||||
Collection,
|
||||
DataAPIClient,
|
||||
Db,
|
||||
type Filter,
|
||||
type FindOptions,
|
||||
type SomeDoc,
|
||||
} from "@datastax/astra-db-ts";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
} from "./utils.js";
|
||||
|
||||
/**
 * Vector store that keeps nodes in a DataStax Astra DB collection.
 *
 * The constructor only builds the client and database handle. Call
 * `createAndConnect` or `connect` before `add`, `delete` or `query`;
 * those methods throw while no collection is attached.
 */
export class AstraDBVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  /** Document field that holds the node id. Default: "_id". */
  idKey: string;
  /** Document field that holds the node text. Default: "content". */
  contentKey: string;

  private astraClient: DataAPIClient;
  private astraDB: Db;
  private collection: Collection | undefined;

  /**
   * @param init - Optional settings. `params.token`, `params.endpoint` and
   *   `params.namespace` take precedence over the ASTRA_DB_APPLICATION_TOKEN,
   *   ASTRA_DB_API_ENDPOINT and ASTRA_DB_NAMESPACE environment variables.
   * @throws Error when neither `params` nor the environment provides a token or endpoint.
   */
  constructor(
    init?: Partial<AstraDBVectorStore> & {
      params?: {
        token: string;
        endpoint: string;
        namespace?: string;
      };
    } & VectorStoreBaseParams,
  ) {
    super(init);
    const token = init?.params?.token ?? getEnv("ASTRA_DB_APPLICATION_TOKEN");
    const endpoint = init?.params?.endpoint ?? getEnv("ASTRA_DB_API_ENDPOINT");

    if (!token) {
      throw new Error(
        "Must specify ASTRA_DB_APPLICATION_TOKEN via env variable.",
      );
    }
    if (!endpoint) {
      throw new Error("Must specify ASTRA_DB_API_ENDPOINT via env variable.");
    }
    const namespace =
      init?.params?.namespace ??
      getEnv("ASTRA_DB_NAMESPACE") ??
      "default_keyspace";
    this.astraClient = new DataAPIClient(token, {
      caller: ["LlamaIndexTS"],
    });
    this.astraDB = this.astraClient.db(endpoint, { namespace });

    this.idKey = init?.idKey ?? "_id";
    this.contentKey = init?.contentKey ?? "content";
  }

  /**
   * Create a new collection in your Astra DB vector database and connects to it.
   * You must call this method or `connect` before adding, deleting, or querying.
   *
   * @param collection - Your new collection's name
   * @param options - CreateCollectionOptions used to set the number of vector dimensions and similarity metric
   * @returns Promise that resolves if the creation did not throw an error.
   */
  async createAndConnect(
    collection: string,
    options?: Parameters<Db["createCollection"]>[1],
  ): Promise<void> {
    this.collection = await this.astraDB.createCollection(collection, options);
    console.debug("Created Astra DB collection");

    return;
  }

  /**
   * Connect to an existing collection in your Astra DB vector database.
   * You must call this method or `createAndConnect` before adding, deleting, or querying.
   *
   * @param collection - Your existing collection's name
   * @returns Promise that resolves if the connection did not throw an error.
   */
  async connect(collection: string): Promise<void> {
    this.collection = await this.astraDB.collection(collection);
    console.debug("Connected to Astra DB collection");

    return;
  }

  /**
   * Get an instance of your Astra DB client.
   * @returns the AstraDB client
   */
  client(): DataAPIClient {
    return this.astraClient;
  }

  /**
   * Add your document(s) to your Astra DB collection.
   *
   * @param nodes - Nodes to insert; an empty or missing list is a no-op.
   * @returns an array of node ids which were added
   * @throws Error when no collection is connected.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!this.collection) {
      throw new Error("Must connect to collection before adding.");
    }
    const collection = this.collection;

    if (!nodes || nodes.length === 0) {
      return [];
    }

    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.contentKey,
        this.flatMetadata,
      );

      return {
        $vector: node.getEmbedding(),
        [this.idKey]: node.id_,
        [this.contentKey]: node.getContent(MetadataMode.NONE),
        ...metadata,
      };
    });

    console.debug(`Adding ${dataToInsert.length} rows to table`);

    await collection.insertMany(dataToInsert);

    // Report the node ids that were written under `idKey`. The driver's
    // `insertedIds` are the documents' `_id` values, which only coincide with
    // the node ids when `idKey` is left at its "_id" default.
    return nodes.map((node) => node.id_);
  }

  /**
   * Delete a document from your Astra DB collection.
   *
   * The document is matched on the configured `idKey`, the same field `add`
   * writes the node id to and `query` reads it from.
   *
   * @param refDocId - The id of the document to delete
   * @param deleteOptions - DeleteOneOptions to pass to the delete query
   * @returns Promise that resolves if the delete query did not throw an error.
   * @throws Error when no collection is connected.
   */
  async delete(
    refDocId: string,
    deleteOptions?: Parameters<Collection["deleteOne"]>[1],
  ): Promise<void> {
    if (!this.collection) {
      throw new Error("Must connect to collection before deleting.");
    }
    const collection = this.collection;

    console.debug(`Deleting row with id ${refDocId}`);

    await collection.deleteOne(
      {
        [this.idKey]: refDocId,
      },
      deleteOptions,
    );
  }

  /**
   * Query documents from your Astra DB collection to get the closest match to your embedding.
   *
   * @param query - VectorStoreQuery; `queryEmbedding` (when set) becomes the vector sort,
   *   `similarityTopK` the limit and `filters` the Astra filter.
   * @param options - FindOptions; `sort` is only honoured when no query embedding is given.
   * @returns Matching nodes with their ids and similarity scores, in cursor order.
   * @throws Error when no collection is connected.
   */
  async query(
    query: VectorStoreQuery,
    options?: Parameters<Collection["find"]>[1],
  ): Promise<VectorStoreQueryResult> {
    if (!this.collection) {
      throw new Error("Must connect to collection before querying.");
    }
    const collection = this.collection;

    const astraFilter = this.toAstraFilter(query.filters);
    const cursor = await collection.find(astraFilter, <FindOptions>{
      ...options,
      sort: query.queryEmbedding
        ? { $vector: query.queryEmbedding }
        : options?.sort,
      limit: query.similarityTopK,
      includeSimilarity: true,
    });

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const row of cursor) {
      // Peel off the store-managed fields; whatever remains is node metadata.
      const {
        $vector: embedding,
        $similarity: similarity,
        [this.idKey]: id,
        [this.contentKey]: content,
        ...metadata
      } = row;

      const node = metadataDictToNode(metadata, {
        fallback: {
          id,
          text: content,
          ...metadata,
        },
      });
      node.setContent(content);

      ids.push(id);
      similarities.push(similarity);
      nodes.push(node);
    }

    return {
      similarities,
      ids,
      nodes,
    };
  }

  /**
   * Translate LlamaIndex metadata filters into an Astra filter document.
   * Missing or empty filters produce `{}`; the condition defaults to AND.
   *
   * @throws Error for a condition other than AND / OR.
   */
  private toAstraFilter(filters?: MetadataFilters): Filter<SomeDoc> {
    if (!filters || filters.filters?.length === 0) return {};
    const condition = filters.condition ?? FilterCondition.AND;
    const listFilter = filters.filters.map((f) => this.buildFilterItem(f));
    if (condition === FilterCondition.OR) return { $or: listFilter };
    if (condition === FilterCondition.AND) return { $and: listFilter };
    throw new Error(`Not supported filter condition: ${condition}`);
  }

  /**
   * Translate a single metadata filter into its Astra operator form.
   *
   * @throws Error for operators that have no mapping here.
   */
  private buildFilterItem(filter: MetadataFilter): Filter<SomeDoc> {
    const { key, operator, value } = filter;
    switch (operator) {
      case FilterOperator.EQ:
        return { [key]: value };
      case FilterOperator.NE:
        return { [key]: { $ne: value } };
      case FilterOperator.GT:
        return { [key]: { $gt: value } };
      case FilterOperator.LT:
        return { [key]: { $lt: value } };
      case FilterOperator.GTE:
        return { [key]: { $gte: value } };
      case FilterOperator.LTE:
        return { [key]: { $lte: value } };
      case FilterOperator.IN:
        return { [key]: { $in: parseArrayValue(value) } };
      case FilterOperator.NIN:
        return { [key]: { $nin: parseArrayValue(value) } };
      case FilterOperator.IS_EMPTY:
        return { [key]: { $size: 0 } };
      default:
        throw new Error(`Not supported filter operator: ${operator}`);
    }
  }
}
|
||||
export * from "@llamaindex/astra";
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export * from "@llamaindex/azure";
|
||||
@@ -1,235 +1 @@
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
ChromaClient,
|
||||
type ChromaClientParams,
|
||||
type DeleteParams,
|
||||
type QueryRecordsParams,
|
||||
type QueryResponse,
|
||||
type Where,
|
||||
type WhereDocument,
|
||||
} from "chromadb";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
VectorStoreQueryMode,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
/** Extra Chroma predicates that `delete` forwards alongside the id. */
type ChromaDeleteOptions = {
  where?: Where;
  whereDocument?: WhereDocument;
};

/** Extra Chroma predicate that `query` forwards alongside the metadata filter. */
type ChromaQueryOptions = {
  whereDocument?: WhereDocument;
};

/** The collection handle type returned by `ChromaClient.getOrCreateCollection`. */
type Collection = Awaited<ReturnType<ChromaClient["getOrCreateCollection"]>>;

/** Metadata key used for node text when the caller does not supply `textKey`. */
const DEFAULT_TEXT_KEY = "text";

/** Logical connectives emitted into a Chroma `where` clause. */
type ChromaFilterCondition = "$and" | "$or";

/** Comparison operators emitted into a Chroma `where` clause. */
type ChromaFilterOperator =
  | "$eq"
  | "$ne"
  | "$gt"
  | "$lt"
  | "$gte"
  | "$lte"
  | "$in"
  | "$nin";
|
||||
|
||||
/**
 * Vector store backed by a ChromaDB collection.
 *
 * The collection is looked up (or created) lazily on first use and cached
 * on the instance.
 */
export class ChromaVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;
  /** Metadata key under which node text is serialised. */
  textKey: string;
  private chromaClient: ChromaClient;
  private collection: Collection | null = null;
  private collectionName: string;

  /**
   * @param init - Collection name plus optional text key and Chroma client parameters.
   */
  constructor(
    init: {
      collectionName: string;
      textKey?: string;
      chromaClientParams?: ChromaClientParams;
    } & VectorStoreBaseParams,
  ) {
    super(init);
    this.collectionName = init.collectionName;
    this.chromaClient = new ChromaClient(init.chromaClientParams);
    this.textKey = init.textKey ?? DEFAULT_TEXT_KEY;
  }

  /** @returns the underlying Chroma client. */
  client(): ChromaClient {
    return this.chromaClient;
  }

  /**
   * Return the cached collection handle, fetching or creating it on first call.
   */
  async getCollection(): Promise<Collection> {
    if (this.collection) {
      return this.collection;
    }
    this.collection = await this.chromaClient.getOrCreateCollection({
      name: this.collectionName,
    });
    return this.collection;
  }

  /** Build the column-oriented payload that `collection.add` receives. */
  private getDataToInsert(nodes: BaseNode[]) {
    const metadatas = nodes.map((n) =>
      nodeToMetadata(n, true, this.textKey, this.flatMetadata),
    );
    const embeddings = nodes.map((n) => n.getEmbedding());
    const ids = nodes.map((n) => n.id_);
    const documents = nodes.map((n) => n.getContent(MetadataMode.NONE));
    return { embeddings, ids, metadatas, documents };
  }

  /**
   * Insert nodes into the collection.
   *
   * @returns the ids of the given nodes; an empty or missing list is a no-op.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!nodes || nodes.length === 0) {
      return [];
    }

    const payload = this.getDataToInsert(nodes);
    const target = await this.getCollection();
    await target.add(payload);
    return nodes.map((n) => n.id_);
  }

  /**
   * Delete the record whose id equals `refDocId`.
   *
   * @param refDocId - Id of the record to remove.
   * @param deleteOptions - Optional `where` / `whereDocument` predicates passed through to Chroma.
   */
  async delete(
    refDocId: string,
    deleteOptions?: ChromaDeleteOptions,
  ): Promise<void> {
    const target = await this.getCollection();
    const request = <DeleteParams>{
      ids: [refDocId],
      where: deleteOptions?.where,
      whereDocument: deleteOptions?.whereDocument,
    };
    await target.delete(request);
  }

  /** Map a LlamaIndex filter condition onto Chroma's connective; throws if unmapped. */
  private transformChromaFilterCondition(
    condition: FilterCondition,
  ): ChromaFilterCondition {
    const connectives = new Map<FilterCondition, ChromaFilterCondition>([
      [FilterCondition.AND, "$and"],
      [FilterCondition.OR, "$or"],
    ]);
    const mapped = connectives.get(condition);
    if (mapped === undefined) {
      throw new Error(`Filter condition ${condition} not supported`);
    }
    return mapped;
  }

  /** Map a LlamaIndex filter operator onto Chroma's operator; throws if unmapped. */
  private transformChromaFilterOperator(
    operator: FilterOperator,
  ): ChromaFilterOperator {
    const operators = new Map<FilterOperator, ChromaFilterOperator>([
      [FilterOperator.EQ, "$eq"],
      [FilterOperator.NE, "$ne"],
      [FilterOperator.GT, "$gt"],
      [FilterOperator.LT, "$lt"],
      [FilterOperator.GTE, "$gte"],
      [FilterOperator.LTE, "$lte"],
      [FilterOperator.IN, "$in"],
      [FilterOperator.NIN, "$nin"],
    ]);
    const mapped = operators.get(operator);
    if (mapped === undefined) {
      throw new Error(`Filter operator ${operator} not supported`);
    }
    return mapped;
  }

  /**
   * Convert metadata filters into a Chroma `where` clause.
   *
   * No clauses yields `{}`, a single clause is returned bare, and several
   * clauses are wrapped under the connective (default "$and").
   */
  private toChromaFilter(filters: MetadataFilters): Where {
    // Resolve the connective first so an unsupported condition fails
    // before any clause is built.
    const connective = filters.condition
      ? this.transformChromaFilterCondition(
          filters.condition as FilterCondition,
        )
      : "$and";

    if (!filters.filters) {
      return {};
    }

    const clauses: Where[] = [];
    for (const item of filters.filters) {
      const clause: Where = item.operator
        ? {
            [item.key]: {
              [this.transformChromaFilterOperator(
                item.operator as FilterOperator,
              )]: item.value,
            },
          }
        : { [item.key]: item.value };
      clauses.push(clause);
    }

    if (clauses.length === 0) {
      return {};
    }
    if (clauses.length === 1) {
      return clauses[0]!;
    }
    const combined: Where = {};
    combined[connective] = clauses;
    return combined;
  }

  /**
   * Run a similarity query against the collection.
   *
   * @param query - Only the default query mode is accepted and `docIds` must be unset.
   * @param options - Optional `whereDocument` predicate passed through to Chroma.
   * @returns Nodes, ids and similarities (computed as `1 - distance`) for the first result set.
   * @throws Error when `docIds` is set or a non-default mode is requested.
   */
  async query(
    query: VectorStoreQuery,
    options?: ChromaQueryOptions,
  ): Promise<VectorStoreQueryResult> {
    if (query.docIds) {
      throw new Error("ChromaDB does not support querying by docIDs");
    }
    if (query.mode != VectorStoreQueryMode.DEFAULT) {
      throw new Error("ChromaDB does not support querying by mode");
    }

    const chromaWhere: Where = query.filters
      ? this.toChromaFilter(query.filters)
      : {};

    const target = await this.getCollection();
    const request = <QueryRecordsParams>{
      queryEmbeddings: query.queryEmbedding ?? undefined,
      queryTexts: query.queryStr ?? undefined,
      nResults: query.similarityTopK,
      // An empty `where` object is omitted rather than sent.
      where: Object.keys(chromaWhere).length ? chromaWhere : undefined,
      whereDocument: options?.whereDocument,
      // ChromaDB doesn't return the result embeddings by default so we need to include them
      include: ["distances", "metadatas", "documents", "embeddings"],
    };
    const response: QueryResponse = await target.query(request);

    const nodes = response.ids[0]!.map((_id, position) => {
      const text = (response.documents as string[][])[0]![position];
      const rawMetadata = response.metadatas[0]![position] ?? {};
      const node = metadataDictToNode(rawMetadata);
      node.setContent(text);
      return node;
    });
    const similarities = (response.distances as number[][])[0]!.map(
      (distance) => 1 - distance,
    );

    const result: VectorStoreQueryResult = {
      nodes,
      similarities,
      ids: response.ids[0]!,
    };
    return result;
  }
}
|
||||
export * from "@llamaindex/chroma";
|
||||
|
||||
@@ -1,279 +1 @@
|
||||
import type { ChannelOptions } from "@grpc/grpc-js";
|
||||
import { BaseNode, MetadataMode, type Metadata } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import {
|
||||
DataType,
|
||||
MilvusClient,
|
||||
type ClientConfig,
|
||||
type DeleteReq,
|
||||
type RowData,
|
||||
type SearchSimpleReq,
|
||||
} from "@zilliz/milvus2-sdk-node";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
parsePrimitiveValue,
|
||||
} from "./utils.js";
|
||||
|
||||
/**
 * Render metadata filters as a Milvus boolean expression over the
 * `metadata[...]` JSON field.
 *
 * Each filter becomes one clause; clauses are joined with the filters'
 * `condition` ("and" when unset).
 *
 * @param scalarFilters - Filters to translate.
 * @returns The expression string (empty when there are no filters).
 * @throws Error for an operator that has no translation here.
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const joiner = scalarFilters.condition ?? "and";

  const clauses = scalarFilters.filters.map((filter): string => {
    const op = filter.operator;
    switch (op) {
      // Equality comparisons always quote the value.
      case "==":
      case "!=":
        return `metadata["${filter.key}"] ${op} "${parsePrimitiveValue(filter.value)}"`;

      case "in": {
        const quotedItems = parseArrayValue(filter.value)
          .map((item) => `"${item}"`)
          .join(", ");
        return `metadata["${filter.key}"] ${op} [${quotedItems}]`;
      }

      case "nin":
        // Original author's note: Milvus does not support a `nin` operator,
        // so every value is checked manually. Produces:
        //   metadata["key"] != "value1" && metadata["key"] != "value2"
        return parseArrayValue(filter.value)
          .map((item) => `metadata["${filter.key}"] != "${item}"`)
          .join(" && ");

      // Ordering comparisons emit the value unquoted.
      case "<":
      case "<=":
      case ">":
      case ">=":
        return `metadata["${filter.key}"] ${op} ${parsePrimitiveValue(filter.value)}`;

      default:
        throw new Error(`Operator ${op} is not supported.`);
    }
  });

  return clauses.join(` ${joiner} `);
}
|
||||
|
||||
/**
 * Vector store backed by a Milvus collection.
 *
 * The collection is checked, created if missing, and loaded on the first
 * call to `add`, `delete` or `query`.
 */
export class MilvusVectorStore extends BaseVectorStore {
  public storesText: boolean = true;
  public isEmbeddingQuery?: boolean = false;
  private flatMetadata: boolean = true;

  private milvusClient: MilvusClient;
  private collectionInitialized = false;
  private collectionName: string;

  // Field names used both when creating the collection and when writing/reading rows.
  private idKey: string;
  private contentKey: string;
  private metadataKey: string;
  private embeddingKey: string;

  /**
   * @param init - Either a ready `milvusClient`, or `params` used to build one.
   *   Values missing from `params` fall back to MILVUS_ADDRESS, MILVUS_SSL,
   *   MILVUS_USERNAME and MILVUS_PASSWORD. Collection and field names
   *   default to "llamacollection", "id", "content", "metadata" and "embedding".
   * @throws Error when no client is given and no address can be resolved.
   */
  constructor(
    init?: Partial<{ milvusClient: MilvusClient }> &
      VectorStoreBaseParams & {
        params?: {
          configOrAddress: ClientConfig | string;
          ssl?: boolean;
          username?: string;
          password?: string;
          channelOptions?: ChannelOptions;
        };
        collection?: string;
        idKey?: string;
        contentKey?: string;
        metadataKey?: string;
        embeddingKey?: string;
      },
  ) {
    super(init);
    if (init?.milvusClient) {
      this.milvusClient = init.milvusClient;
    } else {
      const configOrAddress =
        init?.params?.configOrAddress ?? getEnv("MILVUS_ADDRESS");
      const ssl = init?.params?.ssl ?? getEnv("MILVUS_SSL") === "true";
      const username = init?.params?.username ?? getEnv("MILVUS_USERNAME");
      const password = init?.params?.password ?? getEnv("MILVUS_PASSWORD");

      if (!configOrAddress) {
        throw new Error("Must specify MILVUS_ADDRESS via env variable.");
      }
      this.milvusClient = new MilvusClient(
        configOrAddress,
        ssl,
        username,
        password,
        init?.params?.channelOptions,
      );
    }

    this.collectionName = init?.collection ?? "llamacollection";
    this.idKey = init?.idKey ?? "id";
    this.contentKey = init?.contentKey ?? "content";
    this.metadataKey = init?.metadataKey ?? "metadata";
    this.embeddingKey = init?.embeddingKey ?? "embedding";
  }

  /** @returns the underlying Milvus client. */
  public client(): MilvusClient {
    return this.milvusClient;
  }

  /**
   * Create the collection with id, embedding, content and metadata fields,
   * then index the embedding field.
   */
  private async createCollection() {
    await this.milvusClient.createCollection({
      collection_name: this.collectionName,
      fields: [
        {
          name: this.idKey,
          data_type: DataType.VarChar,
          is_primary_key: true,
          max_length: 200,
        },
        {
          name: this.embeddingKey,
          data_type: DataType.FloatVector,
          // NOTE(review): dimension is fixed at 1536 here; confirm this
          // matches the embedding model in use.
          dim: 1536,
        },
        {
          name: this.contentKey,
          data_type: DataType.VarChar,
          max_length: 9000,
        },
        {
          name: this.metadataKey,
          data_type: DataType.JSON,
        },
      ],
    });
    await this.milvusClient.createIndex({
      collection_name: this.collectionName,
      field_name: this.embeddingKey,
    });
  }

  /**
   * One-time setup: wait for the client connection, create the collection
   * if it does not exist, and load it. Later calls return immediately.
   */
  private async ensureCollection(): Promise<void> {
    if (!this.collectionInitialized) {
      await this.milvusClient.connectPromise;

      // Check collection exists
      const isCollectionExist = await this.milvusClient.hasCollection({
        collection_name: this.collectionName,
      });
      if (!isCollectionExist.value) {
        await this.createCollection();
      }

      await this.milvusClient.loadCollectionSync({
        collection_name: this.collectionName,
      });
      this.collectionInitialized = true;
    }
  }

  /**
   * Insert nodes into the collection.
   *
   * @param nodes - Nodes to insert; an empty or missing list is a no-op.
   * @returns The ids reported by Milvus as strings, or `[]` when none are reported.
   */
  public async add(nodes: BaseNode<Metadata>[]): Promise<string[]> {
    // Nothing to insert: skip the round trip instead of sending an empty
    // insert request.
    if (!nodes || nodes.length === 0) {
      return [];
    }

    await this.ensureCollection();

    const result = await this.milvusClient.insert({
      collection_name: this.collectionName,
      data: nodes.map((node) => {
        const metadata = nodeToMetadata(
          node,
          true,
          this.contentKey,
          this.flatMetadata,
        );

        const entry: RowData = {
          [this.idKey]: node.id_,
          [this.embeddingKey]: node.getEmbedding(),
          [this.contentKey]: node.getContent(MetadataMode.NONE),
          [this.metadataKey]: metadata,
        };

        return entry;
      }),
    });

    if (!result.IDs) {
      return [];
    }

    if ("int_id" in result.IDs) {
      return result.IDs.int_id.data.map((i) => String(i));
    }

    return result.IDs.str_id.data.map((s) => String(s));
  }

  /**
   * Delete the row whose primary key equals `refDocId`.
   *
   * @param refDocId - Id of the row to remove.
   * @param deleteOptions - Extra delete request fields; they are spread last
   *   and so may override `collection_name`.
   */
  public async delete(
    refDocId: string,
    deleteOptions?: Omit<DeleteReq, "ids">,
  ): Promise<void> {
    await this.ensureCollection();

    await this.milvusClient.delete({
      ids: [refDocId],
      collection_name: this.collectionName,
      ...deleteOptions,
    });
  }

  /**
   * Convert metadata filters to a Milvus filter expression.
   *
   * @returns the expression, or `undefined` when no filters are given.
   */
  public toMilvusFilter(filters?: MetadataFilters): string | undefined {
    if (!filters) return undefined;
    // TODO: Milvus also support standard filters, we can add it later
    return parseScalarFilters(filters);
  }

  /**
   * Run a vector search.
   *
   * @param query - Supplies the embedding, result limit and optional filters.
   * @param _options - Unused.
   * @returns Nodes, similarity scores and ids in the order Milvus returned them.
   */
  public async query(
    query: VectorStoreQuery,
    _options?: object,
  ): Promise<VectorStoreQueryResult> {
    await this.ensureCollection();

    const found = await this.milvusClient.search(<SearchSimpleReq>{
      collection_name: this.collectionName,
      limit: query.similarityTopK,
      vector: query.queryEmbedding,
      filter: this.toMilvusFilter(query.filters),
    });

    const nodes: BaseNode<Metadata>[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];

    found.results.forEach((result) => {
      // Read the fields under the same configurable names `add` wrote them
      // with, rather than the literal "metadata" / "content" defaults.
      const node = metadataDictToNode(result[this.metadataKey]);
      node.setContent(result[this.contentKey]);
      nodes.push(node);

      similarities.push(result.score);
      ids.push(String(result.id));
    });

    return {
      nodes,
      similarities,
      ids,
    };
  }

  /** No-op: this store has nothing to persist locally. */
  public async persist() {
    // no need to do anything
  }
}
|
||||
export * from "@llamaindex/milvus";
|
||||
|
||||
@@ -1,340 +1 @@
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { BulkWriteOptions, Collection } from "mongodb";
|
||||
import { MongoClient } from "mongodb";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
type FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
/**
 * Default Atlas Search definition for the embedding field; callers can
 * override individual keys through `embeddingDefinition`.
 *
 * Define your Atlas Search index. See detail
 * https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
 */
const DEFAULT_EMBEDDING_DEFINITION = {
  type: "knnVector",
  dimensions: 1536,
  similarity: "cosine",
};
|
||||
|
||||
/**
 * Translate a LlamaIndex filter operator into its MongoDB query operator.
 *
 * @param operator - Operator string such as "==", "<=", "in" or "nin".
 * @returns The matching MQL operator (for example "$eq").
 * @throws Error when the operator has no mapping.
 */
function mapLcMqlFilterOperators(operator: string): string {
  const operatorMap: { [key in FilterOperator]?: string } = {
    "==": "$eq",
    "<": "$lt",
    "<=": "$lte",
    ">": "$gt",
    ">=": "$gte",
    "!=": "$ne",
    in: "$in",
    nin: "$nin",
  };
  // Only own keys count: a bare index lookup would resolve names inherited
  // from Object.prototype ("toString", "constructor", ...) to a truthy
  // function and return it instead of rejecting the operator.
  const mqlOperator = Object.prototype.hasOwnProperty.call(
    operatorMap,
    operator,
  )
    ? operatorMap[operator as FilterOperator]
    : undefined;
  if (!mqlOperator) throw new Error(`Unsupported operator: ${operator}`);
  return mqlOperator;
}
|
||||
|
||||
/**
 * Convert metadata filters into a MongoDB filter document.
 *
 * - No filters, or an empty filter list, yields `{}`.
 * - A single filter is returned bare, regardless of the condition.
 * - Several filters are wrapped in `$and` / `$or`; a missing condition
 *   defaults to AND, matching the other vector stores in this package.
 *
 * @param filters - Filters to translate.
 * @returns The MongoDB filter document.
 * @throws Error when several filters are combined with a condition other than AND / OR.
 */
function toMongoDBFilter(filters?: MetadataFilters): Record<string, unknown> {
  // An empty list must not reach the `$and` / `$or` branches: it previously
  // fell through to the "condition not recognized" error (or built an empty
  // `$and` array) even though there is simply nothing to filter on.
  if (!filters || !filters.filters || filters.filters.length === 0) return {};

  const createFilterObject = (mf: MetadataFilter) => ({
    [mf.key]: {
      [mapLcMqlFilterOperators(mf.operator)]: mf.value,
    },
  });

  if (filters.filters.length === 1) {
    return createFilterObject(filters.filters[0]!);
  }

  const condition = filters.condition ?? FilterCondition.AND;

  if (condition === FilterCondition.AND) {
    return { $and: filters.filters.map(createFilterObject) };
  }

  if (condition === FilterCondition.OR) {
    return { $or: filters.filters.map(createFilterObject) };
  }

  throw new Error("filters condition not recognized. Must be AND or OR");
}
|
||||
|
||||
/**
|
||||
* Vector store that uses MongoDB Atlas for storage and vector search.
|
||||
* This store uses the $vectorSearch aggregation stage to perform vector similarity search.
|
||||
*/
|
||||
export class MongoDBAtlasVectorSearch extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
flatMetadata: boolean = true;
|
||||
|
||||
dbName: string;
|
||||
collectionName: string;
|
||||
autoCreateIndex: boolean;
|
||||
embeddingDefinition: Record<string, unknown>;
|
||||
indexedMetadataFields: string[];
|
||||
|
||||
/**
|
||||
* The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
|
||||
*/
|
||||
mongodbClient: MongoClient;
|
||||
|
||||
/**
|
||||
* Name of the vector index. If invalid, Mongo will silently ignore this issue and return 0 results.
|
||||
*
|
||||
* Default: "default"
|
||||
*/
|
||||
indexName: string;
|
||||
|
||||
/**
|
||||
* Name of the key containing the embedding vector.
|
||||
*
|
||||
* Default: "embedding"
|
||||
*/
|
||||
embeddingKey: string;
|
||||
|
||||
/**
|
||||
* Name of the key containing the node id.
|
||||
*
|
||||
* Default: "id"
|
||||
*/
|
||||
idKey: string;
|
||||
|
||||
/**
|
||||
* Name of the key containing the node text.
|
||||
*
|
||||
* Default: "text"
|
||||
*/
|
||||
textKey: string;
|
||||
|
||||
/**
|
||||
* Name of the key containing the node metadata.
|
||||
*
|
||||
* Default: "metadata"
|
||||
*/
|
||||
metadataKey: string;
|
||||
|
||||
/**
|
||||
* Options to pass to the insertMany function when adding nodes.
|
||||
*/
|
||||
insertOptions?: BulkWriteOptions | undefined;
|
||||
|
||||
/**
|
||||
* Function to determine the number of candidates to retrieve for a given query.
|
||||
* In case your results are not good, you might tune this value.
|
||||
*
|
||||
* {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ | Run Vector Search Queries}
|
||||
*
|
||||
* {@link https://arxiv.org/abs/1603.09320 | Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs}
|
||||
*
|
||||
*
|
||||
* Default: query.similarityTopK * 10
|
||||
*/
|
||||
numCandidates: (query: VectorStoreQuery) => number;
|
||||
private collection?: Collection;
|
||||
|
||||
/**
 * Build a vector store bound to one database/collection pair.
 *
 * A client passed as `init.mongodbClient` is used as-is; otherwise a new
 * client is created from the `MONGODB_URI` environment variable. Every other
 * option falls back to the default assigned in the body below.
 *
 * @param init Connection settings plus optional overrides for the public fields
 * @throws Error when no client is supplied and `MONGODB_URI` is not set
 */
constructor(
  init: Partial<MongoDBAtlasVectorSearch> & {
    dbName: string;
    collectionName: string;
    embedModel?: BaseEmbedding;
    autoCreateIndex?: boolean;
    indexedMetadataFields?: string[];
    embeddingDefinition?: Record<string, unknown>;
  } & VectorStoreBaseParams,
) {
  super(init);

  // Resolve the client: prefer the caller's instance, else connect via env.
  const suppliedClient = init.mongodbClient;
  if (!suppliedClient) {
    const uriFromEnv = getEnv("MONGODB_URI");
    if (!uriFromEnv) {
      throw new Error(
        "Must specify MONGODB_URI via env variable if not directly passing in client.",
      );
    }
    this.mongodbClient = new MongoClient(uriFromEnv);
  } else {
    this.mongodbClient = suppliedClient;
  }

  // Target namespace and index-creation behaviour.
  this.dbName = init.dbName ?? "default_db";
  this.collectionName = init.collectionName ?? "default_collection";
  this.autoCreateIndex = init.autoCreateIndex ?? true;
  this.indexedMetadataFields = init.indexedMetadataFields ?? [];
  // Caller-provided keys override the defaults; spreading undefined adds nothing.
  this.embeddingDefinition = {
    ...DEFAULT_EMBEDDING_DEFINITION,
    ...init.embeddingDefinition,
  };

  // Index name and document key names.
  this.indexName = init.indexName ?? "default";
  this.embeddingKey = init.embeddingKey ?? "embedding";
  this.idKey = init.idKey ?? "id";
  this.textKey = init.textKey ?? "text";
  this.metadataKey = init.metadataKey ?? "metadata";

  const tenTimesTopK = (q: VectorStoreQuery) => q.similarityTopK * 10;
  this.numCandidates = init.numCandidates ?? tenTimesTopK;
  this.insertOptions = init.insertOptions;
}
|
||||
|
||||
/**
 * Return the backing collection, creating it on the first call and caching
 * the handle for later calls.
 *
 * When `autoCreateIndex` is enabled, each call also lists the collection's
 * search indexes and creates one named `indexName` if none exists. The index
 * maps the embedding field plus every entry of `indexedMetadataFields`
 * (as `token` fields).
 *
 * @returns The collection used by this store
 */
async ensureCollection(): Promise<Collection> {
  let collection = this.collection;
  if (!collection) {
    collection = await this.mongodbClient
      .db(this.dbName)
      .createCollection(this.collectionName);
    this.collection = collection;
  }

  if (this.autoCreateIndex) {
    const searchIndexes = await collection.listSearchIndexes().toArray();
    const indexExists = searchIndexes.some(
      (index) => index.name === this.indexName,
    );
    if (!indexExists) {
      const additionalDefinition: Record<string, { type: string }> = {};
      for (const field of this.indexedMetadataFields) {
        additionalDefinition[field] = { type: "token" };
      }
      await collection.createSearchIndex({
        name: this.indexName,
        definition: {
          mappings: {
            dynamic: true,
            fields: {
              // Index the field the vectors are actually stored under: add()
              // writes them to `embeddingKey` and query() searches that path,
              // so a hard-coded "embedding" breaks any custom key.
              [this.embeddingKey]: this.embeddingDefinition,
              ...additionalDefinition,
            },
          },
        },
      });
    }
  }

  return collection;
}
|
||||
|
||||
/**
 * Add nodes to the vector store.
 *
 * Each node becomes one document holding its id, embedding, text and
 * metadata under the configured key names. Metadata fields listed in
 * `indexedMetadataFields` are additionally copied to the top level of the
 * document so they can be used for filtering.
 *
 * @param nodes Nodes to add to the vector store
 * @returns List of node ids that were added (empty when `nodes` is empty)
 */
async add(nodes: BaseNode[]): Promise<string[]> {
  if (!nodes || nodes.length === 0) {
    return [];
  }

  const dataToInsert = nodes.map((node) => {
    const metadata = nodeToMetadata(
      node,
      true,
      this.textKey,
      this.flatMetadata,
    );

    // Include the specified metadata fields in the top level of the document (to help filter)
    const populatedMetadata: Record<string, unknown> = {};
    for (const field of this.indexedMetadataFields) {
      populatedMetadata[field] = metadata[field];
    }

    return {
      [this.idKey]: node.id_,
      [this.embeddingKey]: node.getEmbedding(),
      [this.textKey]: node.getContent(MetadataMode.NONE) || "",
      [this.metadataKey]: metadata,
      ...populatedMetadata,
    };
  });

  const collection = await this.ensureCollection();
  // The insert result is not needed; the returned ids come from the nodes.
  await collection.insertMany(dataToInsert, this.insertOptions);

  return nodes.map((node) => node.id_);
}
|
||||
|
||||
/**
 * Remove every stored node that belongs to the given reference document.
 *
 * Documents are matched on the `ref_doc_id` entry nested under the
 * configured metadata key.
 *
 * @param refDocId The refDocId of the nodes to delete
 * @param deleteOptions Options forwarded to the collection's deleteMany call
 */
async delete(refDocId: string, deleteOptions?: object): Promise<void> {
  const target = await this.ensureCollection();
  const refDocFilter = { [`${this.metadataKey}.ref_doc_id`]: refDocId };
  await target.deleteMany(refDocFilter, deleteOptions);
}
|
||||
|
||||
/**
 * Expose the MongoDB client this store talks through.
 *
 * @returns The client instance held in `mongodbClient`
 */
client() {
  const { mongodbClient } = this;
  return mongodbClient;
}
|
||||
|
||||
/**
 * Perform a vector similarity search query.
 *
 * Runs a `$vectorSearch` aggregation against the configured index and
 * embedding key, then projects the search score in and the stored embedding
 * out of each hit.
 *
 * @param query The query to run; its filters, if any, are converted with toMongoDBFilter
 * @param options Accepted for interface compatibility; not read by this implementation
 * @returns List of nodes and their similarities
 */
async query(
  query: VectorStoreQuery,
  options?: object,
): Promise<VectorStoreQueryResult> {
  const params: Record<string, unknown> = {
    queryVector: query.queryEmbedding,
    path: this.embeddingKey,
    numCandidates: this.numCandidates(query),
    limit: query.similarityTopK,
    index: this.indexName,
  };

  if (query.filters) {
    params.filter = toMongoDBFilter(query.filters);
  }

  const pipeline = [
    { $vectorSearch: params },
    {
      $project: {
        score: { $meta: "vectorSearchScore" },
        // Drop the stored vector from the returned documents.
        [this.embeddingKey]: 0,
      },
    },
  ];

  const collection = await this.ensureCollection();
  // aggregate() returns a cursor, not a promise, so it is not awaited;
  // the documents are fetched as the cursor is iterated below.
  const cursor = collection.aggregate(pipeline);

  const nodes: BaseNode[] = [];
  const ids: string[] = [];
  const similarities: number[] = [];

  for await (const res of cursor) {
    const text = res[this.textKey];
    const score = res.score;
    const id = res[this.idKey];
    const metadata = res[this.metadataKey];

    // Rebuild the node from its stored metadata, then attach the stored text.
    const node = metadataDictToNode(metadata);
    node.setContent(text);

    ids.push(id);
    nodes.push(node);
    similarities.push(score);
  }

  return { nodes, similarities, ids };
}
|
||||
}
|
||||
export * from "@llamaindex/mongodb";
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user