Compare commits

..

28 Commits

Author SHA1 Message Date
github-actions[bot] bd940d1d43 Release 0.8.34 (#1630)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-02-07 16:02:27 +07:00
Thuc Pham 9f8ad37b79 fix: missing peer deps in llamaindex (#1631) 2025-02-07 15:06:41 +07:00
Peter Goldstein 7265f74c24 Add reasoning_effort for o1 and o3 (#1628) 2025-02-07 12:01:48 +07:00
github-actions[bot] e3f1b85846 Release 0.8.33 (#1619)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-02-06 15:53:47 +07:00
Peter Goldstein e38e474f86 Add Gemini 2.0 models (#1625) 2025-02-06 15:41:20 +07:00
Parham Saidi 2019a041f7 fix: o3 calls do not support temperature param (#1622) 2025-02-06 10:23:23 +07:00
Thuc Pham 067a4894fe fix: missing condition to stringify tool input (#1620) 2025-02-05 17:25:37 +07:00
clean99 21769c8ad9 Fix: update deprecated response property in examples (#1614)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-02-05 11:28:32 +07:00
github-actions[bot] 89ea1e1d31 Release 0.8.32 (#1595)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-02-04 12:23:47 +07:00
siquick d9bbaf95f3 chore: updated docs and examples to use correct PGVectorStore imports (#1611)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-02-04 11:18:39 +07:00
Marcus Schiesser 8744796c06 update the version of chromadb client (#1616)
Co-authored-by: my8bit <mail@my8bit.name>
2025-02-04 10:58:04 +07:00
Ihor Pavlenko f02621e379 fix: doc links (#1610) 2025-02-04 10:46:04 +07:00
Peter Goldstein 1892e1ce1d Add O3 mini model (#1612) 2025-02-04 10:34:22 +07:00
dependabot[bot] d90d8959a5 chore(deps-dev): bump vite from 5.4.11 to 5.4.12 (#1604)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-04 10:33:18 +07:00
Thuc Pham 4df1fe6cca chore: migrate llamaindex llms and embeddings to their own packages (#1615) 2025-02-03 17:32:19 +07:00
Thuc Pham 34faf4821a chore: move vector stores to their own packages (#1605) 2025-01-24 12:45:13 +07:00
Marcus Schiesser b24ffc6174 fix: pinecone import (#1603) 2025-01-21 12:32:05 +07:00
Thuc Pham 82e25c924c fix: remove ignore package from changeset (#1602) 2025-01-21 12:28:39 +07:00
Thuc Pham 1931bbca74 chore: move azure code to own @llamaindex/azure package (#1601) 2025-01-21 12:14:17 +07:00
Thuc Pham 94566169fb chore: move postgres storage classes to @llamaindex/postgres (#1597) 2025-01-20 12:24:44 +07:00
Alex Yang d6c270ec7a feat(cloud): support pass project and org id to llama parse reader (#1594) 2025-01-08 11:23:48 -08:00
github-actions[bot] e3a77044d5 Release 0.8.31 (#1579)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: himself65 <himself65@users.noreply.github.com>
2025-01-08 10:42:19 -08:00
Alex Yang fd9c8294e1 chore: bump llamacloud openapi (#1592) 2025-01-06 23:12:41 -08:00
Thuc Pham 0ebbfc1031 fix: clean up docstore when generating embedding fail (#1588)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-01-07 11:58:09 +07:00
Alex Yang 5dec9f912a chore: bump sdk deps (#1591) 2025-01-06 17:03:15 -08:00
Alex Yang 1f53819b64 chore: bump version (#1583) 2025-01-02 07:43:17 -08:00
Erik d211b7ab13 add tool call history support in chat messages (#1565) 2024-12-22 19:09:38 -08:00
Thuc Pham 057ee146bd fix: add start command for stackblitz (#1577) 2024-12-20 23:55:33 -08:00
245 changed files with 12460 additions and 5582 deletions
+1 -1
View File
@@ -150,7 +150,7 @@ jobs:
done
- name: Pack provider packages
run: |
for dir in packages/providers/*; do
for dir in packages/providers/* packages/providers/storage/*; do
if [ -d "$dir" ] && [ -f "$dir/package.json" ]; then
echo "Packing $dir"
pnpm pack --pack-destination ${{ runner.temp }} -C $dir
+1 -1
View File
@@ -76,7 +76,7 @@ If you need any of those classes, you have to import them instead directly thoug
Here's an example for importing the `PineconeVectorStore` class:
```typescript
import { PineconeVectorStore } from "llamaindex/storage/vectorStore/PineconeVectorStore";
import { PineconeVectorStore } from "llamaindex/vector-store/PineconeVectorStore";
```
As the `PDFReader` is not working with the Edge runtime, here's how to use the `SimpleDirectoryReader` with the `LlamaParseReader` to load PDFs:
+50
View File
@@ -1,5 +1,55 @@
# @llamaindex/doc
## 0.0.38
### Patch Changes
- Updated dependencies [9f8ad37]
- Updated dependencies [7265f74]
- llamaindex@0.8.34
- @llamaindex/openai@0.1.48
## 0.0.37
### Patch Changes
- Updated dependencies [2019a04]
- @llamaindex/openai@0.1.47
- llamaindex@0.8.33
## 0.0.36
### Patch Changes
- f02621e: Fix internal links between chapters
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [d6c270e]
- Updated dependencies [1892e1c]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
- @llamaindex/core@0.4.21
- @llamaindex/cloud@2.0.22
- @llamaindex/openai@0.1.46
- @llamaindex/node-parser@0.0.22
- @llamaindex/readers@1.0.23
## 0.0.35
### Patch Changes
- Updated dependencies [5dec9f9]
- Updated dependencies [fd9c829]
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- @llamaindex/cloud@2.0.21
- llamaindex@0.8.31
- @llamaindex/core@0.4.20
- @llamaindex/node-parser@0.0.21
- @llamaindex/openai@0.1.45
- @llamaindex/readers@1.0.22
## 0.0.34
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/doc",
"version": "0.0.34",
"version": "0.0.38",
"private": true,
"scripts": {
"build": "pnpm run build:docs && next build",
@@ -20,7 +20,7 @@ npm install llamaindex
## Choose your model
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](local_model).
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](3_local_model).
## Get an OpenAI API key
@@ -36,4 +36,4 @@ We'll use `dotenv` to pull the API key out of that .env file, so also run:
npm install dotenv
```
Now you're ready to [create your agent](create_agent).
Now you're ready to [create your agent](2_create_agent).
@@ -177,5 +177,5 @@ The second piece of output is the response from the LLM itself, where the `messa
Great! We've built an agent with tool use! Next you can:
- [See the full code](https://github.com/run-llama/ts-agents/blob/main/1_agent/agent.ts)
- [Switch to a local LLM](local_model)
- Move on to [add Retrieval-Augmented Generation to your agent](agentic_rag)
- [Switch to a local LLM](3_local_model)
- Move on to [add Retrieval-Augmented Generation to your agent](4_agentic_rag)
@@ -89,4 +89,4 @@ You can use a ReActAgent instead of an OpenAIAgent in any of the further example
### Next steps
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](agentic_rag).
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](4_agentic_rag).
@@ -153,4 +153,4 @@ The `OpenAIContextAwareAgent` approach simplifies the setup by allowing you to d
On the other hand, using the `QueryEngineTool` offers more flexibility and power. This method allows for customization in how queries are constructed and executed, enabling you to query data from various storages and process them in different ways. However, this added flexibility comes with increased complexity and response time due to the separate tool call and queryEngine generating tool output by LLM that is then passed to the agent.
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](rag_and_tools)!
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](5_rag_and_tools)!
@@ -127,4 +127,4 @@ In the final tool call, it used the `sumNumbers` function to add the two budgets
}
```
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](llamaparse).
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](6_llamaparse).
@@ -17,4 +17,4 @@ const documents = await reader.loadData("../data/sf_budget_2023_2024.pdf");
Now you will be able to ask more complicated questions of the same PDF and get better results. You can find this code [in our repo](https://github.com/run-llama/ts-agents/blob/main/4_llamaparse/agent.ts).
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](qdrant).
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](7_qdrant).
@@ -65,13 +65,13 @@ Since parsing a PDF can be slow, especially a large one, using the pre-parsed ch
In this guide you've learned how to
- [Create an agent](create_agent)
- [Create an agent](2_create_agent)
- Use remote LLMs like GPT-4
- [Use local LLMs like Mixtral](local_model)
- [Create a RAG query engine](agentic_rag)
- [Turn functions and query engines into agent tools](rag_and_tools)
- [Use local LLMs like Mixtral](3_local_model)
- [Create a RAG query engine](4_agentic_rag)
- [Turn functions and query engines into agent tools](5_rag_and_tools)
- Combine those tools
- [Enhance your parsing with LlamaParse](llamaparse)
- [Enhance your parsing with LlamaParse](6_llamaparse)
- Persist your data in a vector store
The next steps are up to you! Try creating more complex functions and query engines, and set your agent loose on the world.
@@ -21,4 +21,4 @@ Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for t
## API Reference
- [VectorStoreBase](/docs/api/classes/VectorStoreBase)
- [BaseVectorStore](/docs/api/classes/BaseVectorStore)
+7
View File
@@ -1,5 +1,12 @@
# @llamaindex/core-e2e
## 0.0.8
### Patch Changes
- 34faf48: chore: move vector stores to their own packages
- 9456616: refactor: @llamaindex/postgres
## 0.0.7
### Patch Changes
@@ -1,5 +1,36 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.130
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.0.129
### Patch Changes
- llamaindex@0.8.33
## 0.0.128
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.0.127
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.0.126
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.126",
"version": "0.0.130",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,20 @@
# @llamaindex/llama-parse-browser-test
## 0.0.42
### Patch Changes
- Updated dependencies [d6c270e]
- @llamaindex/cloud@2.0.22
## 0.0.41
### Patch Changes
- Updated dependencies [5dec9f9]
- Updated dependencies [fd9c829]
- @llamaindex/cloud@2.0.21
## 0.0.40
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.40",
"version": "0.0.42",
"type": "module",
"scripts": {
"dev": "vite",
@@ -10,7 +10,7 @@
},
"devDependencies": {
"typescript": "^5.7.2",
"vite": "^5.4.11",
"vite": "^5.4.12",
"vite-plugin-wasm": "^3.3.0"
},
"dependencies": {
+31
View File
@@ -1,5 +1,36 @@
# @llamaindex/next-agent-test
## 0.1.130
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.1.129
### Patch Changes
- llamaindex@0.8.33
## 0.1.128
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.1.127
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.1.126
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.126",
"version": "0.1.130",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,36 @@
# test-edge-runtime
## 0.1.129
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.1.128
### Patch Changes
- llamaindex@0.8.33
## 0.1.127
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.1.126
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.1.125
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.125",
"version": "0.1.129",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,36 @@
# @llamaindex/next-node-runtime
## 0.0.111
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.0.110
### Patch Changes
- llamaindex@0.8.33
## 0.0.109
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.0.108
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.0.107
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.0.107",
"version": "0.0.111",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,36 @@
# @llamaindex/waku-query-engine-test
## 0.0.130
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.0.129
### Patch Changes
- llamaindex@0.8.33
## 0.0.128
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.0.127
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.0.126
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.126",
"version": "0.0.130",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/e2e",
"private": true,
"version": "0.0.7",
"version": "0.0.8",
"type": "module",
"scripts": {
"e2e": "node --import tsx --import ./mock-register.js --test ./node/**/*.e2e.ts",
+35
View File
@@ -1,5 +1,40 @@
# examples
## 0.1.0
### Minor Changes
- 21769c8: Update deprecated response property of query engine to message.content property
### Patch Changes
- llamaindex@0.8.33
## 0.0.23
### Patch Changes
- 8744796: Update the chromadb npm client to support the latest chromadb image (0.6.3)
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
- @llamaindex/core@0.4.21
- @llamaindex/vercel@0.0.8
- @llamaindex/readers@1.0.23
## 0.0.22
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
- @llamaindex/core@0.4.20
- @llamaindex/vercel@0.0.7
- @llamaindex/readers@1.0.22
## 0.0.21
### Patch Changes
+2 -2
View File
@@ -33,12 +33,12 @@ async function main() {
});
// Chat with the agent
const response = await agent.chat({
const { message } = await agent.chat({
message: "What was his first salary?",
});
// Print the response
console.log(response.response);
console.log(message.content);
}
void main().then(() => {
+2 -2
View File
@@ -52,12 +52,12 @@ async function main() {
});
// Chat with the agent
const response = await agent.chat({
const { message } = await agent.chat({
message: "What was his first salary?",
});
// Print the response
console.log(response.response);
console.log(message.content);
}
void main().then(() => {
+2 -1
View File
@@ -1,4 +1,5 @@
import { OpenAI, OpenAIAgent, WikipediaTool } from "llamaindex";
import { OpenAI, OpenAIAgent } from "llamaindex";
import { WikipediaTool } from "../wiki";
async function main() {
const llm = new OpenAI({ model: "gpt-4-turbo" });
+4 -3
View File
@@ -1,5 +1,6 @@
import { Anthropic, FunctionTool, Settings, WikipediaTool } from "llamaindex";
import { Anthropic, FunctionTool, Settings } from "llamaindex";
import { AnthropicAgent } from "llamaindex/agent/anthropic";
import { WikipediaTool } from "../wiki";
Settings.callbackManager.on("llm-tool-call", (event) => {
console.log("llm-tool-call", event.detail.toolCall);
@@ -37,12 +38,12 @@ const agent = new AnthropicAgent({
});
async function main() {
const { response } = await agent.chat({
const { message } = await agent.chat({
message:
"What is the weather in New York? What's the history of New York from Wikipedia in 3 sentences?",
});
console.log(response);
console.log(message.content);
}
void main();
+1 -1
View File
@@ -8,7 +8,7 @@ import {
const collectionName = "movie_reviews";
async function main() {
const sourceFile: string = "./data/movie_reviews.csv";
const sourceFile: string = "../data/movie_reviews.csv";
try {
console.log(`Loading data from ${sourceFile}`);
+2 -2
View File
@@ -38,12 +38,12 @@ async function main() {
const query = "What is the meaning of life?";
// Query
const response = await queryEngine.query({
const { message } = await queryEngine.query({
query,
});
// Log the response
console.log(response.response);
console.log(message.content);
}
main().catch(console.error);
+9 -7
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/examples",
"private": true,
"version": "0.0.21",
"version": "0.1.0",
"dependencies": {
"@ai-sdk/openai": "^1.0.5",
"@aws-crypto/sha256-js": "^5.2.0",
@@ -9,9 +9,9 @@
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@datastax/astra-db-ts": "^1.4.1",
"@llamaindex/core": "^0.4.19",
"@llamaindex/readers": "^1.0.21",
"@llamaindex/vercel": "^0.0.6",
"@llamaindex/core": "^0.4.21",
"@llamaindex/readers": "^1.0.23",
"@llamaindex/vercel": "^0.0.8",
"@llamaindex/workflow": "^0.0.8",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^4.0.0",
@@ -22,10 +22,12 @@
"commander": "^12.1.0",
"dotenv": "^16.4.5",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.8.28",
"mongodb": "^6.7.0",
"llamaindex": "^0.8.33",
"mongodb": "6.7.0",
"pathe": "^1.1.2",
"postgres": "^3.4.4"
"postgres": "^3.4.4",
"ajv": "^8.17.1",
"wikipedia": "^2.1.2"
},
"devDependencies": {
"@types/node": "^22.9.0",
+4 -4
View File
@@ -33,19 +33,19 @@ async function main() {
retriever,
});
const response = await queryEngine.query({
const { message } = await queryEngine.query({
query: "What did the author do growing up?",
});
// cohere response
console.log(response.response);
console.log(message.content);
const baseResponse = await baseQueryEngine.query({
const { message: baseMessage } = await baseQueryEngine.query({
query: "What did the author do growing up?",
});
// response without cohere
console.log(baseResponse.response);
console.log(baseMessage.content);
}
main().catch(console.error);
+19 -2
View File
@@ -37,7 +37,7 @@ Read and follow the instructions in the README.md file located one directory up
To import documents and save the embedding vectors to your database:
> `npx tsx pg-vector-store/load-docs.ts data`
> `npx tsx vector-store/pg/load-docs.ts data`
where data is the directory containing your input files. Using the `data` directory in the example above will read all of the files in that directory using the LlamaIndexTS default readers for each file type.
@@ -45,6 +45,23 @@ where data is the directory containing your input files. Using the `data` direct
To query using the resulting vector store:
> `npx tsx pg-vector-store/query.ts`
> `npx tsx vector-store/pg/query.ts`
The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query.
## Supabase
You can try the supabase example by running:
> `npx tsx vector-store/pg/supabase.ts`
This will use the `POSTGRES_URL` environment variable to connect to your Supabase database.
Get the connection string from the Supabase project settings page. See more details here: https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
## Vercel
You can try the vercel example by running:
> `npx tsx vector-store/pg/vercel.ts`
For more information on Vercel Postgres, see: https://vercel.com/docs/storage/vercel-postgres/sdk
+1 -1
View File
@@ -1,10 +1,10 @@
// load-docs.ts
import {
PGVectorStore,
SimpleDirectoryReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
import fs from "node:fs/promises";
async function getSourceFilenames(sourceDir: string) {
+2 -1
View File
@@ -1,5 +1,6 @@
import dotenv from "dotenv";
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
import { Document, VectorStoreQueryMode } from "llamaindex";
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
import postgres from "postgres";
dotenv.config();
+2 -1
View File
@@ -1,4 +1,5 @@
import { PGVectorStore, VectorStoreIndex } from "llamaindex";
import { VectorStoreIndex } from "llamaindex";
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
async function main() {
// eslint-disable-next-line @typescript-eslint/no-require-imports
+35
View File
@@ -0,0 +1,35 @@
import dotenv from "dotenv";
import {
SimpleDirectoryReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
dotenv.config();
// Supabase example: load the files under `../data`, index them through a
// PGVectorStore, and run a single query against the resulting index.
//
// Copy the direct connection string from your Supabase project and expose it
// as the POSTGRES_URL environment variable before running this script:
// https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
const sourceDir = "../data";
const connectionString = process.env.POSTGRES_URL;

// Read every file in the source directory into documents.
const documents = await new SimpleDirectoryReader().loadData({
  directoryPath: sourceDir,
});

// The source directory path doubles as the collection name.
const vectorStore = new PGVectorStore({ clientConfig: { connectionString } });
vectorStore.setCollection(sourceDir);

const storageContext = await storageContextFromDefaults({ vectorStore });
const index = await VectorStoreIndex.fromDocuments(documents, {
  storageContext,
});

// Print the whole query result object, not just the message text.
const results = await index.asQueryEngine().query({
  query: "Information about the planet",
});
console.log(results);
+2 -1
View File
@@ -1,7 +1,8 @@
// https://vercel.com/docs/storage/vercel-postgres/sdk
import { sql } from "@vercel/postgres";
import dotenv from "dotenv";
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
import { Document, VectorStoreQueryMode } from "llamaindex";
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
dotenv.config();
+2 -2
View File
@@ -21,12 +21,12 @@ async function main() {
// Query the index
const queryEngine = index.asQueryEngine();
const { response, sourceNodes } = await queryEngine.query({
const { message, sourceNodes } = await queryEngine.query({
query: "What did the author do in college?",
});
// Output response with sources
console.log(response);
console.log(message.content);
if (sourceNodes) {
sourceNodes.forEach((source: NodeWithScore, index: number) => {
+2 -2
View File
@@ -29,10 +29,10 @@ async function main() {
nodePostprocessor,
]);
const response = await queryEngine.query({
const { message } = await queryEngine.query({
query: "What did the author do growing up?",
});
console.log(response.response);
console.log(message.content);
}
main().catch(console.error);
+2 -1
View File
@@ -1,6 +1,7 @@
import { openai } from "@ai-sdk/openai";
import { VercelLLM } from "@llamaindex/vercel";
import { LLMAgent, WikipediaTool } from "llamaindex";
import { LLMAgent } from "llamaindex";
import { WikipediaTool } from "../wiki";
async function main() {
// Create an instance of VercelLLM with the OpenAI model
@@ -1,3 +1,5 @@
/** Example of a tool that uses Wikipedia */
import type { BaseTool, ToolMetadata } from "@llamaindex/core/llms";
import type { JSONSchemaType } from "ajv";
import { default as wiki } from "wikipedia";
@@ -7,7 +9,7 @@ type WikipediaParameter = {
lang?: string;
};
export type WikipediaToolParams = {
type WikipediaToolParams = {
metadata?: ToolMetadata<JSONSchemaType<WikipediaParameter>>;
};
@@ -43,8 +45,8 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
page: string,
lang: string = this.DEFAULT_LANG,
): Promise<string> {
wiki.default.setLang(lang);
const pageResult = await wiki.default.page(page, { autoSuggest: false });
wiki.setLang(lang);
const pageResult = await wiki.page(page, { autoSuggest: false });
const content = await pageResult.content();
return content;
}
@@ -53,7 +55,7 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
query,
lang = this.DEFAULT_LANG,
}: WikipediaParameter): Promise<string> {
const searchResult = await wiki.default.search(query);
const searchResult = await wiki.search(query);
if (searchResult.results.length === 0) return "No search results.";
return await this.loadData(searchResult.results[0].title, lang);
}
+31
View File
@@ -1,5 +1,36 @@
# @llamaindex/autotool
## 5.0.34
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 5.0.33
### Patch Changes
- llamaindex@0.8.33
## 5.0.32
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 5.0.31
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 5.0.30
### Patch Changes
@@ -1,5 +1,40 @@
# @llamaindex/autotool-01-node-example
## 0.0.77
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
- @llamaindex/autotool@5.0.34
## 0.0.76
### Patch Changes
- llamaindex@0.8.33
- @llamaindex/autotool@5.0.33
## 0.0.75
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
- @llamaindex/autotool@5.0.32
## 0.0.74
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
- @llamaindex/autotool@5.0.31
## 0.0.73
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.73"
"version": "0.0.77"
}
@@ -1,5 +1,40 @@
# @llamaindex/autotool-02-next-example
## 0.1.121
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
- @llamaindex/autotool@5.0.34
## 0.1.120
### Patch Changes
- llamaindex@0.8.33
- @llamaindex/autotool@5.0.33
## 0.1.119
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
- @llamaindex/autotool@5.0.32
## 0.1.118
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
- @llamaindex/autotool@5.0.31
## 0.1.117
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool-02-next-example",
"private": true,
"version": "0.1.117",
"version": "0.1.121",
"scripts": {
"dev": "next dev",
"build": "next build",
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool",
"type": "module",
"version": "5.0.30",
"version": "5.0.34",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
@@ -70,7 +70,7 @@
"@swc/types": "^0.1.12",
"@types/json-schema": "^7.0.15",
"@types/node": "^22.9.0",
"bunchee": "6.0.3",
"bunchee": "6.2.0",
"llamaindex": "workspace:*",
"next": "15.0.3",
"rollup": "^4.28.1",
+18
View File
@@ -1,5 +1,23 @@
# @llamaindex/cloud
## 2.0.22
### Patch Changes
- d6c270e: feat: support pass project and org id to llama parse reader
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- @llamaindex/core@0.4.21
## 2.0.21
### Patch Changes
- 5dec9f9: chore: bump sdk deps version
- fd9c829: chore: bump llamacloud openapi
- Updated dependencies [d211b7a]
- @llamaindex/core@0.4.20
## 2.0.20
### Patch Changes
+9 -3
View File
@@ -10,7 +10,13 @@ export default defineConfig({
format: "prettier",
lint: "eslint",
},
types: {
enums: "javascript",
},
plugins: [
"@hey-api/schemas",
"@hey-api/sdk",
{
enums: "javascript",
identifierCase: "preserve",
name: "@hey-api/typescript",
},
],
});
+4694 -190
View File
File diff suppressed because it is too large Load Diff
+4 -4
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "2.0.20",
"version": "2.0.22",
"type": "module",
"license": "MIT",
"scripts": {
@@ -50,11 +50,11 @@
"directory": "packages/cloud"
},
"devDependencies": {
"@hey-api/client-fetch": "^0.4.4",
"@hey-api/openapi-ts": "^0.56.0",
"@hey-api/client-fetch": "^0.6.0",
"@hey-api/openapi-ts": "^0.61.0",
"@llamaindex/core": "workspace:*",
"@llamaindex/env": "workspace:*",
"bunchee": "6.0.3"
"bunchee": "6.2.0"
},
"peerDependencies": {
"@llamaindex/core": "workspace:*",
+28
View File
@@ -31,6 +31,8 @@ var process: any;
* See https://github.com/run-llama/llama_parse
*/
export class LlamaParseReader extends FileReader {
project_id?: string | undefined;
organization_id?: string | undefined;
// The API key for the LlamaParse API. Can be set as an environment variable: LLAMA_CLOUD_API_KEY
apiKey: string;
// The base URL of the Llama Cloud Platform.
@@ -118,6 +120,7 @@ export class LlamaParseReader extends FileReader {
structured_output?: boolean | undefined;
structured_output_json_schema?: string | undefined;
structured_output_json_schema_name?: string | undefined;
extract_layout?: boolean | undefined;
// numWorkers is implemented in SimpleDirectoryReader
stdout?: WriteStream | undefined;
@@ -248,6 +251,7 @@ export class LlamaParseReader extends FileReader {
structured_output_json_schema: this.structured_output_json_schema,
structured_output_json_schema_name:
this.structured_output_json_schema_name,
extract_layout: this.extract_layout,
} satisfies {
[Key in keyof Body_upload_file_api_v1_parsing_upload_post]-?:
| Body_upload_file_api_v1_parsing_upload_post[Key]
@@ -257,6 +261,10 @@ export class LlamaParseReader extends FileReader {
const response = await uploadFileApiV1ParsingUploadPost({
client: this.#client,
throwOnError: true,
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
signal: AbortSignal.timeout(this.maxTimeout * 1000),
body,
});
@@ -282,6 +290,10 @@ export class LlamaParseReader extends FileReader {
path: {
job_id: jobId,
},
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
signal,
});
const { data } = result;
@@ -298,6 +310,10 @@ export class LlamaParseReader extends FileReader {
path: {
job_id: jobId,
},
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
signal,
});
break;
@@ -309,6 +325,10 @@ export class LlamaParseReader extends FileReader {
path: {
job_id: jobId,
},
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
signal,
});
break;
@@ -320,6 +340,10 @@ export class LlamaParseReader extends FileReader {
path: {
job_id: jobId,
},
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
signal,
});
break;
@@ -509,6 +533,10 @@ export class LlamaParseReader extends FileReader {
job_id: jobId,
name: imageName,
},
query: {
project_id: this.project_id ?? null,
organization_id: this.organization_id ?? null,
},
});
if (response.error) {
throw new Error(`Failed to download image: ${response.error.detail}`);
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/community
## 0.0.79
### Patch Changes
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- @llamaindex/core@0.4.21
## 0.0.78
### Patch Changes
- Updated dependencies [d211b7a]
- @llamaindex/core@0.4.20
## 0.0.77
### Patch Changes
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/community",
"description": "Community package for LlamaIndexTS",
"version": "0.0.77",
"version": "0.0.79",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
@@ -43,7 +43,7 @@
},
"devDependencies": {
"@types/node": "^22.9.0",
"bunchee": "6.0.3"
"bunchee": "6.2.0"
},
"dependencies": {
"@aws-sdk/client-bedrock-agent-runtime": "^3.706.0",
+13
View File
@@ -1,5 +1,18 @@
# @llamaindex/core
## 0.4.21
### Patch Changes
- 9456616: refactor: @llamaindex/postgres
- 1931bbc: refactor: @llamaindex/azure
## 0.4.20
### Patch Changes
- d211b7a: added support for tool calls with results in message history for Anthropic agent
## 0.4.19
### Patch Changes
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.4.19",
"version": "0.4.21",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
@@ -391,7 +391,7 @@
"devDependencies": {
"@edge-runtime/vm": "^4.0.4",
"ajv": "^8.17.1",
"bunchee": "6.0.3",
"bunchee": "6.2.0",
"happy-dom": "^15.11.6",
"natural": "^8.0.1"
},
+15
View File
@@ -1,5 +1,6 @@
import { getEnv } from "@llamaindex/env";
import type { Tokenizer } from "@llamaindex/env/tokenizers";
import type { BaseEmbedding } from "../embeddings";
import type { LLM } from "../llms";
import {
type CallbackManager,
@@ -12,6 +13,11 @@ import {
setChunkSize,
withChunkSize,
} from "./settings/chunk-size";
import {
getEmbeddedModel,
setEmbeddedModel,
withEmbeddedModel,
} from "./settings/embedModel";
import { getLLM, setLLM, withLLM } from "./settings/llm";
import {
getTokenizer,
@@ -29,6 +35,15 @@ export const Settings = {
withLLM<Result>(llm: LLM, fn: () => Result): Result {
return withLLM(llm, fn);
},
// Embedding model accessor pair. Reads delegate to getEmbeddedModel() and
// writes delegate to setEmbeddedModel(), both imported from
// ./settings/embedModel.
get embedModel() {
return getEmbeddedModel();
},
set embedModel(embedModel) {
setEmbeddedModel(embedModel);
},
// Delegates to withEmbeddedModel(embedModel, fn) and returns whatever it returns.
// NOTE(review): presumably this scopes `embedModel` to the execution of `fn`
// (mirroring withLLM above) — confirm against ./settings/embedModel.
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
return withEmbeddedModel(embedModel, fn);
},
get tokenizer() {
return getTokenizer();
},
@@ -1,15 +1,18 @@
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import { AsyncLocalStorage } from "@llamaindex/env";
import { OpenAIEmbedding } from "@llamaindex/openai";
const embeddedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>();
let globalEmbeddedModel: BaseEmbedding | null = null;
export function getEmbeddedModel(): BaseEmbedding {
if (globalEmbeddedModel === null) {
globalEmbeddedModel = new OpenAIEmbedding();
const currentEmbeddedModel =
embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
if (!currentEmbeddedModel) {
throw new Error(
"Cannot find Embedding, please set `Settings.embedModel = ...` on the top of your code",
);
}
return embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
return currentEmbeddedModel;
}
export function setEmbeddedModel(embeddedModel: BaseEmbedding) {
@@ -0,0 +1,167 @@
import { path } from "@llamaindex/env";
import {
DEFAULT_DOC_STORE_PERSIST_FILENAME,
DEFAULT_PERSIST_DIR,
} from "../../global";
import type { StoredValue } from "../../schema";
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
// Property names of the persisted document envelope (see DocJson):
// TYPE_KEY holds the node's ObjectType, DATA_KEY holds the serialized payload.
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";
/**
 * Converts a node's plain-object form to and from `T`, the representation
 * that is handed to the underlying storage.
 */
export interface Serializer<T> {
  /** Encodes a plain object into the persisted representation. */
  toPersistence(data: Record<string, unknown>): T;
  /** Decodes a persisted value back into a plain object. */
  fromPersistence(data: T): Record<string, unknown>;
}

/** Serializer whose persisted representation is a JSON string. */
export const jsonSerializer: Serializer<string> = {
  toPersistence: (data) => JSON.stringify(data),
  // The parsed value is returned as-is; no shape validation happens here.
  fromPersistence: (data) => JSON.parse(data),
};

/**
 * Pass-through serializer: both directions hand back the very same object
 * reference they were given (no copy is made).
 */
export const noneSerializer: Serializer<Record<string, unknown>> = {
  toPersistence: (data) => data,
  fromPersistence: (data) => data,
};
/**
 * Envelope stored for each document: the node's ObjectType under TYPE_KEY and
 * the serializer output (`Data`) under DATA_KEY.
 */
type DocJson<Data> = {
[TYPE_KEY]: ObjectType;
[DATA_KEY]: Data;
};
/**
 * Type guard for the persisted document envelope.
 *
 * @param docJson - A value read back from storage; may be null or undefined.
 * @returns `true` only when `docJson` is a non-null object whose TYPE_KEY and
 *   DATA_KEY entries are both defined. The entries' contents are not inspected.
 */
export function isValidDocJson(
  docJson: StoredValue | null | undefined,
): docJson is DocJson<unknown> {
  // Reject anything that is not an object first (typeof null is "object" too).
  if (typeof docJson !== "object" || docJson === null) {
    return false;
  }
  const hasType = docJson[TYPE_KEY] !== undefined;
  return hasType && docJson[DATA_KEY] !== undefined;
}
/**
 * Wraps a node in the persisted envelope.
 *
 * @param doc - Node to persist.
 * @param serializer - Encodes `doc.toJSON()` into the stored payload.
 * @returns An object with the encoded payload under DATA_KEY and `doc.type`
 *   under TYPE_KEY.
 */
export function docToJson(
  doc: BaseNode,
  serializer: Serializer<unknown>,
): DocJson<unknown> {
  // Serialize before reading the type, matching the original evaluation order.
  const payload = serializer.toPersistence(doc.toJSON());
  const kind = doc.type;
  return {
    [DATA_KEY]: payload,
    [TYPE_KEY]: kind,
  };
}
/**
 * Rebuilds a node from its persisted envelope.
 *
 * @param docDict - Envelope previously produced for storage.
 * @param serializer - Decodes the DATA_KEY payload back into a plain object.
 * @returns A `Document` for ObjectType.DOCUMENT or a `TextNode` for
 *   ObjectType.TEXT.
 * @throws Error `Unknown doc type: …` for any other type tag.
 *
 * NOTE(review): the Document branch does not forward `relationships` and the
 * TextNode branch does not forward `embedding` — confirm this asymmetry is
 * intended.
 */
export function jsonToDoc<Data>(
  docDict: DocJson<Data>,
  serializer: Serializer<Data>,
): BaseNode {
  const docType = docDict[TYPE_KEY];
  // The payload is decoded before the type tag is examined, so a decoding
  // failure surfaces even for unknown types.
  // fixme: zod type check this
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]);

  switch (docType) {
    case ObjectType.DOCUMENT:
      return new Document({
        text: dataDict.text,
        id_: dataDict.id_,
        embedding: dataDict.embedding,
        hash: dataDict.hash,
        metadata: dataDict.metadata,
      });
    case ObjectType.TEXT:
      return new TextNode({
        text: dataDict.text,
        id_: dataDict.id_,
        hash: dataDict.hash,
        metadata: dataDict.metadata,
        relationships: dataDict.relationships,
      });
    default:
      throw new Error(`Unknown doc type: ${docType}`);
  }
}
// Default location used by BaseDocumentStore.persist(): the default persist
// directory joined with the default doc-store file name.
const DEFAULT_PERSIST_PATH = path.join(
DEFAULT_PERSIST_DIR,
DEFAULT_DOC_STORE_PERSIST_FILENAME,
);
/** Bookkeeping record kept per reference document. */
export interface RefDocInfo {
// Ids of the nodes associated with this reference document.
nodeIds: string[];
// Free-form extra data; its schema is not constrained by this type.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
extraInfo: Record<string, any>;
}
/**
 * Abstract contract for document stores. Subclasses supply the storage
 * operations; this base class adds node-lookup helpers built on top of
 * `getDocument`.
 */
export abstract class BaseDocumentStore {
  /** Serializer used for persisted payloads; defaults to JSON strings. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  serializer: Serializer<any> = jsonSerializer;

  // Save/load

  /**
   * Persistence hook. The base implementation is intentionally empty.
   *
   * @param persistPath - Target path; defaults to DEFAULT_PERSIST_PATH.
   */
  persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
    // Persist the docstore to a file.
  }

  // Main interface
  abstract docs(): Promise<Record<string, BaseNode>>;

  abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;

  abstract getDocument(
    docId: string,
    raiseError: boolean,
  ): Promise<BaseNode | undefined>;

  abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;

  abstract documentExists(docId: string): Promise<boolean>;

  // Hash
  abstract setDocumentHash(docId: string, docHash: string): Promise<void>;

  abstract getDocumentHash(docId: string): Promise<string | undefined>;

  abstract getAllDocumentHashes(): Promise<Record<string, string>>;

  // Ref Docs
  abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;

  abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;

  abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;

  // Nodes

  /**
   * Looks up several nodes at once.
   *
   * @param nodeIds - Ids to resolve; the result keeps the same order.
   * @param raiseError - Forwarded to `getNode` for every id.
   * @returns A promise for all nodes; it rejects if any single lookup rejects.
   */
  getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
    const lookups = nodeIds.map((nodeId) => this.getNode(nodeId, raiseError));
    return Promise.all(lookups);
  }

  /**
   * Looks up a single node.
   *
   * @param nodeId - Id passed to `getDocument`.
   * @param raiseError - Forwarded to `getDocument`.
   * @throws Error when `getDocument` resolves to anything that is not a
   *   `BaseNode` instance, which includes `undefined`.
   */
  async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
    const doc = await this.getDocument(nodeId, raiseError);
    if (doc instanceof BaseNode) {
      return doc;
    }
    throw new Error(`Document ${nodeId} is not a Node.`);
  }

  /**
   * Resolves an index → node-id mapping into an index → node mapping.
   * Lookups are awaited one at a time, using `getNode`'s default `raiseError`.
   */
  async getNodeDict(nodeIdDict: {
    [index: number]: string;
  }): Promise<Record<number, BaseNode>> {
    const result: Record<number, BaseNode> = {};
    for (const index in nodeIdDict) {
      const nodeId = nodeIdDict[index]!;
      result[index] = await this.getNode(nodeId);
    }
    return result;
  }
}
+2 -167
View File
@@ -1,167 +1,2 @@
import { path } from "@llamaindex/env";
import {
DEFAULT_DOC_STORE_PERSIST_FILENAME,
DEFAULT_PERSIST_DIR,
} from "../../global";
import type { StoredValue } from "../../schema";
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";
export interface Serializer<T> {
toPersistence(data: Record<string, unknown>): T;
fromPersistence(data: T): Record<string, unknown>;
}
export const jsonSerializer: Serializer<string> = {
toPersistence(data) {
return JSON.stringify(data);
},
fromPersistence(data) {
return JSON.parse(data);
},
};
export const noneSerializer: Serializer<Record<string, unknown>> = {
toPersistence(data) {
return data;
},
fromPersistence(data) {
return data;
},
};
type DocJson<Data> = {
[TYPE_KEY]: ObjectType;
[DATA_KEY]: Data;
};
export function isValidDocJson(
docJson: StoredValue | null | undefined,
): docJson is DocJson<unknown> {
return (
typeof docJson === "object" &&
docJson !== null &&
docJson[TYPE_KEY] !== undefined &&
docJson[DATA_KEY] !== undefined
);
}
export function docToJson(
doc: BaseNode,
serializer: Serializer<unknown>,
): DocJson<unknown> {
return {
[DATA_KEY]: serializer.toPersistence(doc.toJSON()),
[TYPE_KEY]: doc.type,
};
}
export function jsonToDoc<Data>(
docDict: DocJson<Data>,
serializer: Serializer<Data>,
): BaseNode {
const docType = docDict[TYPE_KEY];
// fixme: zod type check this
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]);
let doc: BaseNode;
if (docType === ObjectType.DOCUMENT) {
doc = new Document({
text: dataDict.text,
id_: dataDict.id_,
embedding: dataDict.embedding,
hash: dataDict.hash,
metadata: dataDict.metadata,
});
} else if (docType === ObjectType.TEXT) {
doc = new TextNode({
text: dataDict.text,
id_: dataDict.id_,
hash: dataDict.hash,
metadata: dataDict.metadata,
relationships: dataDict.relationships,
});
} else {
throw new Error(`Unknown doc type: ${docType}`);
}
return doc;
}
const DEFAULT_PERSIST_PATH = path.join(
DEFAULT_PERSIST_DIR,
DEFAULT_DOC_STORE_PERSIST_FILENAME,
);
export interface RefDocInfo {
nodeIds: string[];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
extraInfo: Record<string, any>;
}
export abstract class BaseDocumentStore {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
serializer: Serializer<any> = jsonSerializer;
// Save/load
persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
// Persist the docstore to a file.
}
// Main interface
abstract docs(): Promise<Record<string, BaseNode>>;
abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;
abstract getDocument(
docId: string,
raiseError: boolean,
): Promise<BaseNode | undefined>;
abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;
abstract documentExists(docId: string): Promise<boolean>;
// Hash
abstract setDocumentHash(docId: string, docHash: string): Promise<void>;
abstract getDocumentHash(docId: string): Promise<string | undefined>;
abstract getAllDocumentHashes(): Promise<Record<string, string>>;
// Ref Docs
abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;
abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;
abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;
// Nodes
getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
return Promise.all(
nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)),
);
}
async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
const doc = await this.getDocument(nodeId, raiseError);
if (!(doc instanceof BaseNode)) {
throw new Error(`Document ${nodeId} is not a Node.`);
}
return doc;
}
async getNodeDict(nodeIdDict: {
[index: number]: string;
}): Promise<Record<number, BaseNode>> {
const result: Record<number, BaseNode> = {};
for (const index in nodeIdDict) {
result[index] = await this.getNode(nodeIdDict[index]!);
}
return result;
}
}
export * from "./base-document-store";
export * from "./kv-document-store";
@@ -1,15 +1,13 @@
import { DEFAULT_NAMESPACE } from "@llamaindex/core/global";
import type { BaseNode } from "@llamaindex/core/schema";
import { ObjectType } from "@llamaindex/core/schema";
import type { RefDocInfo } from "@llamaindex/core/storage/doc-store";
import { DEFAULT_NAMESPACE } from "../../global";
import { BaseNode, ObjectType, type StoredValue } from "../../schema";
import type { BaseKVStore } from "../kv-store";
import {
BaseDocumentStore,
docToJson,
isValidDocJson,
jsonToDoc,
} from "@llamaindex/core/storage/doc-store";
import type { BaseKVStore } from "@llamaindex/core/storage/kv-store";
import _ from "lodash";
type RefDocInfo,
} from "./base-document-store";
type DocMetaData = { docHash: string; refDocId?: string };
@@ -68,7 +66,7 @@ export class KVDocumentStore extends BaseDocumentStore {
extraInfo: {},
};
refDocInfo.nodeIds.push(doc.id_);
if (_.isEmpty(refDocInfo.extraInfo)) {
if (Object.keys(refDocInfo.extraInfo).length === 0) {
refDocInfo.extraInfo = {};
}
await this.kvstore.put(
@@ -88,7 +86,7 @@ export class KVDocumentStore extends BaseDocumentStore {
raiseError: boolean = true,
): Promise<BaseNode | undefined> {
const json = await this.kvstore.get(docId, this.nodeCollection);
if (_.isNil(json)) {
if (this.isNil(json)) {
if (raiseError) {
throw new Error(`docId ${docId} not found.`);
} else {
@@ -103,23 +101,23 @@ export class KVDocumentStore extends BaseDocumentStore {
async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> {
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
return refDocInfo ? (_.clone(refDocInfo) as RefDocInfo) : undefined;
return refDocInfo ? (structuredClone(refDocInfo) as RefDocInfo) : undefined;
}
async getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined> {
const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
if (_.isNil(refDocInfos)) {
if (this.isNil(refDocInfos)) {
return;
}
return refDocInfos as Record<string, RefDocInfo>;
}
async refDocExists(refDocId: string): Promise<boolean> {
return !_.isNil(await this.getRefDocInfo(refDocId));
return !this.isNil(await this.getRefDocInfo(refDocId));
}
async documentExists(docId: string): Promise<boolean> {
return !_.isNil(await this.kvstore.get(docId, this.nodeCollection));
return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
}
private async removeRefDocNode(docId: string): Promise<void> {
@@ -129,13 +127,13 @@ export class KVDocumentStore extends BaseDocumentStore {
}
const refDocId = metadata.refDocId;
if (_.isNil(refDocId)) {
if (this.isNil(refDocId)) {
return;
}
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
if (!_.isNil(refDocInfo)) {
if (refDocInfo.nodeIds.length > 0) {
if (!this.isNil(refDocInfo)) {
if (refDocInfo!.nodeIds.length > 0) {
await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
}
await this.kvstore.delete(refDocId, this.metadataCollection);
@@ -164,7 +162,7 @@ export class KVDocumentStore extends BaseDocumentStore {
raiseError: boolean = true,
): Promise<void> {
const refDocInfo = await this.getRefDocInfo(refDocId);
if (_.isNil(refDocInfo)) {
if (this.isNil(refDocInfo)) {
if (raiseError) {
throw new Error(`ref_doc_id ${refDocId} not found.`);
} else {
@@ -172,7 +170,7 @@ export class KVDocumentStore extends BaseDocumentStore {
}
}
for (const docId of refDocInfo.nodeIds) {
for (const docId of refDocInfo!.nodeIds) {
await this.deleteDocument(docId, false, false);
}
@@ -187,7 +185,7 @@ export class KVDocumentStore extends BaseDocumentStore {
async getDocumentHash(docId: string): Promise<string | undefined> {
const metadata = await this.kvstore.get(docId, this.metadataCollection);
return _.get(metadata, "docHash");
return metadata?.docHash;
}
async getAllDocumentHashes(): Promise<Record<string, string>> {
@@ -201,4 +199,8 @@ export class KVDocumentStore extends BaseDocumentStore {
}
return hashes;
}
/** Returns true exactly when `value` is `null` or `undefined`. */
private isNil(value: RefDocInfo | StoredValue | undefined): boolean {
  // Loose equality against null matches null and undefined and nothing else.
  return value == null;
}
}
+135
View File
@@ -1,3 +1,7 @@
import type { BaseEmbedding } from "../embeddings/base.js";
import { Settings } from "../global";
import type { BaseNode, ModalityType } from "../schema/node.js";
/**
* Should be compatible with npm:pg and npm:postgres
*/
@@ -12,3 +16,134 @@ export interface IsomorphicDB {
close: () => Promise<void>;
onCloseEvent: (listener: () => void) => void;
}
/**
 * Value resolved by `BaseVectorStore.query`.
 * NOTE(review): `similarities` and `ids` look like parallel arrays (one entry
 * per hit) — confirm against the store implementations.
 */
export interface VectorStoreQueryResult {
// Matching nodes; optional, so a result may omit them.
nodes?: BaseNode[];
similarities: number[];
ids: string[];
}
/** Query modes selectable through `VectorStoreQuery.mode`. */
export enum VectorStoreQueryMode {
DEFAULT = "default",
SPARSE = "sparse",
HYBRID = "hybrid",
// fit learners
SVM = "svm",
LOGISTIC_REGRESSION = "logistic_regression",
LINEAR_REGRESSION = "linear_regression",
// maximum marginal relevance
MMR = "mmr",
// for Azure AI Search
SEMANTIC_HYBRID = "semantic_hybrid",
}
/** Comparison operators accepted by `MetadataFilter.operator`. */
export enum FilterOperator {
EQ = "==", // equal to; the default operator (string, number)
IN = "in", // in array (string or number)
GT = ">", // greater than (number)
LT = "<", // less than (number)
NE = "!=", // not equal to (string, number)
GTE = ">=", // greater than or equal to (number)
LTE = "<=", // less than or equal to (number)
NIN = "nin", // not in array (string or number)
ANY = "any", // contains any (array of strings)
ALL = "all", // contains all (array of strings)
TEXT_MATCH = "text_match", // full-text match (searches for a specific substring, token or phrase within the text field)
CONTAINS = "contains", // metadata array contains value (string or number)
IS_EMPTY = "is_empty", // the field does not exist or is empty (null or empty array)
}
/** Logical connectives accepted by `MetadataFilters.condition`. */
export enum FilterCondition {
AND = "and",
OR = "or",
}
/** Values a metadata filter can compare against: one string/number, or a homogeneous array of them. */
export type MetadataFilterValue = string | number | string[] | number[];
/** A single metadata predicate: a key, an operator, and an optional comparison value. */
export interface MetadataFilter {
key: string;
// Optional at the type level; the type does not say which operators need it.
value?: MetadataFilterValue;
// Accepts the string value of any FilterOperator member.
operator: `${FilterOperator}`; // ==, any, all,...
}
/** A list of metadata predicates plus an optional connective. */
export interface MetadataFilters {
filters: Array<MetadataFilter>;
// Accepts the string value of any FilterCondition member.
// NOTE(review): the behaviour when omitted is defined by each store — confirm.
condition?: `${FilterCondition}`; // and, or
}
/** Describes one metadata field by name, type and description (all free-form strings). */
export interface MetadataInfo {
name: string;
type: string;
description: string;
}
/** Describes a vector store: its metadata fields plus a textual content description. */
export interface VectorStoreInfo {
metadataInfo: MetadataInfo[];
contentInfo: string;
}
/**
 * Query passed to `BaseVectorStore.query`. Only `similarityTopK` and `mode`
 * are required.
 */
export interface VectorStoreQuery {
queryEmbedding?: number[];
similarityTopK: number;
docIds?: string[];
queryStr?: string;
mode: VectorStoreQueryMode;
// NOTE(review): presumably a weighting used by hybrid modes — confirm.
alpha?: number;
filters?: MetadataFilters | undefined;
// NOTE(review): presumably only relevant to VectorStoreQueryMode.MMR — confirm.
mmrThreshold?: number;
}
// Supported types of vector stores (for each modality).
// Every ModalityType key is optional, so a value may cover only some modalities.
export type VectorStoreByType = {
[P in ModalityType]?: BaseVectorStore;
};
/** Constructor options shared by vector stores (consumed by `BaseVectorStore`). */
export type VectorStoreBaseParams = {
// When omitted, BaseVectorStore falls back to Settings.embedModel.
embeddingModel?: BaseEmbedding | undefined;
};
/**
 * Abstract base for vector stores, parameterized by the type of the
 * underlying client object.
 */
export abstract class BaseVectorStore<Client = unknown> {
  /** Embedding model bound to this store at construction time. */
  embedModel: BaseEmbedding;
  abstract storesText: boolean;
  isEmbeddingQuery?: boolean;

  /**
   * @param params - Optional settings. When `embeddingModel` is missing (or
   *   nullish), `Settings.embedModel` is read and used instead.
   */
  protected constructor(params?: VectorStoreBaseParams) {
    const explicitModel = params?.embeddingModel;
    // The global setting is consulted only when no model was supplied.
    this.embedModel = explicitModel ?? Settings.embedModel;
  }

  /** Returns the underlying client. */
  abstract client(): Client;

  /** Adds nodes to the store and resolves to a list of string ids. */
  abstract add(embeddingResults: BaseNode[]): Promise<string[]>;

  /** Deletes by reference-document id. */
  abstract delete(refDocId: string, deleteOptions?: object): Promise<void>;

  /** Runs a query against the store. */
  abstract query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult>;
}
/**
 * Narrows a filter value to a single string or number.
 *
 * @param value - Filter value to check; may be omitted.
 * @returns `value` unchanged when it is a string or a number.
 * @throws Error "Value must be a string or number" for arrays, `undefined`
 *   and anything else.
 */
export const parsePrimitiveValue = (
  value?: MetadataFilterValue,
): string | number => {
  switch (typeof value) {
    case "string":
    case "number":
      return value;
    default:
      throw new Error("Value must be a string or number");
  }
};
/**
 * Narrows a filter value to an array of strings or numbers.
 *
 * @param value - Filter value to check; may be omitted.
 * @returns The same array instance when every element is a string or a
 *   number. An empty array is accepted.
 * @throws Error "Value must be an array of strings or numbers" when `value`
 *   is not an array or contains any other kind of element.
 */
export const parseArrayValue = (
  value?: MetadataFilterValue,
): string[] | number[] => {
  if (
    Array.isArray(value) &&
    !value.some((item) => typeof item !== "string" && typeof item !== "number")
  ) {
    return value;
  }
  throw new Error("Value must be an array of strings or numbers");
};
/**
 * Narrows a filter value to a number.
 *
 * @param value - Filter value to check; may be omitted.
 * @returns `value` unchanged when `typeof value === "number"`.
 * @throws Error "Value must be a number" for every other input; numeric
 *   strings are not converted.
 */
export const parseNumberValue = (value?: MetadataFilterValue): number => {
  if (typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a number");
};
export * from "./utils.js";
@@ -1,6 +1,9 @@
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
import { ObjectType, jsonToNode } from "@llamaindex/core/schema";
import type { MetadataFilterValue } from "./types.js";
import {
ObjectType,
jsonToNode,
type BaseNode,
type Metadata,
} from "../schema";
const DEFAULT_TEXT_KEY = "text";
@@ -91,32 +94,6 @@ export function metadataDictToNode(
}
}
export const parsePrimitiveValue = (
value?: MetadataFilterValue,
): string | number => {
if (typeof value !== "number" && typeof value !== "string") {
throw new Error("Value must be a string or number");
}
return value;
};
export const parseArrayValue = (
value?: MetadataFilterValue,
): string[] | number[] => {
const isPrimitiveArray =
Array.isArray(value) &&
value.every((v) => typeof v === "string" || typeof v === "number");
if (!isPrimitiveArray) {
throw new Error("Value must be an array of strings or numbers");
}
return value;
};
export const parseNumberValue = (value?: MetadataFilterValue): number => {
if (typeof value !== "number") throw new Error("Value must be a number");
return value;
};
export const escapeLikeString = (value: string) => {
return value.replace(/[%_\\]/g, "\\$&");
};
+1 -1
View File
@@ -125,7 +125,7 @@
"@huggingface/transformers": "^3.0.2",
"@types/node": "^22.9.0",
"@types/readable-stream": "^4.0.15",
"bunchee": "6.0.3",
"bunchee": "6.2.0",
"gpt-tokenizer": "^2.6.2",
"pathe": "^1.1.2",
"vitest": "^2.1.5"
+31
View File
@@ -1,5 +1,36 @@
# @llamaindex/experimental
## 0.0.146
### Patch Changes
- Updated dependencies [9f8ad37]
- llamaindex@0.8.34
## 0.0.145
### Patch Changes
- llamaindex@0.8.33
## 0.0.144
### Patch Changes
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [1931bbc]
- llamaindex@0.8.32
## 0.0.143
### Patch Changes
- Updated dependencies [d211b7a]
- Updated dependencies [0ebbfc1]
- llamaindex@0.8.31
## 0.0.142
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.142",
"version": "0.0.146",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+97
View File
@@ -1,5 +1,102 @@
# llamaindex
## 0.8.34
### Patch Changes
- 9f8ad37: fix: missing peer deps in llamaindex
- Updated dependencies [7265f74]
- @llamaindex/openai@0.1.48
- @llamaindex/clip@0.0.32
- @llamaindex/deepinfra@0.0.32
- @llamaindex/groq@0.0.47
- @llamaindex/huggingface@0.0.32
- @llamaindex/vllm@0.0.18
## 0.8.33
### Patch Changes
- Updated dependencies [2019a04]
- Updated dependencies [e38e474]
- Updated dependencies [067a489]
- @llamaindex/openai@0.1.47
- @llamaindex/google@0.0.3
- @llamaindex/anthropic@0.0.31
- @llamaindex/clip@0.0.31
- @llamaindex/deepinfra@0.0.31
- @llamaindex/groq@0.0.46
- @llamaindex/huggingface@0.0.31
- @llamaindex/vllm@0.0.17
## 0.8.32
### Patch Changes
- 34faf48: chore: move vector stores to their own packages
- 4df1fe6: chore: migrate llamaindex llms and embeddings to their own packages
- 9456616: refactor: @llamaindex/postgres
- 1931bbc: refactor: @llamaindex/azure
- Updated dependencies [34faf48]
- Updated dependencies [4df1fe6]
- Updated dependencies [9456616]
- Updated dependencies [d6c270e]
- Updated dependencies [1892e1c]
- Updated dependencies [1931bbc]
- Updated dependencies [8744796]
- @llamaindex/astra@0.0.2
- @llamaindex/chroma@0.0.2
- @llamaindex/milvus@0.0.2
- @llamaindex/mongodb@0.0.2
- @llamaindex/pinecone@0.0.2
- @llamaindex/qdrant@0.0.2
- @llamaindex/upstash@0.0.2
- @llamaindex/weaviate@0.0.2
- @llamaindex/google@0.0.2
- @llamaindex/mistral@0.0.2
- @llamaindex/core@0.4.21
- @llamaindex/cloud@2.0.22
- @llamaindex/openai@0.1.46
- @llamaindex/azure@0.0.2
- @llamaindex/node-parser@0.0.22
- @llamaindex/anthropic@0.0.30
- @llamaindex/clip@0.0.30
- @llamaindex/cohere@0.0.2
- @llamaindex/deepinfra@0.0.30
- @llamaindex/huggingface@0.0.30
- @llamaindex/mixedbread@0.0.2
- @llamaindex/ollama@0.0.37
- @llamaindex/portkey-ai@0.0.30
- @llamaindex/replicate@0.0.30
- @llamaindex/postgres@0.0.30
- @llamaindex/readers@1.0.23
- @llamaindex/groq@0.0.45
- @llamaindex/vllm@0.0.16
## 0.8.31
### Patch Changes
- d211b7a: added support for tool calls with results in message history for Anthropic agent
- 0ebbfc1: fix: clean up docstore when generating embedding fail
- Updated dependencies [5dec9f9]
- Updated dependencies [fd9c829]
- Updated dependencies [d211b7a]
- @llamaindex/cloud@2.0.21
- @llamaindex/anthropic@0.0.29
- @llamaindex/core@0.4.20
- @llamaindex/node-parser@0.0.21
- @llamaindex/clip@0.0.29
- @llamaindex/deepinfra@0.0.29
- @llamaindex/huggingface@0.0.29
- @llamaindex/ollama@0.0.36
- @llamaindex/openai@0.1.45
- @llamaindex/portkey-ai@0.0.29
- @llamaindex/replicate@0.0.29
- @llamaindex/readers@1.0.22
- @llamaindex/groq@0.0.44
- @llamaindex/vllm@0.0.15
## 0.8.30
### Patch Changes
+16 -50
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.8.30",
"version": "0.8.34",
"license": "MIT",
"type": "module",
"keywords": [
@@ -20,17 +20,6 @@
"llamaindex"
],
"dependencies": {
"@anthropic-ai/sdk": "0.32.1",
"@aws-crypto/sha256-js": "^5.2.0",
"@aws-sdk/client-sso-oidc": "^3.693.0",
"@azure/cosmos": "^4.1.1",
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@datastax/astra-db-ts": "^1.4.1",
"@discoveryjs/json-ext": "^0.6.1",
"@google-cloud/vertexai": "1.9.0",
"@google/generative-ai": "0.21.0",
"@grpc/grpc-js": "^1.12.2",
"@llamaindex/anthropic": "workspace:*",
"@llamaindex/clip": "workspace:*",
"@llamaindex/cloud": "workspace:*",
@@ -46,55 +35,32 @@
"@llamaindex/readers": "workspace:*",
"@llamaindex/replicate": "workspace:*",
"@llamaindex/vllm": "workspace:*",
"@mistralai/mistralai": "^1.3.4",
"@mixedbread-ai/sdk": "^2.2.11",
"@pinecone-database/pinecone": "^4.0.0",
"@qdrant/js-client-rest": "^1.11.0",
"@llamaindex/postgres": "workspace:*",
"@llamaindex/azure": "workspace:*",
"@llamaindex/astra": "workspace:*",
"@llamaindex/milvus": "workspace:*",
"@llamaindex/chroma": "workspace:*",
"@llamaindex/mongodb": "workspace:*",
"@llamaindex/pinecone": "workspace:*",
"@llamaindex/qdrant": "workspace:*",
"@llamaindex/upstash": "workspace:*",
"@llamaindex/weaviate": "workspace:*",
"@llamaindex/google": "workspace:*",
"@llamaindex/mistral": "workspace:*",
"@llamaindex/mixedbread": "workspace:*",
"@llamaindex/cohere": "workspace:*",
"@types/lodash": "^4.17.7",
"@types/node": "^22.9.0",
"@types/pg": "^8.11.8",
"@upstash/vector": "^1.1.5",
"@zilliz/milvus2-sdk-node": "^2.4.6",
"ajv": "^8.17.1",
"assemblyai": "^4.8.0",
"chromadb": "1.9.2",
"chromadb-default-embed": "^2.13.2",
"cohere-ai": "7.14.0",
"gpt-tokenizer": "^2.6.2",
"groq-sdk": "^0.8.0",
"js-tiktoken": "^1.0.14",
"lodash": "^4.17.21",
"magic-bytes.js": "^1.10.0",
"mongodb": "^6.7.0",
"openai": "^4.73.1",
"pathe": "^1.1.2",
"rake-modified": "^1.0.8",
"weaviate-client": "^3.2.3",
"wikipedia": "^2.1.2",
"wink-nlp": "^2.3.0",
"zod": "^3.23.8"
},
"peerDependencies": {
"pg": "^8.12.0",
"pgvector": "0.2.0"
},
"peerDependenciesMeta": {
"pg": {
"optional": true
},
"pgvector": {
"optional": true
}
"gpt-tokenizer": "^2.6.2"
},
"devDependencies": {
"@swc/cli": "^0.5.0",
"@swc/core": "^1.9.2",
"@vercel/postgres": "^0.10.0",
"concurrently": "^9.1.0",
"glob": "^11.0.0",
"pg": "^8.12.0",
"pgvector": "0.2.0",
"postgres": "^3.4.4",
"typescript": "^5.7.2"
},
"engines": {
+3 -8
View File
@@ -13,11 +13,6 @@ import {
} from "@llamaindex/core/node-parser";
import { AsyncLocalStorage } from "@llamaindex/env";
import type { ServiceContext } from "./ServiceContext.js";
import {
getEmbeddedModel,
setEmbeddedModel,
withEmbeddedModel,
} from "./internal/settings/EmbedModel.js";
export type PromptConfig = {
llm?: string;
@@ -84,15 +79,15 @@ class GlobalSettings implements Config {
}
get embedModel(): BaseEmbedding {
return getEmbeddedModel();
return CoreSettings.embedModel;
}
set embedModel(embedModel: BaseEmbedding) {
setEmbeddedModel(embedModel);
CoreSettings.embedModel = embedModel;
}
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
return withEmbeddedModel(embedModel, fn);
return CoreSettings.withEmbedModel(embedModel, fn);
}
get nodeParser(): NodeParser {
@@ -1,39 +1 @@
import { BaseEmbedding } from "@llamaindex/core/embeddings";
import { GeminiSession, GeminiSessionStore } from "../llm/gemini/base.js";
import { GEMINI_BACKENDS } from "../llm/gemini/types.js";
export enum GEMINI_EMBEDDING_MODEL {
EMBEDDING_001 = "embedding-001",
TEXT_EMBEDDING_004 = "text-embedding-004",
}
/**
* GeminiEmbedding is an alias for Gemini that implements the BaseEmbedding interface.
* Note: Vertex SDK currently does not support embeddings
*/
export class GeminiEmbedding extends BaseEmbedding {
model: GEMINI_EMBEDDING_MODEL;
session: GeminiSession;
constructor(init?: Partial<GeminiEmbedding>) {
super();
this.model = init?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
this.session =
init?.session ??
(GeminiSessionStore.get({
backend: GEMINI_BACKENDS.GOOGLE,
}) as GeminiSession);
}
private async getEmbedding(prompt: string): Promise<number[]> {
const client = this.session.getGenerativeModel({
model: this.model,
});
const result = await client.embedContent(prompt);
return result.embedding.values;
}
getTextEmbedding(text: string): Promise<number[]> {
return this.getEmbedding(text);
}
}
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
@@ -1,33 +1,4 @@
import { BaseEmbedding } from "@llamaindex/core/embeddings";
import { MistralAISession } from "../llm/mistral.js";
export enum MistralAIEmbeddingModelType {
MISTRAL_EMBED = "mistral-embed",
}
export class MistralAIEmbedding extends BaseEmbedding {
model: MistralAIEmbeddingModelType;
apiKey?: string;
private session: MistralAISession;
constructor(init?: Partial<MistralAIEmbedding>) {
super();
this.model = MistralAIEmbeddingModelType.MISTRAL_EMBED;
this.session = new MistralAISession(init);
}
private async getMistralAIEmbedding(input: string) {
const client = await this.session.getClient();
const { data } = await client.embeddings({
model: this.model,
input: [input],
});
return data[0].embedding;
}
async getTextEmbedding(text: string): Promise<number[]> {
return this.getMistralAIEmbedding(text);
}
}
export {
MistralAIEmbedding,
MistralAIEmbeddingModelType,
} from "@llamaindex/mistral";
@@ -1,178 +1,4 @@
import { BaseEmbedding, type EmbeddingInfo } from "@llamaindex/core/embeddings";
import { getEnv } from "@llamaindex/env";
import { MixedbreadAI, MixedbreadAIClient } from "@mixedbread-ai/sdk";
type EmbeddingsRequestWithoutInput = Omit<
MixedbreadAI.EmbeddingsRequest,
"input"
>;
/**
* Interface extending EmbeddingsParams with additional
* parameters specific to the MixedbreadAIEmbeddings class.
*/
export interface MixedbreadAIEmbeddingsParams
extends Omit<EmbeddingsRequestWithoutInput, "model"> {
/**
* The model to use for generating embeddings.
* @default {"mixedbread-ai/mxbai-embed-large-v1"}
*/
model?: string;
/**
* The API key to use.
* @default {process.env.MXBAI_API_KEY}
*/
apiKey?: string;
/**
* The base URL for the API.
*/
baseUrl?: string;
/**
* The maximum number of documents to embed in a single request.
* @default {128}
*/
embedBatchSize?: number;
/**
* The embed info for the model.
*/
embedInfo?: EmbeddingInfo;
/**
* The maximum number of retries to attempt.
* @default {3}
*/
maxRetries?: number;
/**
* Timeouts for the request.
*/
timeoutInSeconds?: number;
}
/**
 * Embedding model backed by the mixedbread ai embeddings API.
 *
 * Requests use the model "mixedbread-ai/mxbai-embed-large-v1" unless another
 * model is passed to the constructor. The resulting vectors can be used for
 * tasks such as similarity comparison, clustering, or as input features for
 * other models.
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({ apiKey: 'your-api-key' });
 * const texts = ["Baking bread is fun", "I love baking"];
 * const result = await mxbai.getTextEmbeddings(texts);
 * console.log(result);
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({
 *   apiKey: 'your-api-key',
 *   model: 'mixedbread-ai/mxbai-embed-large-v1',
 *   encodingFormat: MixedbreadAI.EncodingFormat.Binary,
 *   dimensions: 512,
 *   normalized: true,
 * });
 * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
 * const result = await mxbai.getTextEmbedding(query);
 * console.log(result);
 */
export class MixedbreadAIEmbeddings extends BaseEmbedding {
  requestParams: EmbeddingsRequestWithoutInput;
  requestOptions: MixedbreadAIClient.RequestOptions;
  private client: MixedbreadAIClient;

  /**
   * Builds the request defaults and the underlying API client.
   *
   * @param params - Optional configuration. The API key falls back to the
   * `MXBAI_API_KEY` environment variable, the batch size to 128 and the
   * retry count to 3.
   * @throws {Error} If no API key is passed and none is found in the environment.
   * @throws {Error} If the requested batch size is larger than 256.
   */
  constructor(params?: Partial<MixedbreadAIEmbeddingsParams>) {
    super();

    const apiKey = params?.apiKey ?? getEnv("MXBAI_API_KEY");
    if (!apiKey) {
      throw new Error(
        "mixedbread ai API key not found. Either provide it in the constructor or set the 'MXBAI_API_KEY' environment variable.",
      );
    }

    const requestedBatchSize = params?.embedBatchSize;
    if (requestedBatchSize && requestedBatchSize > 256) {
      throw new Error(
        "The maximum batch size for mixedbread ai embeddings API is 256.",
      );
    }
    this.embedBatchSize = requestedBatchSize ?? 128;

    const embedInfo = params?.embedInfo;
    if (embedInfo) {
      this.embedInfo = embedInfo;
    }

    // Everything that is sent with each embeddings request except the input.
    this.requestParams = {
      model: params?.model ?? "mixedbread-ai/mxbai-embed-large-v1",
      normalized: params?.normalized,
      dimensions: params?.dimensions,
      encodingFormat: params?.encodingFormat,
      truncationStrategy: params?.truncationStrategy,
      prompt: params?.prompt,
    } as EmbeddingsRequestWithoutInput;

    this.requestOptions = {
      timeoutInSeconds: params?.timeoutInSeconds,
      maxRetries: params?.maxRetries ?? 3,
      // Support for this already exists in the python sdk and will be added to the js sdk soon
      // @ts-expect-error fixme
      additionalHeaders: {
        "user-agent": "@mixedbread-ai/llamaindex-ts-sdk",
      },
    };

    // Only pass `environment` when a custom base URL was supplied.
    const baseUrl = params?.baseUrl;
    const clientOptions = baseUrl
      ? { apiKey, environment: baseUrl }
      : { apiKey };
    this.client = new MixedbreadAIClient(clientOptions);
  }

  /**
   * Embeds a single text by delegating to {@link getTextEmbeddings}.
   *
   * @param text - The text to embed.
   * @returns The embedding of `text`.
   *
   * @example
   * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
   * const result = await mxbai.getTextEmbedding(query);
   * console.log(result);
   */
  async getTextEmbedding(text: string): Promise<number[]> {
    const [embedding] = await this.getTextEmbeddings([text]);
    return embedding!;
  }

  /**
   * Embeds several texts with one API request.
   *
   * @param texts - The texts to embed. An empty array short-circuits to `[]`
   * without calling the API.
   * @returns One embedding per entry of the API response.
   *
   * @example
   * const texts = ["Baking bread is fun", "I love baking"];
   * const result = await mxbai.getTextEmbeddings(texts);
   * console.log(result);
   */
  getTextEmbeddings = async (texts: string[]): Promise<Array<number[]>> => {
    if (texts.length === 0) {
      return [];
    }

    const request = { ...this.requestParams, input: texts };
    const response = await this.client.embeddings(request, this.requestOptions);
    return response.data.map((item) => item.embedding as number[]);
  };
}
export {
MixedbreadAIEmbeddings,
type MixedbreadAIEmbeddingsParams,
} from "@llamaindex/mixedbread";
+1 -1
View File
@@ -2,7 +2,7 @@ export * from "@llamaindex/core/embeddings";
export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
export { DeepInfraEmbedding } from "./DeepInfraEmbedding.js";
export { FireworksEmbedding } from "./fireworks.js";
export * from "./GeminiEmbedding.js";
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "./GeminiEmbedding.js";
export * from "./HuggingFaceEmbedding.js";
export * from "./JinaAIEmbedding.js";
export * from "./MistralAIEmbedding.js";
+4 -1
View File
@@ -1,12 +1,15 @@
//#region initial setup for OpenAI
import { OpenAI } from "@llamaindex/openai";
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
import { Settings } from "./Settings.js";
// Install OpenAI defaults for whichever global setting is still missing.
// Reading a setting is expected to throw while it is unset (TODO confirm
// against the Settings getters). Each setting is probed on its own so that a
// value the user already configured is never replaced just because the other
// one is absent.
try {
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  Settings.llm;
} catch {
  Settings.llm = new OpenAI();
}
try {
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  Settings.embedModel;
} catch {
  Settings.embedModel = new OpenAIEmbedding();
}
//#endregion
+5 -3
View File
@@ -7,12 +7,14 @@ export {
HuggingFaceEmbeddingModelType,
} from "./embeddings/HuggingFaceEmbedding.js";
export { type VertexGeminiSessionOptions } from "./llm/gemini/types.js";
export { GeminiVertexSession } from "./llm/gemini/vertex.js";
export {
GeminiVertexSession,
type VertexGeminiSessionOptions,
} from "@llamaindex/google";
// Expose AzureDynamicSessionTool for node.js runtime only
export { AzureDynamicSessionTool } from "@llamaindex/azure";
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
export { AzureDynamicSessionTool } from "./tools/AzureDynamicSessionTool.node.js";
// Don't export vector store modules for non-node.js runtime on top level,
// as we cannot guarantee that they will work in other environments
@@ -19,6 +19,7 @@ import {
} from "@llamaindex/core/schema";
import type { BaseIndexStore } from "@llamaindex/core/storage/index-store";
import { extractText } from "@llamaindex/core/utils";
import { VectorStoreQueryMode } from "@llamaindex/core/vector-store";
import type { ServiceContext } from "../../ServiceContext.js";
import { nodeParserFromSettingsOrContext } from "../../Settings.js";
import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js";
@@ -38,7 +39,6 @@ import type {
VectorStoreByType,
VectorStoreQueryResult,
} from "../../vector-store/index.js";
import { VectorStoreQueryMode } from "../../vector-store/types.js";
import type { BaseIndexInit } from "../BaseIndex.js";
import { BaseIndex } from "../BaseIndex.js";
@@ -237,7 +237,12 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
if (args.logProgress) {
console.log("Finished parsing documents.");
}
return await this.init(args);
try {
return await this.init(args);
} catch (error) {
await docStoreStrategy.rollback(args.storageContext.docStore, args.nodes);
throw error;
}
}
static async fromVectorStores(
@@ -10,7 +10,7 @@ import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
import type {
BaseVectorStore,
VectorStoreByType,
} from "../vector-store/types.js";
} from "@llamaindex/core/vector-store";
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
import {
DocStoreStrategy,
@@ -1,10 +1,11 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import { BaseNode } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
import { RollbackableTransformComponent } from "./rollback.js";
/**
* Handle doc store duplicates by checking all hashes.
*/
export class DuplicatesStrategy extends TransformComponent {
export class DuplicatesStrategy extends RollbackableTransformComponent {
private docStore: BaseDocumentStore;
constructor(docStore: BaseDocumentStore) {
@@ -1,13 +1,14 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import { BaseNode } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
import type { BaseVectorStore } from "../../vector-store/types.js";
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
import { classify } from "./classify.js";
import { RollbackableTransformComponent } from "./rollback.js";
/**
* Handle docstore upserts by checking hashes and ids.
* Identify missing docs and delete them from docstore and vector store
*/
export class UpsertsAndDeleteStrategy extends TransformComponent {
export class UpsertsAndDeleteStrategy extends RollbackableTransformComponent {
protected docStore: BaseDocumentStore;
protected vectorStores: BaseVectorStore[] | undefined;
@@ -1,12 +1,13 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import { BaseNode } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
import type { BaseVectorStore } from "../../vector-store/types.js";
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
import { classify } from "./classify.js";
import { RollbackableTransformComponent } from "./rollback.js";
/**
* Handles doc store upserts by checking hashes and ids.
*/
export class UpsertsStrategy extends TransformComponent {
export class UpsertsStrategy extends RollbackableTransformComponent {
protected docStore: BaseDocumentStore;
protected vectorStores: BaseVectorStore[] | undefined;
@@ -1,9 +1,9 @@
import { TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
import type { BaseVectorStore } from "../../vector-store/types.js";
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
import { UpsertsStrategy } from "./UpsertsStrategy.js";
import { RollbackableTransformComponent } from "./rollback.js";
/**
* Document de-duplication strategies work by comparing the hashes or ids stored in the document store.
@@ -19,7 +19,7 @@ export enum DocStoreStrategy {
NONE = "none", // no-op strategy
}
class NoOpStrategy extends TransformComponent {
class NoOpStrategy extends RollbackableTransformComponent {
constructor() {
super(async (nodes) => nodes);
}
@@ -29,7 +29,7 @@ export function createDocStoreStrategy(
docStoreStrategy: DocStoreStrategy,
docStore?: BaseDocumentStore,
vectorStores: BaseVectorStore[] = [],
): TransformComponent {
): RollbackableTransformComponent {
if (docStoreStrategy === DocStoreStrategy.NONE) {
return new NoOpStrategy();
}
@@ -0,0 +1,19 @@
import { BaseNode, TransformComponent } from "@llamaindex/core/schema";
import type { BaseDocumentStore } from "../../index.edge.js";
import { classify } from "./classify.js";
/**
 * Transform component that can undo the doc store writes it made.
 */
export class RollbackableTransformComponent extends TransformComponent {
  /**
   * Remove unused docs from the doc store. It is useful in case
   * generating embeddings fails and we want to remove the unused docs.
   *
   * TODO: override this in UpsertsStrategy if we want to revert removed docs also
   *
   * @param docStore - The document store to clean up and persist.
   * @param nodes - The nodes passed to `classify` to determine which docs
   * are unused.
   * @returns A promise that settles once the deletions and the final persist
   * have completed.
   */
  public async rollback(
    docStore: BaseDocumentStore,
    nodes: BaseNode[],
  ): Promise<void> {
    const { unusedDocs } = await classify(docStore, nodes);
    for (const docId of unusedDocs) {
      // NOTE(review): the `false` flag presumably disables raising when the
      // doc is missing - confirm against BaseDocumentStore.deleteDocument.
      await docStore.deleteDocument(docId, false);
    }
    // Await the persist so that async doc stores finish writing (and surface
    // their errors to the caller) before the rollback is reported as done.
    await docStore.persist();
  }
}
+1
View File
@@ -0,0 +1 @@
export * from "@llamaindex/google";
+3 -2
View File
@@ -6,11 +6,12 @@ export {
Anthropic,
} from "./anthropic.js";
export { FireworksLLM } from "./fireworks.js";
export { Gemini, GeminiSession } from "./gemini/base.js";
export {
GEMINI_MODEL,
Gemini,
GeminiSession,
type GoogleGeminiSessionOptions,
} from "./gemini/types.js";
} from "./google.js";
export * from "./groq.js";
export { HuggingFaceInferenceAPI, HuggingFaceLLM } from "./huggingface.js";
export {
+1 -138
View File
@@ -1,138 +1 @@
import {
BaseLLM,
type ChatMessage,
type ChatResponse,
type ChatResponseChunk,
type LLMChatParamsNonStreaming,
type LLMChatParamsStreaming,
} from "@llamaindex/core/llms";
import { getEnv } from "@llamaindex/env";
/**
 * Mistral models known to this module, keyed by model name.
 * `contextWindow` is the context size reported through the LLM metadata
 * (presumably measured in tokens - confirm against Mistral's model docs).
 */
export const ALL_AVAILABLE_MISTRAL_MODELS = {
"mistral-tiny": { contextWindow: 32000 },
"mistral-small": { contextWindow: 32000 },
"mistral-medium": { contextWindow: 32000 },
};
/**
 * Holds the Mistral API key and lazily creates a single SDK client for it.
 */
export class MistralAISession {
  apiKey: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private client: any;

  /**
   * @param init - Optional object carrying the API key. When no key is given
   * the `MISTRAL_API_KEY` environment variable is used instead.
   * @throws {Error} If no API key can be resolved.
   */
  constructor(init?: { apiKey?: string | undefined }) {
    this.apiKey = init?.apiKey ? init.apiKey : getEnv("MISTRAL_API_KEY")!;

    if (!this.apiKey) {
      // Overriding MistralAI package's error message
      throw new Error("Set Mistral API key in MISTRAL_API_KEY env variable");
    }
  }

  /**
   * Returns the cached SDK client, creating it on first use.
   * The SDK module is loaded with a dynamic import on every call.
   */
  async getClient() {
    const { Mistral } = await import("@mistralai/mistralai");
    if (this.client) {
      return this.client;
    }
    this.client = new Mistral({ apiKey: this.apiKey });
    return this.client;
  }
}
/**
 * MistralAI LLM implementation.
 *
 * Talks to the Mistral API through the client created by
 * {@link MistralAISession}, which instantiates the `Mistral` class of the
 * `@mistralai/mistralai` SDK. That client exposes chat completions as
 * `client.chat.complete(...)` and `client.chat.stream(...)`.
 */
export class MistralAI extends BaseLLM {
  // Per completion MistralAI params
  model: keyof typeof ALL_AVAILABLE_MISTRAL_MODELS;
  temperature: number;
  topP: number;
  maxTokens?: number | undefined;
  apiKey?: string;
  safeMode: boolean;
  randomSeed?: number | undefined;

  private session: MistralAISession;

  /**
   * @param init - Optional overrides. Defaults: model "mistral-small",
   * temperature 0.1, topP 1, safeMode false. The API key is resolved by
   * {@link MistralAISession}.
   * @throws {Error} If no API key can be resolved.
   */
  constructor(init?: Partial<MistralAI>) {
    super();
    this.model = init?.model ?? "mistral-small";
    this.temperature = init?.temperature ?? 0.1;
    this.topP = init?.topP ?? 1;
    this.maxTokens = init?.maxTokens ?? undefined;
    this.safeMode = init?.safeMode ?? false;
    this.randomSeed = init?.randomSeed ?? undefined;
    this.session = new MistralAISession(init);
  }

  /** Static description of the configured model and sampling parameters. */
  get metadata() {
    return {
      model: this.model,
      temperature: this.temperature,
      topP: this.topP,
      maxTokens: this.maxTokens,
      contextWindow: ALL_AVAILABLE_MISTRAL_MODELS[this.model].contextWindow,
      tokenizer: undefined,
    };
  }

  /**
   * Builds the request body shared by streaming and non-streaming calls.
   * The class keeps its `safeMode` option, but the request carries it as
   * `safePrompt`, which is the name the `Mistral` client's chat request uses.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private buildParams(messages: ChatMessage[]): any {
    return {
      model: this.model,
      temperature: this.temperature,
      maxTokens: this.maxTokens,
      topP: this.topP,
      safePrompt: this.safeMode,
      randomSeed: this.randomSeed,
      messages,
    };
  }

  chat(
    params: LLMChatParamsStreaming,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
  /**
   * Sends a chat request.
   *
   * @param params - Messages plus the `stream` flag selecting the mode.
   * @returns The assistant message for non-streaming calls, or an async
   * iterable of chunks when `stream` is set.
   * @throws {Error} If a non-streaming response contains no message.
   */
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
    const { messages, stream } = params;

    // Streaming
    if (stream) {
      return this.streamChat(params);
    }

    // Non-streaming
    const client = await this.session.getClient();
    // `chat` is a namespace on the `Mistral` client, not a callable.
    const response = await client.chat.complete(this.buildParams(messages));
    const message = response?.choices?.[0]?.message;
    if (!message) {
      throw new Error("Unexpected response format from Mistral API");
    }
    return {
      raw: response,
      message,
    };
  }

  /**
   * Streams a chat completion, yielding one chunk per server event that
   * carries at least one choice. The first choice of every yielded chunk is
   * re-indexed with a running counter.
   */
  protected async *streamChat({
    messages,
  }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
    const client = await this.session.getClient();
    const eventStream = await client.chat.stream(this.buildParams(messages));

    //Indices
    let idx_counter: number = 0;
    for await (const event of eventStream) {
      // Each stream event wraps the completion chunk in `data`.
      const part = event?.data;
      if (!part?.choices?.length) continue;

      part.choices[0].index = idx_counter;
      idx_counter++;

      yield {
        raw: part,
        delta: part.choices[0].delta.content ?? "",
      };
    }
    return;
  }
}
export * from "@llamaindex/mistral";
@@ -1,3 +1,6 @@
export * from "./CohereRerank.js";
export * from "@llamaindex/cohere";
export {
MixedbreadAIReranker,
type MixedbreadAIRerankerParams,
} from "@llamaindex/mixedbread";
export * from "./JinaAIReranker.js";
export * from "./MixedbreadAIReranker.js";
@@ -9,13 +9,13 @@ import {
BaseIndexStore,
SimpleIndexStore,
} from "@llamaindex/core/storage/index-store";
import { path } from "@llamaindex/env";
import type { ServiceContext } from "../ServiceContext.js";
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
import type {
BaseVectorStore,
VectorStoreByType,
} from "../vector-store/types.js";
} from "@llamaindex/core/vector-store";
import { path } from "@llamaindex/env";
import type { ServiceContext } from "../ServiceContext.js";
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
export interface StorageContext {
@@ -3,13 +3,13 @@ import {
DEFAULT_NAMESPACE,
DEFAULT_PERSIST_DIR,
} from "@llamaindex/core/global";
import { KVDocumentStore } from "@llamaindex/core/storage/doc-store";
import {
BaseInMemoryKVStore,
SimpleKVStore,
} from "@llamaindex/core/storage/kv-store";
import { path } from "@llamaindex/env";
import _ from "lodash";
import { KVDocumentStore } from "./KVDocumentStore.js";
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type SaveDict = Record<string, any>;
+7 -11
View File
@@ -1,18 +1,14 @@
export * from "@llamaindex/azure/storage";
export * from "@llamaindex/core/storage/chat-store";
export * from "@llamaindex/core/storage/doc-store";
export * from "@llamaindex/core/storage/index-store";
export * from "@llamaindex/core/storage/kv-store";
export * from "./chatStore/AzureCosmosMongovCoreChatStore.js";
export * from "./chatStore/AzureCosmosNoSqlChatStore.js";
export * from "./docStore/AzureCosmosMongovCoreDocumentStore.js";
export * from "./docStore/AzureCosmosNoSqlDocumentStore.js";
export { PostgresDocumentStore } from "./docStore/PostgresDocumentStore.js";
export {
PostgresDocumentStore,
PostgresIndexStore,
PostgresKVStore,
} from "@llamaindex/postgres";
export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
export * from "./FileSystem.js";
export * from "./indexStore/AzureCosmosMongovCoreIndexStore.js";
export * from "./indexStore/AzureCosmosNoSqlIndexStore.js";
export { PostgresIndexStore } from "./indexStore/PostgresIndexStore.js";
export * from "./kvStore/AzureCosmosMongovCoreKVStore.js";
export * from "./kvStore/AzureCosmosNoSqlKVStore.js";
export { PostgresKVStore } from "./kvStore/PostgresKVStore.js";
export * from "./StorageContext.js";
+1 -10
View File
@@ -1,18 +1,15 @@
import {
AzureDynamicSessionTool,
type AzureDynamicSessionToolParams,
} from "./AzureDynamicSessionTool.node.js";
import { WikipediaTool, type WikipediaToolParams } from "./WikipediaTool.js";
} from "@llamaindex/azure";
// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace ToolsFactory {
type ToolsMap = {
[Tools.Wikipedia]: typeof WikipediaTool;
[Tools.AzureCodeInterpreter]: typeof AzureDynamicSessionTool;
};
export enum Tools {
Wikipedia = "wikipedia.WikipediaToolSpec",
AzureCodeInterpreter = "azure_code_interpreter.AzureCodeInterpreterToolSpec",
}
@@ -20,12 +17,6 @@ export namespace ToolsFactory {
key: Tool,
...params: ConstructorParameters<ToolsMap[Tool]>
): Promise<InstanceType<ToolsMap[Tool]>> {
if (key === Tools.Wikipedia) {
return new WikipediaTool(
...(params as WikipediaToolParams[]),
) as InstanceType<ToolsMap[Tool]>;
}
if (key === Tools.AzureCodeInterpreter) {
return new AzureDynamicSessionTool(
...(params as AzureDynamicSessionToolParams[]),
-1
View File
@@ -1,3 +1,2 @@
export * from "@llamaindex/core/tools";
export * from "./QueryEngineTool.js";
export * from "./WikipediaTool.js";
@@ -1,270 +1 @@
import {
Collection,
DataAPIClient,
Db,
type Filter,
type FindOptions,
type SomeDoc,
} from "@datastax/astra-db-ts";
import type { BaseNode } from "@llamaindex/core/schema";
import { MetadataMode } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import {
BaseVectorStore,
FilterCondition,
FilterOperator,
type MetadataFilter,
type MetadataFilters,
type VectorStoreBaseParams,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
} from "./utils.js";
/**
 * Vector store backed by an Astra DB collection.
 *
 * A collection must be selected with {@link createAndConnect} or
 * {@link connect} before `add`, `delete` or `query` is called. Rows are
 * written with the node id under `idKey`, the node text under `contentKey`
 * and the embedding under `$vector`.
 */
export class AstraDBVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  idKey: string;
  contentKey: string;

  private astraClient: DataAPIClient;
  private astraDB: Db;
  private collection: Collection | undefined;

  /**
   * @param init - Optional configuration. `params.token` and
   * `params.endpoint` fall back to the `ASTRA_DB_APPLICATION_TOKEN` and
   * `ASTRA_DB_API_ENDPOINT` environment variables; `params.namespace` falls
   * back to `ASTRA_DB_NAMESPACE` and then to "default_keyspace". `idKey`
   * defaults to "_id" and `contentKey` to "content".
   * @throws {Error} If the token or the endpoint cannot be resolved.
   */
  constructor(
    init?: Partial<AstraDBVectorStore> & {
      params?: {
        token: string;
        endpoint: string;
        namespace?: string;
      };
    } & VectorStoreBaseParams,
  ) {
    super(init);
    const token = init?.params?.token ?? getEnv("ASTRA_DB_APPLICATION_TOKEN");
    const endpoint = init?.params?.endpoint ?? getEnv("ASTRA_DB_API_ENDPOINT");

    if (!token) {
      throw new Error(
        "Must specify ASTRA_DB_APPLICATION_TOKEN via env variable.",
      );
    }
    if (!endpoint) {
      throw new Error("Must specify ASTRA_DB_API_ENDPOINT via env variable.");
    }
    const namespace =
      init?.params?.namespace ??
      getEnv("ASTRA_DB_NAMESPACE") ??
      "default_keyspace";
    this.astraClient = new DataAPIClient(token, {
      caller: ["LlamaIndexTS"],
    });
    this.astraDB = this.astraClient.db(endpoint, { namespace });

    this.idKey = init?.idKey ?? "_id";
    this.contentKey = init?.contentKey ?? "content";
  }

  /**
   * Create a new collection in your Astra DB vector database and connects to it.
   * You must call this method or `connect` before adding, deleting, or querying.
   *
   * @param collection - Your new collection's name
   * @param options - CreateCollectionOptions used to set the number of vector dimensions and similarity metric
   * @returns Promise that resolves if the creation did not throw an error.
   */
  async createAndConnect(
    collection: string,
    options?: Parameters<Db["createCollection"]>[1],
  ): Promise<void> {
    this.collection = await this.astraDB.createCollection(collection, options);
    console.debug("Created Astra DB collection");

    return;
  }

  /**
   * Connect to an existing collection in your Astra DB vector database.
   * You must call this method or `createAndConnect` before adding, deleting, or querying.
   *
   * @param collection - Your existing collection's name
   * @returns Promise that resolves if the connection did not throw an error.
   */
  async connect(collection: string): Promise<void> {
    this.collection = await this.astraDB.collection(collection);
    console.debug("Connected to Astra DB collection");

    return;
  }

  /**
   * Get an instance of your Astra DB client.
   * @returns the AstraDB client
   */
  client(): DataAPIClient {
    return this.astraClient;
  }

  /**
   * Add your document(s) to your Astra DB collection.
   *
   * @param nodes - Nodes to insert; an empty list is a no-op.
   * @returns an array of node ids which were added
   * @throws {Error} If no collection has been connected yet.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!this.collection) {
      throw new Error("Must connect to collection before adding.");
    }
    const collection = this.collection;

    if (!nodes || nodes.length === 0) {
      return [];
    }

    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.contentKey,
        this.flatMetadata,
      );

      return {
        $vector: node.getEmbedding(),
        [this.idKey]: node.id_,
        [this.contentKey]: node.getContent(MetadataMode.NONE),
        ...metadata,
      };
    });

    console.debug(`Adding ${dataToInsert.length} rows to table`);

    const insertResult = await collection.insertMany(dataToInsert);

    return insertResult.insertedIds as string[];
  }

  /**
   * Delete a document from your Astra DB collection.
   *
   * The row is looked up through `idKey`, the same field `add` stores the
   * node id under, so deletion also works when a custom `idKey` is used.
   *
   * @param refDocId - The id of the document to delete
   * @param deleteOptions - DeleteOneOptions to pass to the delete query
   * @returns Promise that resolves if the delete query did not throw an error.
   * @throws {Error} If no collection has been connected yet.
   */
  async delete(
    refDocId: string,
    deleteOptions?: Parameters<Collection["deleteOne"]>[1],
  ): Promise<void> {
    if (!this.collection) {
      throw new Error("Must connect to collection before deleting.");
    }
    const collection = this.collection;

    console.debug(`Deleting row with id ${refDocId}`);

    await collection.deleteOne(
      {
        [this.idKey]: refDocId,
      },
      deleteOptions,
    );
  }

  /**
   * Query documents from your Astra DB collection to get the closest match to your embedding.
   *
   * When `query.queryEmbedding` is set the results are sorted by vector
   * similarity; otherwise `options.sort` is used. At most
   * `query.similarityTopK` rows are requested.
   *
   * @param query - VectorStoreQuery
   * @param options - FindOptions
   * @returns The matching nodes together with their ids and similarities.
   * @throws {Error} If no collection has been connected yet.
   */
  async query(
    query: VectorStoreQuery,
    options?: Parameters<Collection["find"]>[1],
  ): Promise<VectorStoreQueryResult> {
    if (!this.collection) {
      throw new Error("Must connect to collection before querying.");
    }
    const collection = this.collection;

    const astraFilter = this.toAstraFilter(query.filters);
    const cursor = await collection.find(astraFilter, <FindOptions>{
      ...options,
      sort: query.queryEmbedding
        ? { $vector: query.queryEmbedding }
        : options?.sort,
      limit: query.similarityTopK,
      includeSimilarity: true,
    });

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const row of cursor) {
      // Split the row into the store's bookkeeping fields and node metadata.
      const {
        $vector: embedding,
        $similarity: similarity,
        [this.idKey]: id,
        [this.contentKey]: content,
        ...metadata
      } = row;

      const node = metadataDictToNode(metadata, {
        fallback: {
          id,
          text: content,
          ...metadata,
        },
      });
      node.setContent(content);

      ids.push(id);
      similarities.push(similarity);
      nodes.push(node);
    }

    return {
      similarities,
      ids,
      nodes,
    };
  }

  /**
   * Translates metadata filters into an Astra filter document. Missing or
   * empty filters yield `{}`; otherwise the items are combined with `$and`
   * (the default) or `$or`.
   */
  private toAstraFilter(filters?: MetadataFilters): Filter<SomeDoc> {
    if (!filters || filters.filters?.length === 0) return {};
    const condition = filters.condition ?? FilterCondition.AND;
    const listFilter = filters.filters.map((f) => this.buildFilterItem(f));
    if (condition === FilterCondition.OR) return { $or: listFilter };
    if (condition === FilterCondition.AND) return { $and: listFilter };
    throw new Error(`Not supported filter condition: ${condition}`);
  }

  /**
   * Translates one metadata filter into the matching Astra operator clause.
   * @throws {Error} If the operator has no Astra counterpart here.
   */
  private buildFilterItem(filter: MetadataFilter): Filter<SomeDoc> {
    const { key, operator, value } = filter;
    switch (operator) {
      case FilterOperator.EQ:
        return { [key]: value };
      case FilterOperator.NE:
        return { [key]: { $ne: value } };
      case FilterOperator.GT:
        return { [key]: { $gt: value } };
      case FilterOperator.LT:
        return { [key]: { $lt: value } };
      case FilterOperator.GTE:
        return { [key]: { $gte: value } };
      case FilterOperator.LTE:
        return { [key]: { $lte: value } };
      case FilterOperator.IN:
        return { [key]: { $in: parseArrayValue(value) } };
      case FilterOperator.NIN:
        return { [key]: { $nin: parseArrayValue(value) } };
      case FilterOperator.IS_EMPTY:
        return { [key]: { $size: 0 } };
      default:
        throw new Error(`Not supported filter operator: ${operator}`);
    }
  }
}
export * from "@llamaindex/astra";
@@ -0,0 +1 @@
export * from "@llamaindex/azure";
@@ -1,235 +1 @@
import type { BaseNode } from "@llamaindex/core/schema";
import { MetadataMode } from "@llamaindex/core/schema";
import {
ChromaClient,
type ChromaClientParams,
type DeleteParams,
type QueryRecordsParams,
type QueryResponse,
type Where,
type WhereDocument,
} from "chromadb";
import {
BaseVectorStore,
FilterCondition,
FilterOperator,
VectorStoreQueryMode,
type MetadataFilters,
type VectorStoreBaseParams,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
type ChromaDeleteOptions = {
where?: Where;
whereDocument?: WhereDocument;
};
type ChromaQueryOptions = {
whereDocument?: WhereDocument;
};
type Collection = Awaited<ReturnType<ChromaClient["getOrCreateCollection"]>>;
const DEFAULT_TEXT_KEY = "text";
type ChromaFilterCondition = "$and" | "$or";
type ChromaFilterOperator =
| "$eq"
| "$ne"
| "$gt"
| "$lt"
| "$gte"
| "$lte"
| "$in"
| "$nin";
/**
 * Vector store that keeps nodes in a ChromaDB collection. The collection is
 * fetched (or created) lazily on first use and cached afterwards.
 */
export class ChromaVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;
  textKey: string;

  private chromaClient: ChromaClient;
  private collection: Collection | null = null;
  private collectionName: string;

  /**
   * @param init - Collection name, optional text key (defaults to
   * `DEFAULT_TEXT_KEY`) and optional parameters for the Chroma client.
   */
  constructor(
    init: {
      collectionName: string;
      textKey?: string;
      chromaClientParams?: ChromaClientParams;
    } & VectorStoreBaseParams,
  ) {
    super(init);
    this.collectionName = init.collectionName;
    this.chromaClient = new ChromaClient(init.chromaClientParams);
    this.textKey = init.textKey ?? DEFAULT_TEXT_KEY;
  }

  /** Returns the underlying Chroma client. */
  client(): ChromaClient {
    return this.chromaClient;
  }

  /** Returns the cached collection, fetching or creating it on first call. */
  async getCollection(): Promise<Collection> {
    if (this.collection) {
      return this.collection;
    }
    const fetched = await this.chromaClient.getOrCreateCollection({
      name: this.collectionName,
    });
    this.collection = fetched;
    return this.collection;
  }

  /** Converts nodes into the column-wise payload passed to `collection.add`. */
  private getDataToInsert(nodes: BaseNode[]) {
    const metadatas = nodes.map((node) =>
      nodeToMetadata(node, true, this.textKey, this.flatMetadata),
    );
    const embeddings = nodes.map((node) => node.getEmbedding());
    const ids = nodes.map((node) => node.id_);
    const documents = nodes.map((node) => node.getContent(MetadataMode.NONE));

    return { embeddings, ids, metadatas, documents };
  }

  /**
   * Adds nodes to the collection.
   * @returns The ids of the given nodes; `[]` when there is nothing to add.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!nodes || nodes.length === 0) {
      return [];
    }

    const payload = this.getDataToInsert(nodes);
    const target = await this.getCollection();
    await target.add(payload);
    return nodes.map((node) => node.id_);
  }

  /**
   * Deletes the record whose id equals `refDocId`, optionally narrowed by
   * the `where` / `whereDocument` clauses in `deleteOptions`.
   */
  async delete(
    refDocId: string,
    deleteOptions?: ChromaDeleteOptions,
  ): Promise<void> {
    const target = await this.getCollection();
    const request = {
      ids: [refDocId],
      where: deleteOptions?.where,
      whereDocument: deleteOptions?.whereDocument,
    } as DeleteParams;
    await target.delete(request);
  }

  /** Maps a filter condition onto Chroma's `$and` / `$or`. */
  private transformChromaFilterCondition(
    condition: FilterCondition,
  ): ChromaFilterCondition {
    if (condition === FilterCondition.AND) {
      return "$and";
    }
    if (condition === FilterCondition.OR) {
      return "$or";
    }
    throw new Error(`Filter condition ${condition} not supported`);
  }

  /** Maps a filter operator onto Chroma's operator; throws for unsupported ones. */
  private transformChromaFilterOperator(
    operator: FilterOperator,
  ): ChromaFilterOperator {
    const supported = new Map<FilterOperator, ChromaFilterOperator>([
      [FilterOperator.EQ, "$eq"],
      [FilterOperator.NE, "$ne"],
      [FilterOperator.GT, "$gt"],
      [FilterOperator.LT, "$lt"],
      [FilterOperator.GTE, "$gte"],
      [FilterOperator.LTE, "$lte"],
      [FilterOperator.IN, "$in"],
      [FilterOperator.NIN, "$nin"],
    ]);

    const mapped = supported.get(operator);
    if (mapped === undefined) {
      throw new Error(`Filter operator ${operator} not supported`);
    }
    return mapped;
  }

  /**
   * Builds a Chroma `where` clause from metadata filters. A single filter is
   * returned bare, several are wrapped under the condition key (default
   * `$and`), and no filters produce `{}`.
   */
  private toChromaFilter(filters: MetadataFilters): Where {
    const combinator = filters.condition
      ? this.transformChromaFilterCondition(
          filters.condition as FilterCondition,
        )
      : "$and";

    if (!filters.filters) {
      return {};
    }

    const clauses: Where[] = filters.filters.map((filter): Where => {
      if (!filter.operator) {
        return { [filter.key]: filter.value };
      }
      return {
        [filter.key]: {
          [this.transformChromaFilterOperator(
            filter.operator as FilterOperator,
          )]: filter.value,
        },
      };
    });

    if (clauses.length === 1) {
      return clauses[0]!;
    }

    const combined: Where = {};
    if (clauses.length > 1) {
      combined[combinator] = clauses;
    }
    return combined;
  }

  /**
   * Runs a similarity query against the collection.
   *
   * @throws {Error} If `query.docIds` is set or `query.mode` is not the
   * default mode; neither is supported by this store.
   * @returns Nodes, ids and similarities, where each similarity is computed
   * as `1 - distance`.
   */
  async query(
    query: VectorStoreQuery,
    options?: ChromaQueryOptions,
  ): Promise<VectorStoreQueryResult> {
    if (query.docIds) {
      throw new Error("ChromaDB does not support querying by docIDs");
    }
    if (query.mode != VectorStoreQueryMode.DEFAULT) {
      throw new Error("ChromaDB does not support querying by mode");
    }

    const where: Where = query.filters
      ? this.toChromaFilter(query.filters)
      : {};
    const hasWhere = Object.keys(where).length;

    const target = await this.getCollection();
    const request = {
      queryEmbeddings: query.queryEmbedding ?? undefined,
      queryTexts: query.queryStr ?? undefined,
      nResults: query.similarityTopK,
      where: hasWhere ? where : undefined,
      whereDocument: options?.whereDocument,
      //ChromaDB doesn't return the result embeddings by default so we need to include them
      include: ["distances", "metadatas", "documents", "embeddings"],
    } as QueryRecordsParams;
    const queryResponse: QueryResponse = await target.query(request);

    // Only the first result list is read; the request carries a single query.
    const nodes = queryResponse.ids[0]!.map((id, index) => {
      const text = (queryResponse.documents as string[][])[0]![index];
      const metaData = queryResponse.metadatas[0]![index] ?? {};
      const node = metadataDictToNode(metaData);
      node.setContent(text);
      return node;
    });
    const similarities = (queryResponse.distances as number[][])[0]!.map(
      (distance) => 1 - distance,
    );

    const result: VectorStoreQueryResult = {
      nodes,
      similarities,
      ids: queryResponse.ids[0]!,
    };
    return result;
  }
}
export * from "@llamaindex/chroma";
@@ -1,279 +1 @@
import type { ChannelOptions } from "@grpc/grpc-js";
import { BaseNode, MetadataMode, type Metadata } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import {
DataType,
MilvusClient,
type ClientConfig,
type DeleteReq,
type RowData,
type SearchSimpleReq,
} from "@zilliz/milvus2-sdk-node";
import {
BaseVectorStore,
type MetadataFilters,
type VectorStoreBaseParams,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
parsePrimitiveValue,
} from "./utils.js";
/**
 * Renders metadata filters as a boolean expression string over the
 * `metadata[...]` field.
 *
 * Each filter becomes one clause; the clauses are joined with the filter
 * condition (default "and").
 *
 * @param scalarFilters - The filters to translate.
 * @returns The combined filter expression.
 * @throws {Error} If a filter uses an operator that is not handled here.
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const joiner = scalarFilters.condition ?? "and";

  const clauses = scalarFilters.filters.map((filter): string => {
    const { key, operator, value } = filter;
    const field = `metadata["${key}"]`;

    // Equality checks always compare against a quoted value.
    if (operator === "==" || operator === "!=") {
      return `${field} ${operator} "${parsePrimitiveValue(value)}"`;
    }

    if (operator === "in") {
      const quoted = parseArrayValue(value)
        .map((v) => `"${v}"`)
        .join(", ");
      return `${field} ${operator} [${quoted}]`;
    }

    if (operator === "nin") {
      // Milvus does not support `nin` operator, so every value is checked manually.
      // Produces: metadata["key"] != "value1" && metadata["key"] != "value2"
      return parseArrayValue(value)
        .map((v) => `${field} != "${v}"`)
        .join(" && ");
    }

    // Ordering comparisons use the value unquoted.
    if (
      operator === "<" ||
      operator === "<=" ||
      operator === ">" ||
      operator === ">="
    ) {
      return `${field} ${operator} ${parsePrimitiveValue(value)}`;
    }

    throw new Error(`Operator ${operator} is not supported.`);
  });

  return clauses.join(` ${joiner} `);
}
/**
 * Vector store backed by a Milvus collection.
 *
 * Each node is stored as one row made of four fields: a VarChar primary key,
 * the embedding vector, the node text and a JSON metadata column. The field
 * names are configurable through the constructor.
 */
export class MilvusVectorStore extends BaseVectorStore {
  public storesText: boolean = true;
  public isEmbeddingQuery?: boolean = false;
  private flatMetadata: boolean = true;
  private milvusClient: MilvusClient;
  // Set once the collection has been verified/created and loaded.
  private collectionInitialized = false;
  private collectionName: string;
  private idKey: string;
  private contentKey: string;
  private metadataKey: string;
  private embeddingKey: string;

  /**
   * @param init Optional settings. Either pass a ready `milvusClient`, or let
   * the store build one from `params`, falling back to the `MILVUS_ADDRESS`,
   * `MILVUS_SSL`, `MILVUS_USERNAME` and `MILVUS_PASSWORD` env variables.
   * `collection` defaults to "llamacollection"; the field names default to
   * "id", "content", "metadata" and "embedding".
   * @throws Error when no client is given and no address can be resolved.
   */
  constructor(
    init?: Partial<{ milvusClient: MilvusClient }> &
      VectorStoreBaseParams & {
        params?: {
          configOrAddress: ClientConfig | string;
          ssl?: boolean;
          username?: string;
          password?: string;
          channelOptions?: ChannelOptions;
        };
        collection?: string;
        idKey?: string;
        contentKey?: string;
        metadataKey?: string;
        embeddingKey?: string;
      },
  ) {
    super(init);
    if (init?.milvusClient) {
      this.milvusClient = init.milvusClient;
    } else {
      const configOrAddress =
        init?.params?.configOrAddress ?? getEnv("MILVUS_ADDRESS");
      const ssl = init?.params?.ssl ?? getEnv("MILVUS_SSL") === "true";
      const username = init?.params?.username ?? getEnv("MILVUS_USERNAME");
      const password = init?.params?.password ?? getEnv("MILVUS_PASSWORD");
      if (!configOrAddress) {
        throw new Error("Must specify MILVUS_ADDRESS via env variable.");
      }
      this.milvusClient = new MilvusClient(
        configOrAddress,
        ssl,
        username,
        password,
        init?.params?.channelOptions,
      );
    }
    this.collectionName = init?.collection ?? "llamacollection";
    this.idKey = init?.idKey ?? "id";
    this.contentKey = init?.contentKey ?? "content";
    this.metadataKey = init?.metadataKey ?? "metadata";
    this.embeddingKey = init?.embeddingKey ?? "embedding";
  }

  /** @returns The underlying Milvus client. */
  public client(): MilvusClient {
    return this.milvusClient;
  }

  /**
   * Create the collection with the configured field names and an index on the
   * embedding field.
   *
   * The vector dimension (1536) and the VarChar limits (200 for the id, 9000
   * for the content) are fixed here. `createIndex` is called with only the
   * collection and field name.
   */
  private async createCollection() {
    await this.milvusClient.createCollection({
      collection_name: this.collectionName,
      fields: [
        {
          name: this.idKey,
          data_type: DataType.VarChar,
          is_primary_key: true,
          max_length: 200,
        },
        {
          name: this.embeddingKey,
          data_type: DataType.FloatVector,
          dim: 1536,
        },
        {
          name: this.contentKey,
          data_type: DataType.VarChar,
          max_length: 9000,
        },
        {
          name: this.metadataKey,
          data_type: DataType.JSON,
        },
      ],
    });
    await this.milvusClient.createIndex({
      collection_name: this.collectionName,
      field_name: this.embeddingKey,
    });
  }

  /**
   * Wait for the client connection, create the collection when it does not
   * exist yet, and load it. The work is done once per store instance.
   */
  private async ensureCollection(): Promise<void> {
    if (!this.collectionInitialized) {
      await this.milvusClient.connectPromise;
      // Check collection exists
      const isCollectionExist = await this.milvusClient.hasCollection({
        collection_name: this.collectionName,
      });
      if (!isCollectionExist.value) {
        await this.createCollection();
      }
      await this.milvusClient.loadCollectionSync({
        collection_name: this.collectionName,
      });
      this.collectionInitialized = true;
    }
  }

  /**
   * Insert nodes into the collection.
   *
   * @param nodes Nodes to store; each is written as one row.
   * @returns The ids reported by the insert, as strings; empty when the
   * response carries no ids.
   */
  public async add(nodes: BaseNode<Metadata>[]): Promise<string[]> {
    await this.ensureCollection();
    const result = await this.milvusClient.insert({
      collection_name: this.collectionName,
      data: nodes.map((node) => {
        const metadata = nodeToMetadata(
          node,
          true,
          this.contentKey,
          this.flatMetadata,
        );
        const entry: RowData = {
          [this.idKey]: node.id_,
          [this.embeddingKey]: node.getEmbedding(),
          [this.contentKey]: node.getContent(MetadataMode.NONE),
          [this.metadataKey]: metadata,
        };
        return entry;
      }),
    });
    if (!result.IDs) {
      return [];
    }
    if ("int_id" in result.IDs) {
      return result.IDs.int_id.data.map((i) => String(i));
    }
    return result.IDs.str_id.data.map((s) => String(s));
  }

  /**
   * Delete a row by id.
   *
   * Note that `refDocId` is passed as the `ids` of the delete request, i.e. it
   * is matched against the same key `add()` fills with `node.id_`.
   *
   * @param refDocId Id to delete.
   * @param deleteOptions Extra delete request options; they are spread last
   * and therefore override `collection_name` if they set it.
   */
  public async delete(
    refDocId: string,
    deleteOptions?: Omit<DeleteReq, "ids">,
  ): Promise<void> {
    await this.ensureCollection();
    await this.milvusClient.delete({
      ids: [refDocId],
      collection_name: this.collectionName,
      ...deleteOptions,
    });
  }

  /**
   * Convert metadata filters to a Milvus filter expression.
   *
   * @param filters Filters to convert.
   * @returns The expression, or `undefined` when no filters are given.
   */
  public toMilvusFilter(filters?: MetadataFilters): string | undefined {
    if (!filters) return undefined;
    // TODO: Milvus also support standard filters, we can add it later
    return parseScalarFilters(filters);
  }

  /**
   * Run a similarity search.
   *
   * @param query Query embedding, top-k limit and optional metadata filters.
   * @param _options Unused.
   * @returns Matching nodes with their scores and ids, in result order.
   */
  public async query(
    query: VectorStoreQuery,
    _options?: object,
  ): Promise<VectorStoreQueryResult> {
    await this.ensureCollection();
    const found = await this.milvusClient.search(<SearchSimpleReq>{
      collection_name: this.collectionName,
      limit: query.similarityTopK,
      vector: query.queryEmbedding,
      filter: this.toMilvusFilter(query.filters),
    });
    const nodes: BaseNode<Metadata>[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];
    for (const result of found.results) {
      // Read the fields under the configured names, the same ones `add()`
      // writes, rather than the default "metadata"/"content" names.
      const node = metadataDictToNode(result[this.metadataKey]);
      node.setContent(result[this.contentKey]);
      nodes.push(node);
      similarities.push(result.score);
      ids.push(String(result.id));
    }
    return {
      nodes,
      similarities,
      ids,
    };
  }

  /** No-op: nothing is persisted by this method. */
  public async persist() {
    // no need to do anything
  }
}
export * from "@llamaindex/milvus";
@@ -1,340 +1 @@
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
import type { BaseNode } from "@llamaindex/core/schema";
import { MetadataMode } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import type { BulkWriteOptions, Collection } from "mongodb";
import { MongoClient } from "mongodb";
import {
BaseVectorStore,
FilterCondition,
type FilterOperator,
type MetadataFilter,
type MetadataFilters,
type VectorStoreBaseParams,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
// Default definition of the embedding field used when the Atlas Search index
// is created. The store spreads the caller's `embeddingDefinition` over these
// values, so each entry can be overridden individually.
// For the field type, see
// https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
const DEFAULT_EMBEDDING_DEFINITION = {
type: "knnVector",
dimensions: 1536,
similarity: "cosine",
};
/**
 * Map a filter operator to the corresponding MongoDB query operator.
 *
 * @param operator Operator as used in metadata filters (e.g. "==", "in").
 * @returns The MongoDB operator name (e.g. "$eq", "$in").
 * @throws Error when the operator has no mapping.
 */
function mapLcMqlFilterOperators(operator: string): string {
  const operatorMap: { [key in FilterOperator]?: string } = {
    "==": "$eq",
    "<": "$lt",
    "<=": "$lte",
    ">": "$gt",
    ">=": "$gte",
    "!=": "$ne",
    in: "$in",
    nin: "$nin",
  };
  // Only own entries count: a bare property lookup would also resolve names
  // inherited from Object.prototype ("toString", "constructor", ...) and
  // return a non-string value instead of rejecting the operator.
  const mqlOperator = Object.prototype.hasOwnProperty.call(
    operatorMap,
    operator,
  )
    ? operatorMap[operator as FilterOperator]
    : undefined;
  if (!mqlOperator) throw new Error(`Unsupported operator: ${operator}`);
  return mqlOperator;
}
/**
 * Convert metadata filters into a MongoDB filter document.
 *
 * - No filters, or an empty filter list, yields `{}`.
 * - A single filter is returned directly as `{ key: { $op: value } }`.
 * - Several filters are combined under `$and` or `$or` according to
 *   `filters.condition`; an omitted condition is treated as AND.
 *
 * @param filters Filters to convert.
 * @returns The MongoDB filter document.
 * @throws Error when the condition is set but is neither AND nor OR, or when
 * a filter uses an unsupported operator.
 */
function toMongoDBFilter(filters?: MetadataFilters): Record<string, unknown> {
  // An empty list must not reach the `$and`/`$or` branches below: it would
  // build a logical operator with an empty array.
  if (!filters || filters.filters.length === 0) return {};
  const createFilterObject = (mf: MetadataFilter) => ({
    [mf.key]: {
      [mapLcMqlFilterOperators(mf.operator)]: mf.value,
    },
  });
  if (filters.filters.length === 1) {
    return createFilterObject(filters.filters[0]!);
  }
  const condition = filters.condition ?? FilterCondition.AND;
  if (condition === FilterCondition.AND) {
    return { $and: filters.filters.map(createFilterObject) };
  }
  if (condition === FilterCondition.OR) {
    return { $or: filters.filters.map(createFilterObject) };
  }
  throw new Error("filters condition not recognized. Must be AND or OR");
}
/**
 * Vector store that uses MongoDB Atlas for storage and vector search.
 * This store uses the $vectorSearch aggregation stage to perform vector similarity search.
 */
export class MongoDBAtlasVectorSearch extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  dbName: string;
  collectionName: string;
  /**
   * Whether `ensureCollection` creates the search index when it is missing.
   *
   * Default: true
   */
  autoCreateIndex: boolean;
  /**
   * Definition of the embedding field in the search index; the constructor
   * merges the caller's values over the module defaults.
   */
  embeddingDefinition: Record<string, unknown>;
  /**
   * Metadata fields that are copied to the top level of each stored document
   * and indexed with type "token" so they can be used in filters.
   */
  indexedMetadataFields: string[];

  /**
   * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
   */
  mongodbClient: MongoClient;

  /**
   * Name of the vector index. If invalid, Mongo will silently ignore this issue and return 0 results.
   *
   * Default: "default"
   */
  indexName: string;

  /**
   * Name of the key containing the embedding vector.
   *
   * Default: "embedding"
   */
  embeddingKey: string;

  /**
   * Name of the key containing the node id.
   *
   * Default: "id"
   */
  idKey: string;

  /**
   * Name of the key containing the node text.
   *
   * Default: "text"
   */
  textKey: string;

  /**
   * Name of the key containing the node metadata.
   *
   * Default: "metadata"
   */
  metadataKey: string;

  /**
   * Options to pass to the insertMany function when adding nodes.
   */
  insertOptions?: BulkWriteOptions | undefined;

  /**
   * Function to determine the number of candidates to retrieve for a given query.
   * In case your results are not good, you might tune this value.
   *
   * {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ | Run Vector Search Queries}
   *
   * {@link https://arxiv.org/abs/1603.09320 | Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs}
   *
   *
   * Default: query.similarityTopK * 10
   */
  numCandidates: (query: VectorStoreQuery) => number;

  private collection?: Collection;

  /**
   * @param init Store settings. `dbName` and `collectionName` select the
   * collection; `mongodbClient` is optional and, when omitted, a client is
   * created from the MONGODB_URI env variable.
   * @throws Error when no client is passed and MONGODB_URI is not set.
   */
  constructor(
    init: Partial<MongoDBAtlasVectorSearch> & {
      dbName: string;
      collectionName: string;
      embedModel?: BaseEmbedding;
      autoCreateIndex?: boolean;
      indexedMetadataFields?: string[];
      embeddingDefinition?: Record<string, unknown>;
    } & VectorStoreBaseParams,
  ) {
    super(init);
    if (init.mongodbClient) {
      this.mongodbClient = init.mongodbClient;
    } else {
      const mongoUri = getEnv("MONGODB_URI");
      if (!mongoUri) {
        throw new Error(
          "Must specify MONGODB_URI via env variable if not directly passing in client.",
        );
      }
      this.mongodbClient = new MongoClient(mongoUri);
    }

    this.dbName = init.dbName ?? "default_db";
    this.collectionName = init.collectionName ?? "default_collection";
    this.autoCreateIndex = init.autoCreateIndex ?? true;
    this.indexedMetadataFields = init.indexedMetadataFields ?? [];
    this.embeddingDefinition = {
      ...DEFAULT_EMBEDDING_DEFINITION,
      ...(init.embeddingDefinition ?? {}),
    };
    this.indexName = init.indexName ?? "default";
    this.embeddingKey = init.embeddingKey ?? "embedding";
    this.idKey = init.idKey ?? "id";
    this.textKey = init.textKey ?? "text";
    this.metadataKey = init.metadataKey ?? "metadata";
    this.numCandidates =
      init.numCandidates ?? ((query) => query.similarityTopK * 10);
    this.insertOptions = init.insertOptions;
  }

  /**
   * Return the collection, creating it on first use.
   *
   * When `autoCreateIndex` is enabled, every call lists the collection's
   * search indexes and creates `indexName` if it is not among them.
   *
   * @returns The collection used by this store.
   */
  async ensureCollection(): Promise<Collection> {
    if (!this.collection) {
      const collection = await this.mongodbClient
        .db(this.dbName)
        .createCollection(this.collectionName);
      this.collection = collection;
    }

    if (this.autoCreateIndex) {
      const searchIndexes = await this.collection.listSearchIndexes().toArray();
      const indexExists = searchIndexes.some(
        (index) => index.name === this.indexName,
      );
      if (!indexExists) {
        const additionalDefinition: Record<string, { type: string }> = {};
        this.indexedMetadataFields.forEach((field) => {
          additionalDefinition[field] = { type: "token" };
        });
        await this.collection.createSearchIndex({
          name: this.indexName,
          definition: {
            mappings: {
              dynamic: true,
              fields: {
                // Index the field the vectors are actually stored under and
                // that `query()` searches (`path: this.embeddingKey`).
                [this.embeddingKey]: this.embeddingDefinition,
                ...additionalDefinition,
              },
            },
          },
        });
      }
    }

    return this.collection;
  }

  /**
   * Add nodes to the vector store.
   *
   * @param nodes Nodes to add to the vector store
   * @returns List of node ids that were added
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!nodes || nodes.length === 0) {
      return [];
    }

    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.textKey,
        this.flatMetadata,
      );

      // Include the specified metadata fields in the top level of the document (to help filter)
      const populatedMetadata: Record<string, unknown> = {};
      for (const field of this.indexedMetadataFields) {
        populatedMetadata[field] = metadata[field];
      }

      return {
        [this.idKey]: node.id_,
        [this.embeddingKey]: node.getEmbedding(),
        [this.textKey]: node.getContent(MetadataMode.NONE) || "",
        [this.metadataKey]: metadata,
        ...populatedMetadata,
      };
    });

    const collection = await this.ensureCollection();
    await collection.insertMany(dataToInsert, this.insertOptions);
    return nodes.map((node) => node.id_);
  }

  /**
   * Delete nodes from the vector store with the given refDocId.
   *
   * Removes every document whose `<metadataKey>.ref_doc_id` equals `refDocId`.
   *
   * @param refDocId The refDocId of the nodes to delete
   * @param deleteOptions Options to pass to the deleteMany function
   */
  async delete(refDocId: string, deleteOptions?: object): Promise<void> {
    const collection = await this.ensureCollection();
    await collection.deleteMany(
      {
        [`${this.metadataKey}.ref_doc_id`]: refDocId,
      },
      deleteOptions,
    );
  }

  /** @returns The MongoClient used by this store. */
  client() {
    return this.mongodbClient;
  }

  /**
   * Perform a vector similarity search query.
   *
   * @param query The query to run
   * @param options Unused.
   * @returns List of nodes and their similarities
   */
  async query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult> {
    const params: Record<string, unknown> = {
      queryVector: query.queryEmbedding,
      path: this.embeddingKey,
      numCandidates: this.numCandidates(query),
      limit: query.similarityTopK,
      index: this.indexName,
    };

    if (query.filters) {
      params.filter = toMongoDBFilter(query.filters);
    }

    const queryField = { $vectorSearch: params };
    const pipeline = [
      queryField,
      {
        $project: {
          score: { $meta: "vectorSearchScore" },
          // Drop the stored vector from the returned documents.
          [this.embeddingKey]: 0,
        },
      },
    ];

    const collection = await this.ensureCollection();
    const cursor = collection.aggregate(pipeline);

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const res of cursor) {
      const text = res[this.textKey];
      const score = res.score;
      const id = res[this.idKey];
      const metadata = res[this.metadataKey];

      const node = metadataDictToNode(metadata);
      node.setContent(text);

      ids.push(id);
      nodes.push(node);
      similarities.push(score);
    }

    return {
      nodes,
      similarities,
      ids,
    };
  }
}
export * from "@llamaindex/mongodb";

Some files were not shown because too many files have changed in this diff Show More