mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 89ea1e1d31 | |||
| d9bbaf95f3 | |||
| 8744796c06 | |||
| f02621e379 | |||
| 1892e1ce1d | |||
| d90d8959a5 | |||
| 4df1fe6cca | |||
| 34faf4821a | |||
| b24ffc6174 | |||
| 82e25c924c | |||
| 1931bbca74 | |||
| 94566169fb | |||
| d6c270ec7a |
@@ -150,7 +150,7 @@ jobs:
|
||||
done
|
||||
- name: Pack provider packages
|
||||
run: |
|
||||
for dir in packages/providers/*; do
|
||||
for dir in packages/providers/* packages/providers/storage/*; do
|
||||
if [ -d "$dir" ] && [ -f "$dir/package.json" ]; then
|
||||
echo "Packing $dir"
|
||||
pnpm pack --pack-destination ${{ runner.temp }} -C $dir
|
||||
|
||||
@@ -76,7 +76,7 @@ If you need any of those classes, you have to import them instead directly thoug
|
||||
Here's an example for importing the `PineconeVectorStore` class:
|
||||
|
||||
```typescript
|
||||
import { PineconeVectorStore } from "llamaindex/storage/vectorStore/PineconeVectorStore";
|
||||
import { PineconeVectorStore } from "llamaindex/vector-store/PineconeVectorStore";
|
||||
```
|
||||
|
||||
As the `PDFReader` is not working with the Edge runtime, here's how to use the `SimpleDirectoryReader` with the `LlamaParseReader` to load PDFs:
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
# @llamaindex/doc
|
||||
|
||||
## 0.0.36
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- f02621e: Fix internal links between chapters
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [d6c270e]
|
||||
- Updated dependencies [1892e1c]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/cloud@2.0.22
|
||||
- @llamaindex/openai@0.1.46
|
||||
- @llamaindex/node-parser@0.0.22
|
||||
- @llamaindex/readers@1.0.23
|
||||
|
||||
## 0.0.35
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/doc",
|
||||
"version": "0.0.35",
|
||||
"version": "0.0.36",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"build": "pnpm run build:docs && next build",
|
||||
|
||||
@@ -20,7 +20,7 @@ npm install llamaindex
|
||||
|
||||
## Choose your model
|
||||
|
||||
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](local_model).
|
||||
By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](3_local_model).
|
||||
|
||||
## Get an OpenAI API key
|
||||
|
||||
@@ -36,4 +36,4 @@ We'll use `dotenv` to pull the API key out of that .env file, so also run:
|
||||
npm install dotenv
|
||||
```
|
||||
|
||||
Now you're ready to [create your agent](create_agent).
|
||||
Now you're ready to [create your agent](2_create_agent).
|
||||
|
||||
@@ -177,5 +177,5 @@ The second piece of output is the response from the LLM itself, where the `messa
|
||||
Great! We've built an agent with tool use! Next you can:
|
||||
|
||||
- [See the full code](https://github.com/run-llama/ts-agents/blob/main/1_agent/agent.ts)
|
||||
- [Switch to a local LLM](local_model)
|
||||
- Move on to [add Retrieval-Augmented Generation to your agent](agentic_rag)
|
||||
- [Switch to a local LLM](3_local_model)
|
||||
- Move on to [add Retrieval-Augmented Generation to your agent](4_agentic_rag)
|
||||
|
||||
@@ -89,4 +89,4 @@ You can use a ReActAgent instead of an OpenAIAgent in any of the further example
|
||||
|
||||
### Next steps
|
||||
|
||||
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](agentic_rag).
|
||||
Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](4_agentic_rag).
|
||||
|
||||
@@ -153,4 +153,4 @@ The `OpenAIContextAwareAgent` approach simplifies the setup by allowing you to d
|
||||
|
||||
On the other hand, using the `QueryEngineTool` offers more flexibility and power. This method allows for customization in how queries are constructed and executed, enabling you to query data from various storages and process them in different ways. However, this added flexibility comes with increased complexity and response time due to the separate tool call and queryEngine generating tool output by LLM that is then passed to the agent.
|
||||
|
||||
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](rag_and_tools)!
|
||||
So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](5_rag_and_tools)!
|
||||
|
||||
@@ -127,4 +127,4 @@ In the final tool call, it used the `sumNumbers` function to add the two budgets
|
||||
}
|
||||
```
|
||||
|
||||
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](llamaparse).
|
||||
Great! Now let's improve accuracy by improving our parsing with [LlamaParse](6_llamaparse).
|
||||
|
||||
@@ -17,4 +17,4 @@ const documents = await reader.loadData("../data/sf_budget_2023_2024.pdf");
|
||||
|
||||
Now you will be able to ask more complicated questions of the same PDF and get better results. You can find this code [in our repo](https://github.com/run-llama/ts-agents/blob/main/4_llamaparse/agent.ts).
|
||||
|
||||
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](qdrant).
|
||||
Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](7_qdrant).
|
||||
|
||||
@@ -65,13 +65,13 @@ Since parsing a PDF can be slow, especially a large one, using the pre-parsed ch
|
||||
|
||||
In this guide you've learned how to
|
||||
|
||||
- [Create an agent](create_agent)
|
||||
- [Create an agent](2_create_agent)
|
||||
- Use remote LLMs like GPT-4
|
||||
- [Use local LLMs like Mixtral](local_model)
|
||||
- [Create a RAG query engine](agentic_rag)
|
||||
- [Turn functions and query engines into agent tools](rag_and_tools)
|
||||
- [Use local LLMs like Mixtral](3_local_model)
|
||||
- [Create a RAG query engine](4_agentic_rag)
|
||||
- [Turn functions and query engines into agent tools](5_rag_and_tools)
|
||||
- Combine those tools
|
||||
- [Enhance your parsing with LlamaParse](llamaparse)
|
||||
- [Enhance your parsing with LlamaParse](6_llamaparse)
|
||||
- Persist your data in a vector store
|
||||
|
||||
The next steps are up to you! Try creating more complex functions and query engines, and set your agent loose on the world.
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/core-e2e
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 34faf48: chore: move vector stores to their own packages
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/cloudflare-worker-agent-test
|
||||
|
||||
## 0.0.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloudflare-worker-agent-test",
|
||||
"version": "0.0.127",
|
||||
"version": "0.0.128",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/llama-parse-browser-test
|
||||
|
||||
## 0.0.42
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d6c270e]
|
||||
- @llamaindex/cloud@2.0.22
|
||||
|
||||
## 0.0.41
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/llama-parse-browser-test",
|
||||
"private": true,
|
||||
"version": "0.0.41",
|
||||
"version": "0.0.42",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.7.2",
|
||||
"vite": "^5.4.11",
|
||||
"vite": "^5.4.12",
|
||||
"vite-plugin-wasm": "^3.3.0"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/next-agent-test
|
||||
|
||||
## 0.1.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.1.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-agent-test",
|
||||
"version": "0.1.127",
|
||||
"version": "0.1.128",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
# test-edge-runtime
|
||||
|
||||
## 0.1.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.1.126
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/nextjs-edge-runtime-test",
|
||||
"version": "0.1.126",
|
||||
"version": "0.1.127",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/next-node-runtime
|
||||
|
||||
## 0.0.109
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.108
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/next-node-runtime-test",
|
||||
"version": "0.0.108",
|
||||
"version": "0.0.109",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/waku-query-engine-test
|
||||
|
||||
## 0.0.128
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.127
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/waku-query-engine-test",
|
||||
"version": "0.0.127",
|
||||
"version": "0.0.128",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/e2e",
|
||||
"private": true,
|
||||
"version": "0.0.7",
|
||||
"version": "0.0.8",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"e2e": "node --import tsx --import ./mock-register.js --test ./node/**/*.e2e.ts",
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
# examples
|
||||
|
||||
## 0.0.23
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 8744796: Update the chromadb npm client to support the latest chromadb image (0.6.3)
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/vercel@0.0.8
|
||||
- @llamaindex/readers@1.0.23
|
||||
|
||||
## 0.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { OpenAI, OpenAIAgent, WikipediaTool } from "llamaindex";
|
||||
import { OpenAI, OpenAIAgent } from "llamaindex";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
async function main() {
|
||||
const llm = new OpenAI({ model: "gpt-4-turbo" });
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Anthropic, FunctionTool, Settings, WikipediaTool } from "llamaindex";
|
||||
import { Anthropic, FunctionTool, Settings } from "llamaindex";
|
||||
import { AnthropicAgent } from "llamaindex/agent/anthropic";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
Settings.callbackManager.on("llm-tool-call", (event) => {
|
||||
console.log("llm-tool-call", event.detail.toolCall);
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
const collectionName = "movie_reviews";
|
||||
|
||||
async function main() {
|
||||
const sourceFile: string = "./data/movie_reviews.csv";
|
||||
const sourceFile: string = "../data/movie_reviews.csv";
|
||||
|
||||
try {
|
||||
console.log(`Loading data from ${sourceFile}`);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/examples",
|
||||
"private": true,
|
||||
"version": "0.0.22",
|
||||
"version": "0.0.23",
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.0.5",
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
@@ -9,9 +9,9 @@
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@azure/search-documents": "^12.1.0",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@llamaindex/core": "^0.4.20",
|
||||
"@llamaindex/readers": "^1.0.22",
|
||||
"@llamaindex/vercel": "^0.0.7",
|
||||
"@llamaindex/core": "^0.4.21",
|
||||
"@llamaindex/readers": "^1.0.23",
|
||||
"@llamaindex/vercel": "^0.0.8",
|
||||
"@llamaindex/workflow": "^0.0.8",
|
||||
"@notionhq/client": "^2.2.15",
|
||||
"@pinecone-database/pinecone": "^4.0.0",
|
||||
@@ -22,10 +22,12 @@
|
||||
"commander": "^12.1.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"llamaindex": "^0.8.31",
|
||||
"mongodb": "^6.7.0",
|
||||
"llamaindex": "^0.8.32",
|
||||
"mongodb": "6.7.0",
|
||||
"pathe": "^1.1.2",
|
||||
"postgres": "^3.4.4"
|
||||
"postgres": "^3.4.4",
|
||||
"ajv": "^8.17.1",
|
||||
"wikipedia": "^2.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.9.0",
|
||||
|
||||
@@ -37,7 +37,7 @@ Read and follow the instructions in the README.md file located one directory up
|
||||
|
||||
To import documents and save the embedding vectors to your database:
|
||||
|
||||
> `npx tsx pg-vector-store/load-docs.ts data`
|
||||
> `npx tsx vector-store/pg/load-docs.ts data`
|
||||
|
||||
where data is the directory containing your input files. Using the `data` directory in the example above will read all of the files in that directory using the LlamaIndexTS default readers for each file type.
|
||||
|
||||
@@ -45,6 +45,23 @@ where data is the directory containing your input files. Using the `data` direct
|
||||
|
||||
To query using the resulting vector store:
|
||||
|
||||
> `npx tsx pg-vector-store/query.ts`
|
||||
> `npx tsx vector-store/pg/query.ts`
|
||||
|
||||
The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query.
|
||||
|
||||
## Supabase
|
||||
|
||||
You can try the supabase example by running:
|
||||
|
||||
> `npx tsx vector-store/pg/supabase.ts`
|
||||
|
||||
This will use the `POSTGRES_URL` environment variable to connect to your Supabase database.
|
||||
Get one from the Supabase project settings page. See more details here: https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
|
||||
|
||||
## Vercel
|
||||
|
||||
You can try the vercel example by running:
|
||||
|
||||
> `npx tsx vector-store/pg/vercel.ts`
|
||||
|
||||
For more information on Vercel Postgres, see: https://vercel.com/docs/storage/vercel-postgres/sdk
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
// load-docs.ts
|
||||
import {
|
||||
PGVectorStore,
|
||||
SimpleDirectoryReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
import fs from "node:fs/promises";
|
||||
|
||||
async function getSourceFilenames(sourceDir: string) {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import dotenv from "dotenv";
|
||||
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
|
||||
import { Document, VectorStoreQueryMode } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
import postgres from "postgres";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { PGVectorStore, VectorStoreIndex } from "llamaindex";
|
||||
import { VectorStoreIndex } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
async function main() {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import dotenv from "dotenv";
|
||||
import {
|
||||
SimpleDirectoryReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// Get direct connection string from Supabase and set it as POSTGRES_URL environment variable
|
||||
// https://supabase.com/docs/guides/database/connecting-to-postgres#direct-connection
|
||||
|
||||
const sourceDir = "../data";
|
||||
const connectionString = process.env.POSTGRES_URL;
|
||||
|
||||
const rdr = new SimpleDirectoryReader();
|
||||
const docs = await rdr.loadData({ directoryPath: sourceDir });
|
||||
const pgvs = new PGVectorStore({ clientConfig: { connectionString } });
|
||||
pgvs.setCollection(sourceDir);
|
||||
|
||||
const ctx = await storageContextFromDefaults({ vectorStore: pgvs });
|
||||
|
||||
const index = await VectorStoreIndex.fromDocuments(docs, {
|
||||
storageContext: ctx,
|
||||
});
|
||||
|
||||
const queryEngine = index.asQueryEngine();
|
||||
|
||||
const results = await queryEngine.query({
|
||||
query: "Information about the planet",
|
||||
});
|
||||
|
||||
console.log(results);
|
||||
@@ -1,7 +1,8 @@
|
||||
// https://vercel.com/docs/storage/vercel-postgres/sdk
|
||||
import { sql } from "@vercel/postgres";
|
||||
import dotenv from "dotenv";
|
||||
import { Document, PGVectorStore, VectorStoreQueryMode } from "llamaindex";
|
||||
import { Document, VectorStoreQueryMode } from "llamaindex";
|
||||
import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { openai } from "@ai-sdk/openai";
|
||||
import { VercelLLM } from "@llamaindex/vercel";
|
||||
import { LLMAgent, WikipediaTool } from "llamaindex";
|
||||
import { LLMAgent } from "llamaindex";
|
||||
import { WikipediaTool } from "../wiki";
|
||||
|
||||
async function main() {
|
||||
// Create an instance of VercelLLM with the OpenAI model
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/** Example of a tool that uses Wikipedia */
|
||||
|
||||
import type { BaseTool, ToolMetadata } from "@llamaindex/core/llms";
|
||||
import type { JSONSchemaType } from "ajv";
|
||||
import { default as wiki } from "wikipedia";
|
||||
@@ -7,7 +9,7 @@ type WikipediaParameter = {
|
||||
lang?: string;
|
||||
};
|
||||
|
||||
export type WikipediaToolParams = {
|
||||
type WikipediaToolParams = {
|
||||
metadata?: ToolMetadata<JSONSchemaType<WikipediaParameter>>;
|
||||
};
|
||||
|
||||
@@ -43,8 +45,8 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
|
||||
page: string,
|
||||
lang: string = this.DEFAULT_LANG,
|
||||
): Promise<string> {
|
||||
wiki.default.setLang(lang);
|
||||
const pageResult = await wiki.default.page(page, { autoSuggest: false });
|
||||
wiki.setLang(lang);
|
||||
const pageResult = await wiki.page(page, { autoSuggest: false });
|
||||
const content = await pageResult.content();
|
||||
return content;
|
||||
}
|
||||
@@ -53,7 +55,7 @@ export class WikipediaTool implements BaseTool<WikipediaParameter> {
|
||||
query,
|
||||
lang = this.DEFAULT_LANG,
|
||||
}: WikipediaParameter): Promise<string> {
|
||||
const searchResult = await wiki.default.search(query);
|
||||
const searchResult = await wiki.search(query);
|
||||
if (searchResult.results.length === 0) return "No search results.";
|
||||
return await this.loadData(searchResult.results[0].title, lang);
|
||||
}
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/autotool
|
||||
|
||||
## 5.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 5.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
# @llamaindex/autotool-01-node-example
|
||||
|
||||
## 0.0.75
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/autotool@5.0.32
|
||||
|
||||
## 0.0.74
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
"scripts": {
|
||||
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
|
||||
},
|
||||
"version": "0.0.74"
|
||||
"version": "0.0.75"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.119
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
- @llamaindex/autotool@5.0.32
|
||||
|
||||
## 0.1.118
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.118",
|
||||
"version": "0.1.119",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool",
|
||||
"type": "module",
|
||||
"version": "5.0.31",
|
||||
"version": "5.0.32",
|
||||
"description": "auto transpile your JS function to LLM Agent compatible",
|
||||
"files": [
|
||||
"dist",
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 2.0.22
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- d6c270e: feat: support pass project and org id to llama parse reader
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- @llamaindex/core@0.4.21
|
||||
|
||||
## 2.0.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "2.0.21",
|
||||
"version": "2.0.22",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
|
||||
@@ -31,6 +31,8 @@ var process: any;
|
||||
* See https://github.com/run-llama/llama_parse
|
||||
*/
|
||||
export class LlamaParseReader extends FileReader {
|
||||
project_id?: string | undefined;
|
||||
organization_id?: string | undefined;
|
||||
// The API key for the LlamaParse API. Can be set as an environment variable: LLAMA_CLOUD_API_KEY
|
||||
apiKey: string;
|
||||
// The base URL of the Llama Cloud Platform.
|
||||
@@ -259,6 +261,10 @@ export class LlamaParseReader extends FileReader {
|
||||
const response = await uploadFileApiV1ParsingUploadPost({
|
||||
client: this.#client,
|
||||
throwOnError: true,
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal: AbortSignal.timeout(this.maxTimeout * 1000),
|
||||
body,
|
||||
});
|
||||
@@ -284,6 +290,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
const { data } = result;
|
||||
@@ -300,6 +310,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -311,6 +325,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -322,6 +340,10 @@ export class LlamaParseReader extends FileReader {
|
||||
path: {
|
||||
job_id: jobId,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
signal,
|
||||
});
|
||||
break;
|
||||
@@ -511,6 +533,10 @@ export class LlamaParseReader extends FileReader {
|
||||
job_id: jobId,
|
||||
name: imageName,
|
||||
},
|
||||
query: {
|
||||
project_id: this.project_id ?? null,
|
||||
organization_id: this.organization_id ?? null,
|
||||
},
|
||||
});
|
||||
if (response.error) {
|
||||
throw new Error(`Failed to download image: ${response.error.detail}`);
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.79
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- @llamaindex/core@0.4.21
|
||||
|
||||
## 0.0.78
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.78",
|
||||
"version": "0.0.79",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/core
|
||||
|
||||
## 0.4.21
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
- 1931bbc: refactor: @llamaindex/azure
|
||||
|
||||
## 0.4.20
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/core",
|
||||
"type": "module",
|
||||
"version": "0.4.20",
|
||||
"version": "0.4.21",
|
||||
"description": "LlamaIndex Core Module",
|
||||
"exports": {
|
||||
"./agent": {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { Tokenizer } from "@llamaindex/env/tokenizers";
|
||||
import type { BaseEmbedding } from "../embeddings";
|
||||
import type { LLM } from "../llms";
|
||||
import {
|
||||
type CallbackManager,
|
||||
@@ -12,6 +13,11 @@ import {
|
||||
setChunkSize,
|
||||
withChunkSize,
|
||||
} from "./settings/chunk-size";
|
||||
import {
|
||||
getEmbeddedModel,
|
||||
setEmbeddedModel,
|
||||
withEmbeddedModel,
|
||||
} from "./settings/embedModel";
|
||||
import { getLLM, setLLM, withLLM } from "./settings/llm";
|
||||
import {
|
||||
getTokenizer,
|
||||
@@ -29,6 +35,15 @@ export const Settings = {
|
||||
withLLM<Result>(llm: LLM, fn: () => Result): Result {
|
||||
return withLLM(llm, fn);
|
||||
},
|
||||
get embedModel() {
|
||||
return getEmbeddedModel();
|
||||
},
|
||||
set embedModel(embedModel) {
|
||||
setEmbeddedModel(embedModel);
|
||||
},
|
||||
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
|
||||
return withEmbeddedModel(embedModel, fn);
|
||||
},
|
||||
get tokenizer() {
|
||||
return getTokenizer();
|
||||
},
|
||||
|
||||
+7
-4
@@ -1,15 +1,18 @@
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import { OpenAIEmbedding } from "@llamaindex/openai";
|
||||
|
||||
const embeddedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>();
|
||||
let globalEmbeddedModel: BaseEmbedding | null = null;
|
||||
|
||||
export function getEmbeddedModel(): BaseEmbedding {
|
||||
if (globalEmbeddedModel === null) {
|
||||
globalEmbeddedModel = new OpenAIEmbedding();
|
||||
const currentEmbeddedModel =
|
||||
embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
|
||||
if (!currentEmbeddedModel) {
|
||||
throw new Error(
|
||||
"Cannot find Embedding, please set `Settings.embedModel = ...` on the top of your code",
|
||||
);
|
||||
}
|
||||
return embeddedModelAsyncLocalStorage.getStore() ?? globalEmbeddedModel;
|
||||
return currentEmbeddedModel;
|
||||
}
|
||||
|
||||
export function setEmbeddedModel(embeddedModel: BaseEmbedding) {
|
||||
@@ -0,0 +1,167 @@
|
||||
import { path } from "@llamaindex/env";
|
||||
import {
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../../global";
|
||||
import type { StoredValue } from "../../schema";
|
||||
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
|
||||
|
||||
const TYPE_KEY = "__type__";
|
||||
const DATA_KEY = "__data__";
|
||||
|
||||
export interface Serializer<T> {
|
||||
toPersistence(data: Record<string, unknown>): T;
|
||||
|
||||
fromPersistence(data: T): Record<string, unknown>;
|
||||
}
|
||||
|
||||
export const jsonSerializer: Serializer<string> = {
|
||||
toPersistence(data) {
|
||||
return JSON.stringify(data);
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return JSON.parse(data);
|
||||
},
|
||||
};
|
||||
|
||||
export const noneSerializer: Serializer<Record<string, unknown>> = {
|
||||
toPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
fromPersistence(data) {
|
||||
return data;
|
||||
},
|
||||
};
|
||||
|
||||
type DocJson<Data> = {
|
||||
[TYPE_KEY]: ObjectType;
|
||||
[DATA_KEY]: Data;
|
||||
};
|
||||
|
||||
export function isValidDocJson(
|
||||
docJson: StoredValue | null | undefined,
|
||||
): docJson is DocJson<unknown> {
|
||||
return (
|
||||
typeof docJson === "object" &&
|
||||
docJson !== null &&
|
||||
docJson[TYPE_KEY] !== undefined &&
|
||||
docJson[DATA_KEY] !== undefined
|
||||
);
|
||||
}
|
||||
|
||||
export function docToJson(
|
||||
doc: BaseNode,
|
||||
serializer: Serializer<unknown>,
|
||||
): DocJson<unknown> {
|
||||
return {
|
||||
[DATA_KEY]: serializer.toPersistence(doc.toJSON()),
|
||||
[TYPE_KEY]: doc.type,
|
||||
};
|
||||
}
|
||||
|
||||
export function jsonToDoc<Data>(
|
||||
docDict: DocJson<Data>,
|
||||
serializer: Serializer<Data>,
|
||||
): BaseNode {
|
||||
const docType = docDict[TYPE_KEY];
|
||||
// fixme: zod type check this
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]);
|
||||
let doc: BaseNode;
|
||||
|
||||
if (docType === ObjectType.DOCUMENT) {
|
||||
doc = new Document({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
embedding: dataDict.embedding,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
});
|
||||
} else if (docType === ObjectType.TEXT) {
|
||||
doc = new TextNode({
|
||||
text: dataDict.text,
|
||||
id_: dataDict.id_,
|
||||
hash: dataDict.hash,
|
||||
metadata: dataDict.metadata,
|
||||
relationships: dataDict.relationships,
|
||||
});
|
||||
} else {
|
||||
throw new Error(`Unknown doc type: ${docType}`);
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
const DEFAULT_PERSIST_PATH = path.join(
|
||||
DEFAULT_PERSIST_DIR,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
|
||||
export interface RefDocInfo {
|
||||
nodeIds: string[];
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
extraInfo: Record<string, any>;
|
||||
}
|
||||
|
||||
/**
 * Abstract base for document stores.
 *
 * Subclasses implement the storage-specific operations (documents, hashes
 * and ref-doc bookkeeping). The node helpers at the bottom are built on top
 * of `getDocument`.
 */
export abstract class BaseDocumentStore {
  // Defaults to the JSON serializer.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  serializer: Serializer<any> = jsonSerializer;

  // Save/load
  /** No-op in the base class. */
  persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
    // Persist the docstore to a file.
  }

  // Main interface
  abstract docs(): Promise<Record<string, BaseNode>>;

  abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;

  abstract getDocument(
    docId: string,
    raiseError: boolean,
  ): Promise<BaseNode | undefined>;

  abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;

  abstract documentExists(docId: string): Promise<boolean>;

  // Hash
  abstract setDocumentHash(docId: string, docHash: string): Promise<void>;

  abstract getDocumentHash(docId: string): Promise<string | undefined>;

  abstract getAllDocumentHashes(): Promise<Record<string, string>>;

  // Ref Docs
  abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;

  abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;

  abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;

  // Nodes
  /**
   * Looks up several nodes at once via `getNode`.
   *
   * @returns A promise that rejects if any single lookup rejects.
   */
  getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
    const lookups = nodeIds.map((id) => this.getNode(id, raiseError));
    return Promise.all(lookups);
  }

  /**
   * Fetches one node through `getDocument`.
   *
   * @throws Error when the value returned by `getDocument` is not a
   *   `BaseNode` instance (this includes `undefined`).
   */
  async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
    const found = await this.getDocument(nodeId, raiseError);
    if (found instanceof BaseNode) {
      return found;
    }
    throw new Error(`Document ${nodeId} is not a Node.`);
  }

  /**
   * Resolves an index -> node id mapping into an index -> node mapping.
   * Entries are fetched one after another, in `for...in` order.
   */
  async getNodeDict(nodeIdDict: {
    [index: number]: string;
  }): Promise<Record<number, BaseNode>> {
    const nodesByIndex: Record<number, BaseNode> = {};
    for (const key in nodeIdDict) {
      nodesByIndex[key] = await this.getNode(nodeIdDict[key]!);
    }
    return nodesByIndex;
  }
}
|
||||
@@ -1,167 +1,2 @@
|
||||
import { path } from "@llamaindex/env";
|
||||
import {
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../../global";
|
||||
import type { StoredValue } from "../../schema";
|
||||
import { BaseNode, Document, ObjectType, TextNode } from "../../schema";
|
||||
|
||||
const TYPE_KEY = "__type__";
|
||||
const DATA_KEY = "__data__";
|
||||
|
||||
export interface Serializer<T> {
|
||||
toPersistence(data: Record<string, unknown>): T;
|
||||
|
||||
fromPersistence(data: T): Record<string, unknown>;
|
||||
}
|
||||
|
||||
/**
 * Serializer that stores data as a JSON string
 * (`JSON.stringify` on the way in, `JSON.parse` on the way out).
 */
export const jsonSerializer: Serializer<string> = {
  toPersistence: (data) => JSON.stringify(data),
  fromPersistence: (data) => JSON.parse(data),
};
|
||||
|
||||
/**
 * Pass-through serializer: both directions return the given object as-is.
 */
export const noneSerializer: Serializer<Record<string, unknown>> = {
  toPersistence: (data) => data,
  fromPersistence: (data) => data,
};
|
||||
|
||||
type DocJson<Data> = {
|
||||
[TYPE_KEY]: ObjectType;
|
||||
[DATA_KEY]: Data;
|
||||
};
|
||||
|
||||
/**
 * Type guard for persisted document envelopes.
 *
 * @returns `true` when `docJson` is a non-null object whose TYPE_KEY and
 *   DATA_KEY entries are both not `undefined`.
 */
export function isValidDocJson(
  docJson: StoredValue | null | undefined,
): docJson is DocJson<unknown> {
  if (typeof docJson !== "object" || docJson === null) {
    return false;
  }
  // DATA_KEY is only inspected when TYPE_KEY is present.
  return docJson[TYPE_KEY] !== undefined && docJson[DATA_KEY] !== undefined;
}
|
||||
|
||||
/**
 * Wraps a node in its persisted envelope.
 *
 * The node's `toJSON()` output is passed through `serializer.toPersistence`
 * and stored under DATA_KEY; the node's `type` is stored under TYPE_KEY.
 *
 * @param doc - Node to persist.
 * @param serializer - Encoder applied to the node's JSON form.
 * @returns The envelope holding the encoded payload and the type tag.
 */
export function docToJson(
  doc: BaseNode,
  serializer: Serializer<unknown>,
): DocJson<unknown> {
  const payload = serializer.toPersistence(doc.toJSON());
  const envelope: DocJson<unknown> = {
    [DATA_KEY]: payload,
    [TYPE_KEY]: doc.type,
  };
  return envelope;
}
|
||||
|
||||
/**
 * Rebuilds a node from its persisted envelope.
 *
 * The payload under DATA_KEY is decoded with `serializer.fromPersistence`;
 * the tag under TYPE_KEY selects which node class is constructed.
 *
 * @param docDict - Persisted envelope (type tag + encoded payload).
 * @param serializer - Decoder for the encoded payload.
 * @returns A `Document` or a `TextNode` built from the decoded fields.
 * @throws Error when the type tag is neither DOCUMENT nor TEXT.
 */
export function jsonToDoc<Data>(
  docDict: DocJson<Data>,
  serializer: Serializer<Data>,
): BaseNode {
  const docType = docDict[TYPE_KEY];
  // fixme: zod type check this
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const fields: any = serializer.fromPersistence(docDict[DATA_KEY]);

  switch (docType) {
    case ObjectType.DOCUMENT:
      return new Document({
        text: fields.text,
        id_: fields.id_,
        embedding: fields.embedding,
        hash: fields.hash,
        metadata: fields.metadata,
      });
    case ObjectType.TEXT:
      return new TextNode({
        text: fields.text,
        id_: fields.id_,
        hash: fields.hash,
        metadata: fields.metadata,
        relationships: fields.relationships,
      });
    default:
      throw new Error(`Unknown doc type: ${docType}`);
  }
}
|
||||
|
||||
const DEFAULT_PERSIST_PATH = path.join(
|
||||
DEFAULT_PERSIST_DIR,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
|
||||
export interface RefDocInfo {
|
||||
nodeIds: string[];
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
extraInfo: Record<string, any>;
|
||||
}
|
||||
|
||||
/**
 * Abstract base for document stores.
 *
 * Subclasses implement the storage-specific operations (documents, hashes
 * and ref-doc bookkeeping). The node helpers at the bottom are built on top
 * of `getDocument`.
 */
export abstract class BaseDocumentStore {
  // Defaults to the JSON serializer.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  serializer: Serializer<any> = jsonSerializer;

  // Save/load
  /** No-op in the base class. */
  persist(persistPath: string = DEFAULT_PERSIST_PATH): void {
    // Persist the docstore to a file.
  }

  // Main interface
  abstract docs(): Promise<Record<string, BaseNode>>;

  abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>;

  abstract getDocument(
    docId: string,
    raiseError: boolean,
  ): Promise<BaseNode | undefined>;

  abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;

  abstract documentExists(docId: string): Promise<boolean>;

  // Hash
  abstract setDocumentHash(docId: string, docHash: string): Promise<void>;

  abstract getDocumentHash(docId: string): Promise<string | undefined>;

  abstract getAllDocumentHashes(): Promise<Record<string, string>>;

  // Ref Docs
  abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>;

  abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>;

  abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;

  // Nodes
  /**
   * Looks up several nodes at once via `getNode`.
   *
   * @returns A promise that rejects if any single lookup rejects.
   */
  getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> {
    const lookups = nodeIds.map((id) => this.getNode(id, raiseError));
    return Promise.all(lookups);
  }

  /**
   * Fetches one node through `getDocument`.
   *
   * @throws Error when the value returned by `getDocument` is not a
   *   `BaseNode` instance (this includes `undefined`).
   */
  async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> {
    const found = await this.getDocument(nodeId, raiseError);
    if (found instanceof BaseNode) {
      return found;
    }
    throw new Error(`Document ${nodeId} is not a Node.`);
  }

  /**
   * Resolves an index -> node id mapping into an index -> node mapping.
   * Entries are fetched one after another, in `for...in` order.
   */
  async getNodeDict(nodeIdDict: {
    [index: number]: string;
  }): Promise<Record<number, BaseNode>> {
    const nodesByIndex: Record<number, BaseNode> = {};
    for (const key in nodeIdDict) {
      nodesByIndex[key] = await this.getNode(nodeIdDict[key]!);
    }
    return nodesByIndex;
  }
}
|
||||
export * from "./base-document-store";
|
||||
export * from "./kv-document-store";
|
||||
|
||||
+21
-19
@@ -1,15 +1,13 @@
|
||||
import { DEFAULT_NAMESPACE } from "@llamaindex/core/global";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { ObjectType } from "@llamaindex/core/schema";
|
||||
import type { RefDocInfo } from "@llamaindex/core/storage/doc-store";
|
||||
import { DEFAULT_NAMESPACE } from "../../global";
|
||||
import { BaseNode, ObjectType, type StoredValue } from "../../schema";
|
||||
import type { BaseKVStore } from "../kv-store";
|
||||
import {
|
||||
BaseDocumentStore,
|
||||
docToJson,
|
||||
isValidDocJson,
|
||||
jsonToDoc,
|
||||
} from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseKVStore } from "@llamaindex/core/storage/kv-store";
|
||||
import _ from "lodash";
|
||||
type RefDocInfo,
|
||||
} from "./base-document-store";
|
||||
|
||||
type DocMetaData = { docHash: string; refDocId?: string };
|
||||
|
||||
@@ -68,7 +66,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
extraInfo: {},
|
||||
};
|
||||
refDocInfo.nodeIds.push(doc.id_);
|
||||
if (_.isEmpty(refDocInfo.extraInfo)) {
|
||||
if (Object.keys(refDocInfo.extraInfo).length === 0) {
|
||||
refDocInfo.extraInfo = {};
|
||||
}
|
||||
await this.kvstore.put(
|
||||
@@ -88,7 +86,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
raiseError: boolean = true,
|
||||
): Promise<BaseNode | undefined> {
|
||||
const json = await this.kvstore.get(docId, this.nodeCollection);
|
||||
if (_.isNil(json)) {
|
||||
if (this.isNil(json)) {
|
||||
if (raiseError) {
|
||||
throw new Error(`docId ${docId} not found.`);
|
||||
} else {
|
||||
@@ -103,23 +101,23 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
|
||||
async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> {
|
||||
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
|
||||
return refDocInfo ? (_.clone(refDocInfo) as RefDocInfo) : undefined;
|
||||
return refDocInfo ? (structuredClone(refDocInfo) as RefDocInfo) : undefined;
|
||||
}
|
||||
|
||||
async getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined> {
|
||||
const refDocInfos = await this.kvstore.getAll(this.refDocCollection);
|
||||
if (_.isNil(refDocInfos)) {
|
||||
if (this.isNil(refDocInfos)) {
|
||||
return;
|
||||
}
|
||||
return refDocInfos as Record<string, RefDocInfo>;
|
||||
}
|
||||
|
||||
async refDocExists(refDocId: string): Promise<boolean> {
|
||||
return !_.isNil(await this.getRefDocInfo(refDocId));
|
||||
return !this.isNil(await this.getRefDocInfo(refDocId));
|
||||
}
|
||||
|
||||
async documentExists(docId: string): Promise<boolean> {
|
||||
return !_.isNil(await this.kvstore.get(docId, this.nodeCollection));
|
||||
return !this.isNil(await this.kvstore.get(docId, this.nodeCollection));
|
||||
}
|
||||
|
||||
private async removeRefDocNode(docId: string): Promise<void> {
|
||||
@@ -129,13 +127,13 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
|
||||
const refDocId = metadata.refDocId;
|
||||
if (_.isNil(refDocId)) {
|
||||
if (this.isNil(refDocId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection);
|
||||
if (!_.isNil(refDocInfo)) {
|
||||
if (refDocInfo.nodeIds.length > 0) {
|
||||
if (!this.isNil(refDocInfo)) {
|
||||
if (refDocInfo!.nodeIds.length > 0) {
|
||||
await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection);
|
||||
}
|
||||
await this.kvstore.delete(refDocId, this.metadataCollection);
|
||||
@@ -164,7 +162,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
raiseError: boolean = true,
|
||||
): Promise<void> {
|
||||
const refDocInfo = await this.getRefDocInfo(refDocId);
|
||||
if (_.isNil(refDocInfo)) {
|
||||
if (this.isNil(refDocInfo)) {
|
||||
if (raiseError) {
|
||||
throw new Error(`ref_doc_id ${refDocId} not found.`);
|
||||
} else {
|
||||
@@ -172,7 +170,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
}
|
||||
|
||||
for (const docId of refDocInfo.nodeIds) {
|
||||
for (const docId of refDocInfo!.nodeIds) {
|
||||
await this.deleteDocument(docId, false, false);
|
||||
}
|
||||
|
||||
@@ -187,7 +185,7 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
|
||||
async getDocumentHash(docId: string): Promise<string | undefined> {
|
||||
const metadata = await this.kvstore.get(docId, this.metadataCollection);
|
||||
return _.get(metadata, "docHash");
|
||||
return metadata?.docHash;
|
||||
}
|
||||
|
||||
async getAllDocumentHashes(): Promise<Record<string, string>> {
|
||||
@@ -201,4 +199,8 @@ export class KVDocumentStore extends BaseDocumentStore {
|
||||
}
|
||||
return hashes;
|
||||
}
|
||||
|
||||
private isNil(value: RefDocInfo | StoredValue | undefined): boolean {
|
||||
return value === null || value === undefined;
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,7 @@
|
||||
import type { BaseEmbedding } from "../embeddings/base.js";
|
||||
import { Settings } from "../global";
|
||||
import type { BaseNode, ModalityType } from "../schema/node.js";
|
||||
|
||||
/**
|
||||
* should be compatible with npm:pg and npm:postgres
|
||||
*/
|
||||
@@ -12,3 +16,134 @@ export interface IsomorphicDB {
|
||||
close: () => Promise<void>;
|
||||
onCloseEvent: (listener: () => void) => void;
|
||||
}
|
||||
|
||||
export interface VectorStoreQueryResult {
|
||||
nodes?: BaseNode[];
|
||||
similarities: number[];
|
||||
ids: string[];
|
||||
}
|
||||
|
||||
/**
 * Values accepted for the `mode` field of a vector store query.
 */
export enum VectorStoreQueryMode {
  DEFAULT = "default",
  SPARSE = "sparse",
  HYBRID = "hybrid",

  // Fit learners
  SVM = "svm",
  LOGISTIC_REGRESSION = "logistic_regression",
  LINEAR_REGRESSION = "linear_regression",

  // Maximum marginal relevance
  MMR = "mmr",

  // Azure AI Search
  SEMANTIC_HYBRID = "semantic_hybrid",
}
|
||||
|
||||
/**
 * Comparison operators usable in a metadata filter.
 */
export enum FilterOperator {
  /** Default operator (string, number). */
  EQ = "==",
  /** In array (string or number). */
  IN = "in",
  /** Greater than (number). */
  GT = ">",
  /** Less than (number). */
  LT = "<",
  /** Not equal to (string, number). */
  NE = "!=",
  /** Greater than or equal to (number). */
  GTE = ">=",
  /** Less than or equal to (number). */
  LTE = "<=",
  /** Not in array (string or number). */
  NIN = "nin",
  /** Contains any (array of strings). */
  ANY = "any",
  /** Contains all (array of strings). */
  ALL = "all",
  /** Full text match: search for a specific substring, token or phrase within the text field. */
  TEXT_MATCH = "text_match",
  /** Metadata array contains value (string or number). */
  CONTAINS = "contains",
  /** The field does not exist or is empty (null or empty array). */
  IS_EMPTY = "is_empty",
}
|
||||
|
||||
/**
 * Values accepted for the `condition` field of a metadata filter group.
 */
export enum FilterCondition {
  AND = "and",
  OR = "or",
}
|
||||
|
||||
export type MetadataFilterValue = string | number | string[] | number[];
|
||||
|
||||
export interface MetadataFilter {
|
||||
key: string;
|
||||
value?: MetadataFilterValue;
|
||||
operator: `${FilterOperator}`; // ==, any, all,...
|
||||
}
|
||||
|
||||
export interface MetadataFilters {
|
||||
filters: Array<MetadataFilter>;
|
||||
condition?: `${FilterCondition}`; // and, or
|
||||
}
|
||||
|
||||
export interface MetadataInfo {
|
||||
name: string;
|
||||
type: string;
|
||||
description: string;
|
||||
}
|
||||
|
||||
export interface VectorStoreInfo {
|
||||
metadataInfo: MetadataInfo[];
|
||||
contentInfo: string;
|
||||
}
|
||||
|
||||
export interface VectorStoreQuery {
|
||||
queryEmbedding?: number[];
|
||||
similarityTopK: number;
|
||||
docIds?: string[];
|
||||
queryStr?: string;
|
||||
mode: VectorStoreQueryMode;
|
||||
alpha?: number;
|
||||
filters?: MetadataFilters | undefined;
|
||||
mmrThreshold?: number;
|
||||
}
|
||||
|
||||
// Supported types of vector stores (for each modality)
|
||||
export type VectorStoreByType = {
|
||||
[P in ModalityType]?: BaseVectorStore;
|
||||
};
|
||||
|
||||
export type VectorStoreBaseParams = {
|
||||
embeddingModel?: BaseEmbedding | undefined;
|
||||
};
|
||||
|
||||
/**
 * Abstract base class for vector stores.
 *
 * `Client` is the type returned by `client()`; concrete stores implement
 * `add`, `delete` and `query`.
 */
export abstract class BaseVectorStore<Client = unknown> {
  /** Embedding model chosen in the constructor. */
  embedModel: BaseEmbedding;
  abstract storesText: boolean;
  isEmbeddingQuery?: boolean;

  abstract client(): Client;

  abstract add(embeddingResults: BaseNode[]): Promise<string[]>;

  abstract delete(refDocId: string, deleteOptions?: object): Promise<void>;

  abstract query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult>;

  /**
   * @param params - Optional settings; when `embeddingModel` is null or
   *   undefined, `Settings.embedModel` is used instead.
   */
  protected constructor(params?: VectorStoreBaseParams) {
    const configured = params?.embeddingModel;
    this.embedModel = configured ?? Settings.embedModel;
  }
}
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a string or a number.
 *
 * @throws Error for any other input, including `undefined` and arrays.
 */
export const parsePrimitiveValue = (
  value?: MetadataFilterValue,
): string | number => {
  if (typeof value === "string" || typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a string or number");
};
|
||||
|
||||
/**
 * Returns `value` unchanged when it is an array whose elements are all
 * strings or numbers.
 *
 * @throws Error when `value` is not an array or holds any other element type.
 */
export const parseArrayValue = (
  value?: MetadataFilterValue,
): string[] | number[] => {
  if (!Array.isArray(value)) {
    throw new Error("Value must be an array of strings or numbers");
  }
  const hasNonPrimitive = value.some(
    (item) => typeof item !== "string" && typeof item !== "number",
  );
  if (hasNonPrimitive) {
    throw new Error("Value must be an array of strings or numbers");
  }
  return value;
};
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a number.
 *
 * @throws Error for any non-number input, including `undefined`.
 */
export const parseNumberValue = (value?: MetadataFilterValue): number => {
  if (typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a number");
};
|
||||
|
||||
export * from "./utils.js";
|
||||
|
||||
+6
-29
@@ -1,6 +1,9 @@
|
||||
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
|
||||
import { ObjectType, jsonToNode } from "@llamaindex/core/schema";
|
||||
import type { MetadataFilterValue } from "./types.js";
|
||||
import {
|
||||
ObjectType,
|
||||
jsonToNode,
|
||||
type BaseNode,
|
||||
type Metadata,
|
||||
} from "../schema";
|
||||
|
||||
const DEFAULT_TEXT_KEY = "text";
|
||||
|
||||
@@ -91,32 +94,6 @@ export function metadataDictToNode(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a string or a number.
 *
 * @throws Error for any other input, including `undefined` and arrays.
 */
export const parsePrimitiveValue = (
  value?: MetadataFilterValue,
): string | number => {
  if (typeof value === "string" || typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a string or number");
};
|
||||
|
||||
/**
 * Returns `value` unchanged when it is an array whose elements are all
 * strings or numbers.
 *
 * @throws Error when `value` is not an array or holds any other element type.
 */
export const parseArrayValue = (
  value?: MetadataFilterValue,
): string[] | number[] => {
  if (!Array.isArray(value)) {
    throw new Error("Value must be an array of strings or numbers");
  }
  const hasNonPrimitive = value.some(
    (item) => typeof item !== "string" && typeof item !== "number",
  );
  if (hasNonPrimitive) {
    throw new Error("Value must be an array of strings or numbers");
  }
  return value;
};
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a number.
 *
 * @throws Error for any non-number input, including `undefined`.
 */
export const parseNumberValue = (value?: MetadataFilterValue): number => {
  if (typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a number");
};
|
||||
|
||||
/**
 * Prefixes every `%`, `_` and `\` in `value` with a backslash.
 */
export const escapeLikeString = (value: string) => {
  return value.replace(/[\\%_]/g, (match) => `\\${match}`);
};
|
||||
@@ -1,5 +1,15 @@
|
||||
# @llamaindex/experimental
|
||||
|
||||
## 0.0.144
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [1931bbc]
|
||||
- llamaindex@0.8.32
|
||||
|
||||
## 0.0.143
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/experimental",
|
||||
"description": "Experimental package for LlamaIndexTS",
|
||||
"version": "0.0.143",
|
||||
"version": "0.0.144",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
|
||||
@@ -1,5 +1,49 @@
|
||||
# llamaindex
|
||||
|
||||
## 0.8.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 34faf48: chore: move vector stores to their own packages
|
||||
- 4df1fe6: chore: migrate llamaindex llms and embeddings to their own packages
|
||||
- 9456616: refactor: @llamaindex/postgres
|
||||
- 1931bbc: refactor: @llamaindex/azure
|
||||
- Updated dependencies [34faf48]
|
||||
- Updated dependencies [4df1fe6]
|
||||
- Updated dependencies [9456616]
|
||||
- Updated dependencies [d6c270e]
|
||||
- Updated dependencies [1892e1c]
|
||||
- Updated dependencies [1931bbc]
|
||||
- Updated dependencies [8744796]
|
||||
- @llamaindex/astra@0.0.2
|
||||
- @llamaindex/chroma@0.0.2
|
||||
- @llamaindex/milvus@0.0.2
|
||||
- @llamaindex/mongodb@0.0.2
|
||||
- @llamaindex/pinecone@0.0.2
|
||||
- @llamaindex/qdrant@0.0.2
|
||||
- @llamaindex/upstash@0.0.2
|
||||
- @llamaindex/weaviate@0.0.2
|
||||
- @llamaindex/google@0.0.2
|
||||
- @llamaindex/mistral@0.0.2
|
||||
- @llamaindex/core@0.4.21
|
||||
- @llamaindex/cloud@2.0.22
|
||||
- @llamaindex/openai@0.1.46
|
||||
- @llamaindex/azure@0.0.2
|
||||
- @llamaindex/node-parser@0.0.22
|
||||
- @llamaindex/anthropic@0.0.30
|
||||
- @llamaindex/clip@0.0.30
|
||||
- @llamaindex/cohere@0.0.2
|
||||
- @llamaindex/deepinfra@0.0.30
|
||||
- @llamaindex/huggingface@0.0.30
|
||||
- @llamaindex/mixedbread@0.0.2
|
||||
- @llamaindex/ollama@0.0.37
|
||||
- @llamaindex/portkey-ai@0.0.30
|
||||
- @llamaindex/replicate@0.0.30
|
||||
- @llamaindex/postgres@0.0.30
|
||||
- @llamaindex/readers@1.0.23
|
||||
- @llamaindex/groq@0.0.45
|
||||
- @llamaindex/vllm@0.0.16
|
||||
|
||||
## 0.8.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "llamaindex",
|
||||
"version": "0.8.31",
|
||||
"version": "0.8.32",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
"keywords": [
|
||||
@@ -20,17 +20,6 @@
|
||||
"llamaindex"
|
||||
],
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "0.32.1",
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@aws-sdk/client-sso-oidc": "^3.693.0",
|
||||
"@azure/cosmos": "^4.1.1",
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@azure/search-documents": "^12.1.0",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@discoveryjs/json-ext": "^0.6.1",
|
||||
"@google-cloud/vertexai": "1.9.0",
|
||||
"@google/generative-ai": "0.21.0",
|
||||
"@grpc/grpc-js": "^1.12.2",
|
||||
"@llamaindex/anthropic": "workspace:*",
|
||||
"@llamaindex/clip": "workspace:*",
|
||||
"@llamaindex/cloud": "workspace:*",
|
||||
@@ -46,55 +35,31 @@
|
||||
"@llamaindex/readers": "workspace:*",
|
||||
"@llamaindex/replicate": "workspace:*",
|
||||
"@llamaindex/vllm": "workspace:*",
|
||||
"@mistralai/mistralai": "^1.3.4",
|
||||
"@mixedbread-ai/sdk": "^2.2.11",
|
||||
"@pinecone-database/pinecone": "^4.0.0",
|
||||
"@qdrant/js-client-rest": "^1.11.0",
|
||||
"@llamaindex/postgres": "workspace:*",
|
||||
"@llamaindex/azure": "workspace:*",
|
||||
"@llamaindex/astra": "workspace:*",
|
||||
"@llamaindex/milvus": "workspace:*",
|
||||
"@llamaindex/chroma": "workspace:*",
|
||||
"@llamaindex/mongodb": "workspace:*",
|
||||
"@llamaindex/pinecone": "workspace:*",
|
||||
"@llamaindex/qdrant": "workspace:*",
|
||||
"@llamaindex/upstash": "workspace:*",
|
||||
"@llamaindex/weaviate": "workspace:*",
|
||||
"@llamaindex/google": "workspace:*",
|
||||
"@llamaindex/mistral": "workspace:*",
|
||||
"@llamaindex/mixedbread": "workspace:*",
|
||||
"@llamaindex/cohere": "workspace:*",
|
||||
"@types/lodash": "^4.17.7",
|
||||
"@types/node": "^22.9.0",
|
||||
"@types/pg": "^8.11.8",
|
||||
"@upstash/vector": "^1.1.5",
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.6",
|
||||
"ajv": "^8.17.1",
|
||||
"assemblyai": "^4.8.0",
|
||||
"chromadb": "1.9.2",
|
||||
"chromadb-default-embed": "^2.13.2",
|
||||
"cohere-ai": "7.14.0",
|
||||
"gpt-tokenizer": "^2.6.2",
|
||||
"groq-sdk": "^0.8.0",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"lodash": "^4.17.21",
|
||||
"magic-bytes.js": "^1.10.0",
|
||||
"mongodb": "^6.7.0",
|
||||
"openai": "^4.73.1",
|
||||
"pathe": "^1.1.2",
|
||||
"rake-modified": "^1.0.8",
|
||||
"weaviate-client": "^3.2.3",
|
||||
"wikipedia": "^2.1.2",
|
||||
"wink-nlp": "^2.3.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"pg": "^8.12.0",
|
||||
"pgvector": "0.2.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"pg": {
|
||||
"optional": true
|
||||
},
|
||||
"pgvector": {
|
||||
"optional": true
|
||||
}
|
||||
"magic-bytes.js": "^1.10.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@swc/cli": "^0.5.0",
|
||||
"@swc/core": "^1.9.2",
|
||||
"@vercel/postgres": "^0.10.0",
|
||||
"concurrently": "^9.1.0",
|
||||
"glob": "^11.0.0",
|
||||
"pg": "^8.12.0",
|
||||
"pgvector": "0.2.0",
|
||||
"postgres": "^3.4.4",
|
||||
"typescript": "^5.7.2"
|
||||
},
|
||||
"engines": {
|
||||
|
||||
@@ -13,11 +13,6 @@ import {
|
||||
} from "@llamaindex/core/node-parser";
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "./ServiceContext.js";
|
||||
import {
|
||||
getEmbeddedModel,
|
||||
setEmbeddedModel,
|
||||
withEmbeddedModel,
|
||||
} from "./internal/settings/EmbedModel.js";
|
||||
|
||||
export type PromptConfig = {
|
||||
llm?: string;
|
||||
@@ -84,15 +79,15 @@ class GlobalSettings implements Config {
|
||||
}
|
||||
|
||||
get embedModel(): BaseEmbedding {
|
||||
return getEmbeddedModel();
|
||||
return CoreSettings.embedModel;
|
||||
}
|
||||
|
||||
set embedModel(embedModel: BaseEmbedding) {
|
||||
setEmbeddedModel(embedModel);
|
||||
CoreSettings.embedModel = embedModel;
|
||||
}
|
||||
|
||||
withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
|
||||
return withEmbeddedModel(embedModel, fn);
|
||||
return CoreSettings.withEmbedModel(embedModel, fn);
|
||||
}
|
||||
|
||||
get nodeParser(): NodeParser {
|
||||
|
||||
@@ -1,39 +1 @@
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { GeminiSession, GeminiSessionStore } from "../llm/gemini/base.js";
|
||||
import { GEMINI_BACKENDS } from "../llm/gemini/types.js";
|
||||
|
||||
/**
 * Model identifiers accepted by `GeminiEmbedding`.
 */
export enum GEMINI_EMBEDDING_MODEL {
  EMBEDDING_001 = "embedding-001",
  TEXT_EMBEDDING_004 = "text-embedding-004",
}
|
||||
|
||||
/**
 * GeminiEmbedding is an alias for Gemini that implements the BaseEmbedding interface.
 * Note: Vertex SDK currently does not support embeddings
 */
export class GeminiEmbedding extends BaseEmbedding {
  model: GEMINI_EMBEDDING_MODEL;
  session: GeminiSession;

  /**
   * @param init - Optional overrides. `model` falls back to EMBEDDING_001 and
   *   `session` falls back to the session obtained from `GeminiSessionStore`
   *   for the GOOGLE backend.
   */
  constructor(init?: Partial<GeminiEmbedding>) {
    super();
    const storedSession = (): GeminiSession =>
      GeminiSessionStore.get({
        backend: GEMINI_BACKENDS.GOOGLE,
      }) as GeminiSession;

    this.model = init?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
    this.session = init?.session ?? storedSession();
  }

  /** Requests the embedding for `prompt` from the configured model. */
  private async getEmbedding(prompt: string): Promise<number[]> {
    const generativeModel = this.session.getGenerativeModel({
      model: this.model,
    });
    const { embedding } = await generativeModel.embedContent(prompt);
    return embedding.values;
  }

  getTextEmbedding(text: string): Promise<number[]> {
    return this.getEmbedding(text);
  }
}
|
||||
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
|
||||
|
||||
@@ -1,33 +1,4 @@
|
||||
import { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { MistralAISession } from "../llm/mistral.js";
|
||||
|
||||
/**
 * Model identifiers used by `MistralAIEmbedding`.
 */
export enum MistralAIEmbeddingModelType {
  MISTRAL_EMBED = "mistral-embed",
}
|
||||
|
||||
/**
 * Embedding implementation that sends text to the client obtained from a
 * `MistralAISession`.
 */
export class MistralAIEmbedding extends BaseEmbedding {
  model: MistralAIEmbeddingModelType;
  apiKey?: string;

  private session: MistralAISession;

  /**
   * @param init - Forwarded unchanged to `MistralAISession`. The model is
   *   always set to MISTRAL_EMBED here.
   */
  constructor(init?: Partial<MistralAIEmbedding>) {
    super();
    this.model = MistralAIEmbeddingModelType.MISTRAL_EMBED;
    this.session = new MistralAISession(init);
  }

  /** Embeds a single input and returns the first entry of the response. */
  private async getMistralAIEmbedding(input: string) {
    const client = await this.session.getClient();
    const response = await client.embeddings({
      model: this.model,
      input: [input],
    });

    return response.data[0].embedding;
  }

  async getTextEmbedding(text: string): Promise<number[]> {
    return this.getMistralAIEmbedding(text);
  }
}
|
||||
export {
|
||||
MistralAIEmbedding,
|
||||
MistralAIEmbeddingModelType,
|
||||
} from "@llamaindex/mistral";
|
||||
|
||||
@@ -1,178 +1,4 @@
|
||||
import { BaseEmbedding, type EmbeddingInfo } from "@llamaindex/core/embeddings";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import { MixedbreadAI, MixedbreadAIClient } from "@mixedbread-ai/sdk";
|
||||
|
||||
type EmbeddingsRequestWithoutInput = Omit<
|
||||
MixedbreadAI.EmbeddingsRequest,
|
||||
"input"
|
||||
>;
|
||||
|
||||
/**
|
||||
* Interface extending EmbeddingsParams with additional
|
||||
* parameters specific to the MixedbreadAIEmbeddings class.
|
||||
*/
|
||||
export interface MixedbreadAIEmbeddingsParams
|
||||
extends Omit<EmbeddingsRequestWithoutInput, "model"> {
|
||||
/**
|
||||
* The model to use for generating embeddings.
|
||||
* @default {"mixedbread-ai/mxbai-embed-large-v1"}
|
||||
*/
|
||||
model?: string;
|
||||
|
||||
/**
|
||||
* The API key to use.
|
||||
* @default {process.env.MXBAI_API_KEY}
|
||||
*/
|
||||
apiKey?: string;
|
||||
|
||||
/**
|
||||
* The base URL for the API.
|
||||
*/
|
||||
baseUrl?: string;
|
||||
|
||||
/**
|
||||
* The maximum number of documents to embed in a single request.
|
||||
* @default {128}
|
||||
*/
|
||||
embedBatchSize?: number;
|
||||
|
||||
/**
|
||||
* The embed info for the model.
|
||||
*/
|
||||
embedInfo?: EmbeddingInfo;
|
||||
|
||||
/**
|
||||
* The maximum number of retries to attempt.
|
||||
* @default {3}
|
||||
*/
|
||||
maxRetries?: number;
|
||||
|
||||
/**
|
||||
* Timeouts for the request.
|
||||
*/
|
||||
timeoutInSeconds?: number;
|
||||
}
|
||||
|
||||
/**
 * Class for generating embeddings using the mixedbread ai API.
 *
 * Unless another model is configured, requests use
 * "mixedbread-ai/mxbai-embed-large-v1".
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({ apiKey: 'your-api-key' });
 * const texts = ["Baking bread is fun", "I love baking"];
 * const result = await mxbai.getTextEmbeddings(texts);
 * console.log(result);
 *
 * @example
 * const mxbai = new MixedbreadAIEmbeddings({
 *   apiKey: 'your-api-key',
 *   model: 'mixedbread-ai/mxbai-embed-large-v1',
 *   encodingFormat: MixedbreadAI.EncodingFormat.Binary,
 *   dimensions: 512,
 *   normalized: true,
 * });
 * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
 * const result = await mxbai.getTextEmbedding(query);
 * console.log(result);
 */
export class MixedbreadAIEmbeddings extends BaseEmbedding {
  requestParams: EmbeddingsRequestWithoutInput;
  requestOptions: MixedbreadAIClient.RequestOptions;
  private client: MixedbreadAIClient;

  /**
   * Constructor for MixedbreadAIEmbeddings.
   * @param params - An optional object with properties to configure the instance.
   * @throws {Error} If the API key is not provided or found in the environment variables.
   * @throws {Error} If the batch size exceeds 256.
   */
  constructor(params?: Partial<MixedbreadAIEmbeddingsParams>) {
    super();

    // An explicit key wins; otherwise read MXBAI_API_KEY from the environment.
    const apiKey = params?.apiKey ?? getEnv("MXBAI_API_KEY");
    if (!apiKey) {
      throw new Error(
        "mixedbread ai API key not found. Either provide it in the constructor or set the 'MXBAI_API_KEY' environment variable.",
      );
    }

    const batchSize = params?.embedBatchSize;
    if (batchSize && batchSize > 256) {
      throw new Error(
        "The maximum batch size for mixedbread ai embeddings API is 256.",
      );
    }
    this.embedBatchSize = batchSize ?? 128;

    const embedInfo = params?.embedInfo;
    if (embedInfo) {
      this.embedInfo = embedInfo;
    }

    this.requestParams = {
      model: params?.model ?? "mixedbread-ai/mxbai-embed-large-v1",
      normalized: params?.normalized,
      dimensions: params?.dimensions,
      encodingFormat: params?.encodingFormat,
      truncationStrategy: params?.truncationStrategy,
      prompt: params?.prompt,
    } as EmbeddingsRequestWithoutInput;

    this.requestOptions = {
      timeoutInSeconds: params?.timeoutInSeconds,
      maxRetries: params?.maxRetries ?? 3,
      // Support for this already exists in the python sdk and will be added to the js sdk soon
      // @ts-expect-error fixme
      additionalHeaders: {
        "user-agent": "@mixedbread-ai/llamaindex-ts-sdk",
      },
    };

    // `environment` is only passed when a base URL was supplied.
    const clientOptions = params?.baseUrl
      ? { apiKey, environment: params.baseUrl }
      : { apiKey };
    this.client = new MixedbreadAIClient(clientOptions);
  }

  /**
   * Generates an embedding for a single text.
   * @param text - A string to generate an embedding for.
   * @returns A Promise that resolves to an array of numbers representing the embedding.
   *
   * @example
   * const query = "Represent this sentence for searching relevant passages: Is baking bread fun?";
   * const result = await mxbai.getTextEmbedding(query);
   * console.log(result);
   */
  async getTextEmbedding(text: string): Promise<number[]> {
    const [embedding] = await this.getTextEmbeddings([text]);
    return embedding!;
  }

  /**
   * Generates embeddings for an array of texts.
   * @param texts - An array of strings to generate embeddings for.
   * @returns A Promise that resolves to an array of embeddings; an empty
   *   input resolves to an empty array without calling the API.
   *
   * @example
   * const texts = ["Baking bread is fun", "I love baking"];
   * const result = await mxbai.getTextEmbeddings(texts);
   * console.log(result);
   */
  getTextEmbeddings = async (texts: string[]): Promise<Array<number[]>> => {
    if (texts.length === 0) {
      return [];
    }

    const request = {
      ...this.requestParams,
      input: texts,
    };
    const { data } = await this.client.embeddings(request, this.requestOptions);
    return data.map((item) => item.embedding as number[]);
  };
}
|
||||
export {
|
||||
MixedbreadAIEmbeddings,
|
||||
type MixedbreadAIEmbeddingsParams,
|
||||
} from "@llamaindex/mixedbread";
|
||||
|
||||
@@ -2,7 +2,7 @@ export * from "@llamaindex/core/embeddings";
|
||||
export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
|
||||
export { DeepInfraEmbedding } from "./DeepInfraEmbedding.js";
|
||||
export { FireworksEmbedding } from "./fireworks.js";
|
||||
export * from "./GeminiEmbedding.js";
|
||||
export { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "./GeminiEmbedding.js";
|
||||
export * from "./HuggingFaceEmbedding.js";
|
||||
export * from "./JinaAIEmbedding.js";
|
||||
export * from "./MistralAIEmbedding.js";
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
//#region initial setup for OpenAI
|
||||
import { OpenAI } from "@llamaindex/openai";
|
||||
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
|
||||
import { Settings } from "./Settings.js";
|
||||
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
Settings.llm;
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
Settings.embedModel;
|
||||
} catch {
|
||||
Settings.llm = new OpenAI();
|
||||
Settings.embedModel = new OpenAIEmbedding();
|
||||
}
|
||||
|
||||
//#endregion
|
||||
|
||||
@@ -7,12 +7,14 @@ export {
|
||||
HuggingFaceEmbeddingModelType,
|
||||
} from "./embeddings/HuggingFaceEmbedding.js";
|
||||
|
||||
export { type VertexGeminiSessionOptions } from "./llm/gemini/types.js";
|
||||
export { GeminiVertexSession } from "./llm/gemini/vertex.js";
|
||||
export {
|
||||
GeminiVertexSession,
|
||||
type VertexGeminiSessionOptions,
|
||||
} from "@llamaindex/google";
|
||||
|
||||
// Expose AzureDynamicSessionTool for node.js runtime only
|
||||
export { AzureDynamicSessionTool } from "@llamaindex/azure";
|
||||
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
|
||||
export { AzureDynamicSessionTool } from "./tools/AzureDynamicSessionTool.node.js";
|
||||
|
||||
// Don't export vector store modules for non-node.js runtime on top level,
|
||||
// as we cannot guarantee that they will work in other environments
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
} from "@llamaindex/core/schema";
|
||||
import type { BaseIndexStore } from "@llamaindex/core/storage/index-store";
|
||||
import { extractText } from "@llamaindex/core/utils";
|
||||
import { VectorStoreQueryMode } from "@llamaindex/core/vector-store";
|
||||
import type { ServiceContext } from "../../ServiceContext.js";
|
||||
import { nodeParserFromSettingsOrContext } from "../../Settings.js";
|
||||
import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js";
|
||||
@@ -38,7 +39,6 @@ import type {
|
||||
VectorStoreByType,
|
||||
VectorStoreQueryResult,
|
||||
} from "../../vector-store/index.js";
|
||||
import { VectorStoreQueryMode } from "../../vector-store/types.js";
|
||||
import type { BaseIndexInit } from "../BaseIndex.js";
|
||||
import { BaseIndex } from "../BaseIndex.js";
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { IngestionCache, getTransformationHash } from "./IngestionCache.js";
|
||||
import {
|
||||
DocStoreStrategy,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { classify } from "./classify.js";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { BaseNode } from "@llamaindex/core/schema";
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { classify } from "./classify.js";
|
||||
import { RollbackableTransformComponent } from "./rollback.js";
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import type { BaseVectorStore } from "../../vector-store/types.js";
|
||||
import type { BaseVectorStore } from "@llamaindex/core/vector-store";
|
||||
import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
|
||||
import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
|
||||
import { UpsertsStrategy } from "./UpsertsStrategy.js";
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export * from "@llamaindex/google";
|
||||
@@ -6,11 +6,12 @@ export {
|
||||
Anthropic,
|
||||
} from "./anthropic.js";
|
||||
export { FireworksLLM } from "./fireworks.js";
|
||||
export { Gemini, GeminiSession } from "./gemini/base.js";
|
||||
export {
|
||||
GEMINI_MODEL,
|
||||
Gemini,
|
||||
GeminiSession,
|
||||
type GoogleGeminiSessionOptions,
|
||||
} from "./gemini/types.js";
|
||||
} from "./google.js";
|
||||
export * from "./groq.js";
|
||||
export { HuggingFaceInferenceAPI, HuggingFaceLLM } from "./huggingface.js";
|
||||
export {
|
||||
|
||||
@@ -1,138 +1 @@
|
||||
import {
|
||||
BaseLLM,
|
||||
type ChatMessage,
|
||||
type ChatResponse,
|
||||
type ChatResponseChunk,
|
||||
type LLMChatParamsNonStreaming,
|
||||
type LLMChatParamsStreaming,
|
||||
} from "@llamaindex/core/llms";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
|
||||
// Context window shared by every model listed below.
const MISTRAL_CONTEXT_WINDOW = 32000;

/** Supported Mistral model names mapped to their context window size. */
export const ALL_AVAILABLE_MISTRAL_MODELS = {
  "mistral-tiny": { contextWindow: MISTRAL_CONTEXT_WINDOW },
  "mistral-small": { contextWindow: MISTRAL_CONTEXT_WINDOW },
  "mistral-medium": { contextWindow: MISTRAL_CONTEXT_WINDOW },
};
|
||||
|
||||
/**
 * Holds the Mistral API key and a lazily created SDK client.
 */
export class MistralAISession {
  apiKey: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private client: any;

  /**
   * @param init - Optional object carrying the API key. When the key is
   *   missing or empty, the `MISTRAL_API_KEY` environment variable is used.
   * @throws {Error} If neither source yields a key.
   */
  constructor(init?: { apiKey?: string | undefined }) {
    this.apiKey = init?.apiKey || getEnv("MISTRAL_API_KEY")!;

    if (!this.apiKey) {
      throw new Error("Set Mistral API key in MISTRAL_API_KEY env variable"); // Overriding MistralAI package's error message
    }
  }

  /**
   * Returns the SDK client, creating it on the first call.
   * The SDK module is loaded with a dynamic import.
   */
  async getClient() {
    const sdk = await import("@mistralai/mistralai");
    if (!this.client) {
      this.client = new sdk.Mistral({ apiKey: this.apiKey });
    }
    return this.client;
  }
}
|
||||
|
||||
/**
 * MistralAI LLM implementation.
 *
 * Requests go through the client returned by `MistralAISession.getClient()`,
 * which is built from the `Mistral` export of `@mistralai/mistralai`. That
 * client exposes chat completions as `chat.complete(...)` and
 * `chat.stream(...)`.
 */
export class MistralAI extends BaseLLM {
  // Per completion MistralAI params
  model: keyof typeof ALL_AVAILABLE_MISTRAL_MODELS;
  temperature: number;
  topP: number;
  maxTokens?: number | undefined;
  apiKey?: string;
  safeMode: boolean;
  randomSeed?: number | undefined;

  private session: MistralAISession;

  /**
   * @param init - Optional overrides. Defaults: model "mistral-small",
   *   temperature 0.1, topP 1, safeMode false; maxTokens and randomSeed unset.
   *   `init` is also handed to `MistralAISession`, which reads `apiKey` from it.
   */
  constructor(init?: Partial<MistralAI>) {
    super();
    this.model = init?.model ?? "mistral-small";
    this.temperature = init?.temperature ?? 0.1;
    this.topP = init?.topP ?? 1;
    this.maxTokens = init?.maxTokens ?? undefined;
    this.safeMode = init?.safeMode ?? false;
    this.randomSeed = init?.randomSeed ?? undefined;
    this.session = new MistralAISession(init);
  }

  /** Model settings plus the context window looked up for the selected model. */
  get metadata() {
    return {
      model: this.model,
      temperature: this.temperature,
      topP: this.topP,
      maxTokens: this.maxTokens,
      contextWindow: ALL_AVAILABLE_MISTRAL_MODELS[this.model].contextWindow,
      tokenizer: undefined,
    };
  }

  /**
   * Builds the request payload shared by streaming and non-streaming calls.
   * `safeMode` is sent under the SDK's `safePrompt` request field.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private buildParams(messages: ChatMessage[]): any {
    return {
      model: this.model,
      temperature: this.temperature,
      maxTokens: this.maxTokens,
      topP: this.topP,
      safePrompt: this.safeMode,
      randomSeed: this.randomSeed,
      messages,
    };
  }

  chat(
    params: LLMChatParamsStreaming,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
  /**
   * Sends a chat request.
   *
   * @param params - Messages plus the `stream` flag.
   * @returns The first choice's message with the raw response, or an async
   *   iterable of chunks when `stream` is set.
   */
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
    const { messages, stream } = params;
    // Streaming
    if (stream) {
      return this.streamChat(params);
    }
    // Non-streaming
    const client = await this.session.getClient();
    const response = await client.chat.complete(this.buildParams(messages));
    const message = response.choices[0].message;
    return {
      raw: response,
      message,
    };
  }

  /**
   * Streams a chat completion, yielding one chunk per event that carries at
   * least one choice. The first choice of each yielded chunk gets a running
   * index written into it.
   */
  protected async *streamChat({
    messages,
  }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
    const client = await this.session.getClient();
    const chunkStream = await client.chat.stream(this.buildParams(messages));

    //Indices
    let idx_counter: number = 0;
    for await (const event of chunkStream) {
      // Each stream event wraps the completion chunk in `data`.
      const part = event.data;
      if (!part.choices.length) continue;

      part.choices[0].index = idx_counter;

      idx_counter++;

      yield {
        raw: part,
        delta: part.choices[0].delta.content ?? "",
      };
    }
    return;
  }
}
|
||||
export * from "@llamaindex/mistral";
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
export * from "./CohereRerank.js";
|
||||
export * from "@llamaindex/cohere";
|
||||
export {
|
||||
MixedbreadAIReranker,
|
||||
type MixedbreadAIRerankerParams,
|
||||
} from "@llamaindex/mixedbread";
|
||||
export * from "./JinaAIReranker.js";
|
||||
export * from "./MixedbreadAIReranker.js";
|
||||
|
||||
@@ -9,13 +9,13 @@ import {
|
||||
BaseIndexStore,
|
||||
SimpleIndexStore,
|
||||
} from "@llamaindex/core/storage/index-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "../ServiceContext.js";
|
||||
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
|
||||
import type {
|
||||
BaseVectorStore,
|
||||
VectorStoreByType,
|
||||
} from "../vector-store/types.js";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import type { ServiceContext } from "../ServiceContext.js";
|
||||
import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js";
|
||||
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
|
||||
|
||||
export interface StorageContext {
|
||||
|
||||
@@ -3,13 +3,13 @@ import {
|
||||
DEFAULT_NAMESPACE,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "@llamaindex/core/global";
|
||||
import { KVDocumentStore } from "@llamaindex/core/storage/doc-store";
|
||||
import {
|
||||
BaseInMemoryKVStore,
|
||||
SimpleKVStore,
|
||||
} from "@llamaindex/core/storage/kv-store";
|
||||
import { path } from "@llamaindex/env";
|
||||
import _ from "lodash";
|
||||
import { KVDocumentStore } from "./KVDocumentStore.js";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
type SaveDict = Record<string, any>;
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
export * from "@llamaindex/azure/storage";
|
||||
export * from "@llamaindex/core/storage/chat-store";
|
||||
export * from "@llamaindex/core/storage/doc-store";
|
||||
export * from "@llamaindex/core/storage/index-store";
|
||||
export * from "@llamaindex/core/storage/kv-store";
|
||||
export * from "./chatStore/AzureCosmosMongovCoreChatStore.js";
|
||||
export * from "./chatStore/AzureCosmosNoSqlChatStore.js";
|
||||
export * from "./docStore/AzureCosmosMongovCoreDocumentStore.js";
|
||||
export * from "./docStore/AzureCosmosNoSqlDocumentStore.js";
|
||||
export { PostgresDocumentStore } from "./docStore/PostgresDocumentStore.js";
|
||||
export {
|
||||
PostgresDocumentStore,
|
||||
PostgresIndexStore,
|
||||
PostgresKVStore,
|
||||
} from "@llamaindex/postgres";
|
||||
export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js";
|
||||
export * from "./FileSystem.js";
|
||||
export * from "./indexStore/AzureCosmosMongovCoreIndexStore.js";
|
||||
export * from "./indexStore/AzureCosmosNoSqlIndexStore.js";
|
||||
export { PostgresIndexStore } from "./indexStore/PostgresIndexStore.js";
|
||||
export * from "./kvStore/AzureCosmosMongovCoreKVStore.js";
|
||||
export * from "./kvStore/AzureCosmosNoSqlKVStore.js";
|
||||
export { PostgresKVStore } from "./kvStore/PostgresKVStore.js";
|
||||
|
||||
export * from "./StorageContext.js";
|
||||
|
||||
@@ -1,18 +1,15 @@
|
||||
import {
|
||||
AzureDynamicSessionTool,
|
||||
type AzureDynamicSessionToolParams,
|
||||
} from "./AzureDynamicSessionTool.node.js";
|
||||
import { WikipediaTool, type WikipediaToolParams } from "./WikipediaTool.js";
|
||||
} from "@llamaindex/azure";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-namespace
|
||||
export namespace ToolsFactory {
|
||||
type ToolsMap = {
|
||||
[Tools.Wikipedia]: typeof WikipediaTool;
|
||||
[Tools.AzureCodeInterpreter]: typeof AzureDynamicSessionTool;
|
||||
};
|
||||
|
||||
export enum Tools {
|
||||
Wikipedia = "wikipedia.WikipediaToolSpec",
|
||||
AzureCodeInterpreter = "azure_code_interpreter.AzureCodeInterpreterToolSpec",
|
||||
}
|
||||
|
||||
@@ -20,12 +17,6 @@ export namespace ToolsFactory {
|
||||
key: Tool,
|
||||
...params: ConstructorParameters<ToolsMap[Tool]>
|
||||
): Promise<InstanceType<ToolsMap[Tool]>> {
|
||||
if (key === Tools.Wikipedia) {
|
||||
return new WikipediaTool(
|
||||
...(params as WikipediaToolParams[]),
|
||||
) as InstanceType<ToolsMap[Tool]>;
|
||||
}
|
||||
|
||||
if (key === Tools.AzureCodeInterpreter) {
|
||||
return new AzureDynamicSessionTool(
|
||||
...(params as AzureDynamicSessionToolParams[]),
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
export * from "@llamaindex/core/tools";
|
||||
export * from "./QueryEngineTool.js";
|
||||
export * from "./WikipediaTool.js";
|
||||
|
||||
@@ -1,270 +1 @@
|
||||
import {
|
||||
Collection,
|
||||
DataAPIClient,
|
||||
Db,
|
||||
type Filter,
|
||||
type FindOptions,
|
||||
type SomeDoc,
|
||||
} from "@datastax/astra-db-ts";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
} from "./utils.js";
|
||||
|
||||
/**
 * Vector store backed by an Astra DB collection.
 *
 * A collection must be attached with `connect` or `createAndConnect` before
 * `add`, `delete` or `query` is called.
 */
export class AstraDBVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  /** Document field that holds the node id (default "_id"). */
  idKey: string;
  /** Document field that holds the node text (default "content"). */
  contentKey: string;

  private astraClient: DataAPIClient;
  private astraDB: Db;
  private collection: Collection | undefined;

  /**
   * @param init - Optional settings. `params.token` and `params.endpoint`
   *   fall back to the `ASTRA_DB_APPLICATION_TOKEN` and
   *   `ASTRA_DB_API_ENDPOINT` environment variables; `params.namespace`
   *   falls back to `ASTRA_DB_NAMESPACE`, then "default_keyspace".
   * @throws {Error} If no token or no endpoint can be resolved.
   */
  constructor(
    init?: Partial<AstraDBVectorStore> & {
      params?: {
        token: string;
        endpoint: string;
        namespace?: string;
      };
    } & VectorStoreBaseParams,
  ) {
    super(init);
    const token = init?.params?.token ?? getEnv("ASTRA_DB_APPLICATION_TOKEN");
    const endpoint = init?.params?.endpoint ?? getEnv("ASTRA_DB_API_ENDPOINT");

    if (!token) {
      throw new Error(
        "Must specify ASTRA_DB_APPLICATION_TOKEN via env variable.",
      );
    }
    if (!endpoint) {
      throw new Error("Must specify ASTRA_DB_API_ENDPOINT via env variable.");
    }
    const namespace =
      init?.params?.namespace ??
      getEnv("ASTRA_DB_NAMESPACE") ??
      "default_keyspace";
    this.astraClient = new DataAPIClient(token, {
      caller: ["LlamaIndexTS"],
    });
    this.astraDB = this.astraClient.db(endpoint, { namespace });

    this.idKey = init?.idKey ?? "_id";
    this.contentKey = init?.contentKey ?? "content";
  }

  /**
   * Returns the attached collection or throws if none is attached yet.
   *
   * @param action - Verb used in the error message ("adding", "deleting", "querying").
   */
  private requireCollection(action: string): Collection {
    if (!this.collection) {
      throw new Error(`Must connect to collection before ${action}.`);
    }
    return this.collection;
  }

  /**
   * Create a new collection in your Astra DB vector database and connects to it.
   * You must call this method or `connect` before adding, deleting, or querying.
   *
   * @param collection - Your new collection's name
   * @param options - CreateCollectionOptions used to set the number of vector dimensions and similarity metric
   * @returns Promise that resolves if the creation did not throw an error.
   */
  async createAndConnect(
    collection: string,
    options?: Parameters<Db["createCollection"]>[1],
  ): Promise<void> {
    this.collection = await this.astraDB.createCollection(collection, options);
    console.debug("Created Astra DB collection");

    return;
  }

  /**
   * Connect to an existing collection in your Astra DB vector database.
   * You must call this method or `createAndConnect` before adding, deleting, or querying.
   *
   * @param collection - Your existing collection's name
   * @returns Promise that resolves if the connection did not throw an error.
   */
  async connect(collection: string): Promise<void> {
    this.collection = await this.astraDB.collection(collection);
    console.debug("Connected to Astra DB collection");

    return;
  }

  /**
   * Get an instance of your Astra DB client.
   * @returns the AstraDB client
   */
  client(): DataAPIClient {
    return this.astraClient;
  }

  /**
   * Add your document(s) to your Astra DB collection.
   *
   * Each node is stored as one document holding its embedding (`$vector`),
   * its id under `idKey`, its text under `contentKey`, and its metadata.
   *
   * @param nodes - Nodes to insert; an empty list inserts nothing.
   * @returns an array of node ids which were added
   * @throws {Error} If no collection is connected.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    const collection = this.requireCollection("adding");

    if (!nodes || nodes.length === 0) {
      return [];
    }

    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.contentKey,
        this.flatMetadata,
      );

      return {
        $vector: node.getEmbedding(),
        [this.idKey]: node.id_,
        [this.contentKey]: node.getContent(MetadataMode.NONE),
        ...metadata,
      };
    });

    console.debug(`Adding ${dataToInsert.length} rows to table`);

    const insertResult = await collection.insertMany(dataToInsert);

    return insertResult.insertedIds as string[];
  }

  /**
   * Delete a document from your Astra DB collection.
   *
   * The document is matched on the configured `idKey`, the same field that
   * `add` writes the node id to and `query` reads it from.
   *
   * @param refDocId - The id of the document to delete
   * @param deleteOptions - DeleteOneOptions to pass to the delete query
   * @returns Promise that resolves if the delete query did not throw an error.
   * @throws {Error} If no collection is connected.
   */
  async delete(
    refDocId: string,
    deleteOptions?: Parameters<Collection["deleteOne"]>[1],
  ): Promise<void> {
    const collection = this.requireCollection("deleting");

    console.debug(`Deleting row with id ${refDocId}`);

    await collection.deleteOne(
      {
        [this.idKey]: refDocId,
      },
      deleteOptions,
    );
  }

  /**
   * Query documents from your Astra DB collection to get the closest match to your embedding.
   *
   * When `query.queryEmbedding` is set it is used as the vector sort;
   * otherwise `options.sort` applies. `limit` and `includeSimilarity`
   * always override the corresponding entries of `options`.
   *
   * @param query - VectorStoreQuery
   * @param options - FindOptions
   * @returns Matching nodes with their ids and similarity scores, in cursor order.
   * @throws {Error} If no collection is connected.
   */
  async query(
    query: VectorStoreQuery,
    options?: Parameters<Collection["find"]>[1],
  ): Promise<VectorStoreQueryResult> {
    const collection = this.requireCollection("querying");

    const astraFilter = this.toAstraFilter(query.filters);
    const cursor = await collection.find(astraFilter, <FindOptions>{
      ...options,
      sort: query.queryEmbedding
        ? { $vector: query.queryEmbedding }
        : options?.sort,
      limit: query.similarityTopK,
      includeSimilarity: true,
    });

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const row of cursor) {
      // Split the bookkeeping fields off; whatever remains is node metadata.
      const {
        $vector: embedding,
        $similarity: similarity,
        [this.idKey]: id,
        [this.contentKey]: content,
        ...metadata
      } = row;

      const node = metadataDictToNode(metadata, {
        fallback: {
          id,
          text: content,
          ...metadata,
        },
      });
      node.setContent(content);

      ids.push(id);
      similarities.push(similarity);
      nodes.push(node);
    }

    return {
      similarities,
      ids,
      nodes,
    };
  }

  /**
   * Converts metadata filters into an Astra filter object.
   * Missing or empty filters give `{}`; otherwise the items are combined
   * under `$and` (the default) or `$or`.
   *
   * @throws {Error} For any other filter condition.
   */
  private toAstraFilter(filters?: MetadataFilters): Filter<SomeDoc> {
    if (!filters || filters.filters?.length === 0) return {};
    const condition = filters.condition ?? FilterCondition.AND;
    const listFilter = filters.filters.map((f) => this.buildFilterItem(f));
    if (condition === FilterCondition.OR) return { $or: listFilter };
    if (condition === FilterCondition.AND) return { $and: listFilter };
    throw new Error(`Not supported filter condition: ${condition}`);
  }

  /**
   * Converts one metadata filter into the matching Astra operator expression.
   *
   * @throws {Error} If the operator has no mapping here.
   */
  private buildFilterItem(filter: MetadataFilter): Filter<SomeDoc> {
    const { key, operator, value } = filter;
    switch (operator) {
      case FilterOperator.EQ:
        return { [key]: value };
      case FilterOperator.NE:
        return { [key]: { $ne: value } };
      case FilterOperator.GT:
        return { [key]: { $gt: value } };
      case FilterOperator.LT:
        return { [key]: { $lt: value } };
      case FilterOperator.GTE:
        return { [key]: { $gte: value } };
      case FilterOperator.LTE:
        return { [key]: { $lte: value } };
      case FilterOperator.IN:
        return { [key]: { $in: parseArrayValue(value) } };
      case FilterOperator.NIN:
        return { [key]: { $nin: parseArrayValue(value) } };
      case FilterOperator.IS_EMPTY:
        return { [key]: { $size: 0 } };
      default:
        throw new Error(`Not supported filter operator: ${operator}`);
    }
  }
}
|
||||
export * from "@llamaindex/astra";
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export * from "@llamaindex/azure";
|
||||
@@ -1,235 +1 @@
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
ChromaClient,
|
||||
type ChromaClientParams,
|
||||
type DeleteParams,
|
||||
type QueryRecordsParams,
|
||||
type QueryResponse,
|
||||
type Where,
|
||||
type WhereDocument,
|
||||
} from "chromadb";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
VectorStoreQueryMode,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
type ChromaDeleteOptions = {
|
||||
where?: Where;
|
||||
whereDocument?: WhereDocument;
|
||||
};
|
||||
|
||||
type ChromaQueryOptions = {
|
||||
whereDocument?: WhereDocument;
|
||||
};
|
||||
|
||||
type Collection = Awaited<ReturnType<ChromaClient["getOrCreateCollection"]>>;
|
||||
|
||||
const DEFAULT_TEXT_KEY = "text";
|
||||
|
||||
type ChromaFilterCondition = "$and" | "$or";
|
||||
type ChromaFilterOperator =
|
||||
| "$eq"
|
||||
| "$ne"
|
||||
| "$gt"
|
||||
| "$lt"
|
||||
| "$gte"
|
||||
| "$lte"
|
||||
| "$in"
|
||||
| "$nin";
|
||||
|
||||
/**
 * Vector store backed by a ChromaDB collection that is fetched (or created)
 * on first use.
 */
export class ChromaVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;
  textKey: string;
  private chromaClient: ChromaClient;
  private collection: Collection | null = null;
  private collectionName: string;

  // Lookup table from metadata filter operators to Chroma's operator names.
  private static readonly OPERATOR_MAP = new Map<
    FilterOperator,
    ChromaFilterOperator
  >([
    [FilterOperator.EQ, "$eq"],
    [FilterOperator.NE, "$ne"],
    [FilterOperator.GT, "$gt"],
    [FilterOperator.LT, "$lt"],
    [FilterOperator.GTE, "$gte"],
    [FilterOperator.LTE, "$lte"],
    [FilterOperator.IN, "$in"],
    [FilterOperator.NIN, "$nin"],
  ]);

  /**
   * @param init - Collection name, optional text key (defaults to
   *   `DEFAULT_TEXT_KEY`) and optional parameters for the Chroma client.
   */
  constructor(
    init: {
      collectionName: string;
      textKey?: string;
      chromaClientParams?: ChromaClientParams;
    } & VectorStoreBaseParams,
  ) {
    super(init);
    const { collectionName, chromaClientParams, textKey } = init;
    this.collectionName = collectionName;
    this.chromaClient = new ChromaClient(chromaClientParams);
    this.textKey = textKey ?? DEFAULT_TEXT_KEY;
  }

  /** Returns the underlying Chroma client. */
  client(): ChromaClient {
    return this.chromaClient;
  }

  /** Returns the cached collection, fetching or creating it on the first call. */
  async getCollection(): Promise<Collection> {
    if (this.collection) {
      return this.collection;
    }
    this.collection = await this.chromaClient.getOrCreateCollection({
      name: this.collectionName,
    });
    return this.collection;
  }

  /** Builds the parallel arrays (embeddings, ids, metadatas, documents) passed to the collection's `add`. */
  private getDataToInsert(nodes: BaseNode[]) {
    const metadatas = nodes.map((n) =>
      nodeToMetadata(n, true, this.textKey, this.flatMetadata),
    );
    const embeddings = nodes.map((n) => n.getEmbedding());
    const ids = nodes.map((n) => n.id_);
    const documents = nodes.map((n) => n.getContent(MetadataMode.NONE));
    return { embeddings, ids, metadatas, documents };
  }

  /**
   * Inserts the nodes into the collection.
   *
   * @returns The ids of the given nodes; an empty list inserts nothing.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!nodes || nodes.length === 0) {
      return [];
    }

    const payload = this.getDataToInsert(nodes);
    const target = await this.getCollection();
    await target.add(payload);
    return nodes.map((n) => n.id_);
  }

  /**
   * Deletes the entry whose id equals `refDocId`, optionally narrowed by
   * `where` / `whereDocument` clauses.
   */
  async delete(
    refDocId: string,
    deleteOptions?: ChromaDeleteOptions,
  ): Promise<void> {
    const target = await this.getCollection();
    const request = {
      ids: [refDocId],
      where: deleteOptions?.where,
      whereDocument: deleteOptions?.whereDocument,
    } as DeleteParams;
    await target.delete(request);
  }

  /** Maps AND / OR to "$and" / "$or"; any other condition throws. */
  private transformChromaFilterCondition(
    condition: FilterCondition,
  ): ChromaFilterCondition {
    if (condition === FilterCondition.AND) {
      return "$and";
    }
    if (condition === FilterCondition.OR) {
      return "$or";
    }
    throw new Error(`Filter condition ${condition} not supported`);
  }

  /** Maps a filter operator to its Chroma name; operators without a mapping throw. */
  private transformChromaFilterOperator(
    operator: FilterOperator,
  ): ChromaFilterOperator {
    const mapped = ChromaVectorStore.OPERATOR_MAP.get(operator);
    if (mapped === undefined) {
      throw new Error(`Filter operator ${operator} not supported`);
    }
    return mapped;
  }

  /**
   * Converts metadata filters into a Chroma `where` clause.
   * A single filter is returned as is; several filters are grouped under
   * the condition key ("$and" when no condition is given); none yields `{}`.
   */
  private toChromaFilter(filters: MetadataFilters): Where {
    const condition = filters.condition
      ? this.transformChromaFilterCondition(
          filters.condition as FilterCondition,
        )
      : "$and";

    if (!filters.filters) {
      return {};
    }

    const clauses: Where[] = [];
    for (const filter of filters.filters) {
      if (filter.operator) {
        clauses.push({
          [filter.key]: {
            [this.transformChromaFilterOperator(
              filter.operator as FilterOperator,
            )]: filter.value,
          },
        });
      } else {
        clauses.push({ [filter.key]: filter.value });
      }
    }

    if (clauses.length === 1) {
      return clauses[0]!;
    }

    const combined: Where = {};
    if (clauses.length > 1) {
      combined[condition] = clauses;
    }
    return combined;
  }

  /**
   * Runs a similarity query against the collection.
   *
   * @param query - The query; `docIds` and non-default modes are rejected.
   * @param options - Optional `whereDocument` clause.
   * @returns Nodes, ids and similarities (computed as `1 - distance`) taken
   *   from the first result list of the response.
   * @throws {Error} If `query.docIds` is set or `query.mode` is not the default mode.
   */
  async query(
    query: VectorStoreQuery,
    options?: ChromaQueryOptions,
  ): Promise<VectorStoreQueryResult> {
    if (query.docIds) {
      throw new Error("ChromaDB does not support querying by docIDs");
    }
    if (query.mode != VectorStoreQueryMode.DEFAULT) {
      throw new Error("ChromaDB does not support querying by mode");
    }

    const where: Where = query.filters
      ? this.toChromaFilter(query.filters)
      : {};

    const target = await this.getCollection();
    const request = {
      queryEmbeddings: query.queryEmbedding ?? undefined,
      queryTexts: query.queryStr ?? undefined,
      nResults: query.similarityTopK,
      // An empty where clause is omitted from the request.
      where: Object.keys(where).length ? where : undefined,
      whereDocument: options?.whereDocument,
      //ChromaDB doesn't return the result embeddings by default so we need to include them
      include: ["distances", "metadatas", "documents", "embeddings"],
    } as QueryRecordsParams;
    const queryResponse: QueryResponse = await target.query(request);

    const nodes = queryResponse.ids[0]!.map((id, index) => {
      const text = (queryResponse.documents as string[][])[0]![index];
      const metaData = queryResponse.metadatas[0]![index] ?? {};
      const node = metadataDictToNode(metaData);
      node.setContent(text);
      return node;
    });
    const similarities = (queryResponse.distances as number[][])[0]!.map(
      (distance) => 1 - distance,
    );

    const result: VectorStoreQueryResult = {
      nodes,
      similarities,
      ids: queryResponse.ids[0]!,
    };
    return result;
  }
}
|
||||
export * from "@llamaindex/chroma";
|
||||
|
||||
@@ -1,279 +1 @@
|
||||
import type { ChannelOptions } from "@grpc/grpc-js";
|
||||
import { BaseNode, MetadataMode, type Metadata } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import {
|
||||
DataType,
|
||||
MilvusClient,
|
||||
type ClientConfig,
|
||||
type DeleteReq,
|
||||
type RowData,
|
||||
type SearchSimpleReq,
|
||||
} from "@zilliz/milvus2-sdk-node";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
parsePrimitiveValue,
|
||||
} from "./utils.js";
|
||||
|
||||
/**
 * Translates metadata filters into a Milvus boolean filter expression.
 *
 * Every filter addresses a key inside the JSON field literally named
 * `metadata`. The individual clauses are joined with the filters' condition,
 * which defaults to `"and"`.
 *
 * @param scalarFilters The metadata filters to translate.
 * @returns The filter expression; an empty string when no clause is produced.
 * @throws Error if a filter uses an operator that is not handled here.
 */
function parseScalarFilters(scalarFilters: MetadataFilters): string {
  const condition = scalarFilters.condition ?? "and";
  const filters: string[] = [];

  for (const filter of scalarFilters.filters) {
    switch (filter.operator) {
      case "==":
      case "!=": {
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} "${parsePrimitiveValue(filter.value)}"`,
        );
        break;
      }
      case "in": {
        const filterValue = parseArrayValue(filter.value)
          .map((v) => `"${v}"`)
          .join(", ");
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} [${filterValue}]`,
        );
        break;
      }
      case "nin": {
        // There is no `nin` operator in the generated expression, so every
        // value is excluded individually:
        //   (metadata["key"] != "value1" && metadata["key"] != "value2")
        const exclusions = parseArrayValue(filter.value).map(
          (v) => `metadata["${filter.key}"] != "${v}"`,
        );
        if (exclusions.length === 0) {
          // Excluding nothing restricts nothing; pushing an empty clause
          // would leave a dangling `and`/`or` in the joined expression.
          break;
        }
        // Parenthesize multi-value groups so the clause stays one unit
        // regardless of the condition it is joined with.
        filters.push(
          exclusions.length === 1
            ? exclusions[0]!
            : `(${exclusions.join(" && ")})`,
        );
        break;
      }
      case "<":
      case "<=":
      case ">":
      case ">=": {
        filters.push(
          `metadata["${filter.key}"] ${filter.operator} ${parsePrimitiveValue(filter.value)}`,
        );
        break;
      }
      default:
        throw new Error(`Operator ${filter.operator} is not supported.`);
    }
  }

  return filters.join(` ${condition} `);
}
|
||||
|
||||
/**
 * Vector store backed by a Milvus collection.
 *
 * Each node is stored as one row holding the node id, its embedding, its text
 * content and its metadata. The collection is created (if missing) and loaded
 * lazily on the first add/delete/query call.
 */
export class MilvusVectorStore extends BaseVectorStore {
  public storesText: boolean = true;
  public isEmbeddingQuery?: boolean = false;
  private flatMetadata: boolean = true;

  private milvusClient: MilvusClient;
  private collectionInitialized = false;
  private collectionName: string;

  private idKey: string;
  private contentKey: string;
  private metadataKey: string;
  private embeddingKey: string;

  /**
   * @param init Optional settings. Either pass a ready `milvusClient`, or let
   *   the store build one from `params` with fallbacks to the `MILVUS_ADDRESS`,
   *   `MILVUS_SSL`, `MILVUS_USERNAME` and `MILVUS_PASSWORD` environment
   *   variables. `collection` defaults to `"llamacollection"`; the field names
   *   default to `"id"`, `"content"`, `"metadata"` and `"embedding"`.
   * @throws Error if no client is passed and no address can be resolved.
   */
  constructor(
    init?: Partial<{ milvusClient: MilvusClient }> &
      VectorStoreBaseParams & {
        params?: {
          configOrAddress: ClientConfig | string;
          ssl?: boolean;
          username?: string;
          password?: string;
          channelOptions?: ChannelOptions;
        };
        collection?: string;
        idKey?: string;
        contentKey?: string;
        metadataKey?: string;
        embeddingKey?: string;
      },
  ) {
    super(init);
    if (init?.milvusClient) {
      this.milvusClient = init.milvusClient;
    } else {
      const configOrAddress =
        init?.params?.configOrAddress ?? getEnv("MILVUS_ADDRESS");
      const ssl = init?.params?.ssl ?? getEnv("MILVUS_SSL") === "true";
      const username = init?.params?.username ?? getEnv("MILVUS_USERNAME");
      const password = init?.params?.password ?? getEnv("MILVUS_PASSWORD");

      if (!configOrAddress) {
        throw new Error("Must specify MILVUS_ADDRESS via env variable.");
      }
      this.milvusClient = new MilvusClient(
        configOrAddress,
        ssl,
        username,
        password,
        init?.params?.channelOptions,
      );
    }

    this.collectionName = init?.collection ?? "llamacollection";
    this.idKey = init?.idKey ?? "id";
    this.contentKey = init?.contentKey ?? "content";
    this.metadataKey = init?.metadataKey ?? "metadata";
    this.embeddingKey = init?.embeddingKey ?? "embedding";
  }

  /** Returns the underlying Milvus client. */
  public client(): MilvusClient {
    return this.milvusClient;
  }

  /**
   * Creates the collection with the four configured fields and an index on
   * the embedding field.
   */
  private async createCollection() {
    await this.milvusClient.createCollection({
      collection_name: this.collectionName,
      fields: [
        {
          name: this.idKey,
          data_type: DataType.VarChar,
          is_primary_key: true,
          max_length: 200,
        },
        {
          name: this.embeddingKey,
          data_type: DataType.FloatVector,
          // The vector dimension is fixed here; it is not configurable.
          dim: 1536,
        },
        {
          name: this.contentKey,
          data_type: DataType.VarChar,
          max_length: 9000,
        },
        {
          name: this.metadataKey,
          data_type: DataType.JSON,
        },
      ],
    });
    await this.milvusClient.createIndex({
      collection_name: this.collectionName,
      field_name: this.embeddingKey,
    });
  }

  /**
   * Waits for the client connection, creates the collection if it does not
   * exist yet and loads it. Runs its work only until it has succeeded once.
   */
  private async ensureCollection(): Promise<void> {
    if (this.collectionInitialized) {
      return;
    }
    await this.milvusClient.connectPromise;

    // Check collection exists
    const isCollectionExist = await this.milvusClient.hasCollection({
      collection_name: this.collectionName,
    });
    if (!isCollectionExist.value) {
      await this.createCollection();
    }

    await this.milvusClient.loadCollectionSync({
      collection_name: this.collectionName,
    });
    this.collectionInitialized = true;
  }

  /**
   * Inserts the given nodes into the collection.
   *
   * @param nodes Nodes to insert; each must provide an embedding.
   * @returns The ids reported by the insert, as strings. An empty input
   *   returns an empty list without contacting the server.
   */
  public async add(nodes: BaseNode<Metadata>[]): Promise<string[]> {
    // Nothing to insert: skip the round trip instead of sending an empty
    // insert request.
    if (nodes.length === 0) {
      return [];
    }

    await this.ensureCollection();

    const result = await this.milvusClient.insert({
      collection_name: this.collectionName,
      data: nodes.map((node) => {
        const metadata = nodeToMetadata(
          node,
          true,
          this.contentKey,
          this.flatMetadata,
        );

        const entry: RowData = {
          [this.idKey]: node.id_,
          [this.embeddingKey]: node.getEmbedding(),
          [this.contentKey]: node.getContent(MetadataMode.NONE),
          [this.metadataKey]: metadata,
        };

        return entry;
      }),
    });

    if (!result.IDs) {
      return [];
    }

    if ("int_id" in result.IDs) {
      return result.IDs.int_id.data.map((i) => String(i));
    }

    return result.IDs.str_id.data.map((s) => String(s));
  }

  /**
   * Deletes the row whose primary key equals `refDocId`.
   *
   * @param refDocId The id passed to the delete request as the only entry of `ids`.
   * @param deleteOptions Extra delete request fields; spread last, so they
   *   can override `collection_name`.
   */
  public async delete(
    refDocId: string,
    deleteOptions?: Omit<DeleteReq, "ids">,
  ): Promise<void> {
    await this.ensureCollection();

    await this.milvusClient.delete({
      ids: [refDocId],
      collection_name: this.collectionName,
      ...deleteOptions,
    });
  }

  /**
   * Converts metadata filters into a Milvus filter expression.
   *
   * @returns `undefined` when no filters are given, otherwise the expression
   *   built by `parseScalarFilters`.
   */
  public toMilvusFilter(filters?: MetadataFilters): string | undefined {
    if (!filters) return undefined;
    // TODO: Milvus also support standard filters, we can add it later
    return parseScalarFilters(filters);
  }

  /**
   * Searches the collection with the query embedding.
   *
   * @param query Supplies `similarityTopK`, `queryEmbedding` and optional `filters`.
   * @param _options Unused.
   * @returns Nodes rebuilt from the stored metadata and content, with their
   *   scores and ids.
   */
  public async query(
    query: VectorStoreQuery,
    _options?: object,
  ): Promise<VectorStoreQueryResult> {
    await this.ensureCollection();

    const found = await this.milvusClient.search(<SearchSimpleReq>{
      collection_name: this.collectionName,
      limit: query.similarityTopK,
      vector: query.queryEmbedding,
      filter: this.toMilvusFilter(query.filters),
    });

    const nodes: BaseNode<Metadata>[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];

    for (const result of found.results) {
      // Read the row through the configured field names: add() writes the
      // metadata and content under `metadataKey` / `contentKey`, so reading
      // the literal `metadata` / `content` only works with the defaults.
      const node = metadataDictToNode(result[this.metadataKey]);
      node.setContent(result[this.contentKey]);
      nodes.push(node);

      similarities.push(result.score);
      ids.push(String(result.id));
    }

    return {
      nodes,
      similarities,
      ids,
    };
  }

  /** No-op: nothing has to be persisted explicitly for this store. */
  public async persist() {
    // no need to do anything
  }
}
|
||||
export * from "@llamaindex/milvus";
|
||||
|
||||
@@ -1,340 +1 @@
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { MetadataMode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { BulkWriteOptions, Collection } from "mongodb";
|
||||
import { MongoClient } from "mongodb";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
type FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
// Default definition of the embedding field used when the Atlas Search index
// is created: a 1536-dimensional knnVector compared by cosine similarity.
// Values passed as `embeddingDefinition` to the store are merged over it.
// See https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
const DEFAULT_EMBEDDING_DEFINITION = {
  type: "knnVector",
  dimensions: 1536,
  similarity: "cosine",
};
|
||||
|
||||
/**
 * Maps a metadata filter operator to the matching MongoDB query operator.
 *
 * @param operator The filter operator, e.g. `"=="` or `"in"`.
 * @returns The MongoDB operator, e.g. `"$eq"` or `"$in"`.
 * @throws Error if the operator has no MongoDB counterpart.
 */
function mapLcMqlFilterOperators(operator: string): string {
  // A Map is used instead of a plain object literal: indexing an object with
  // an arbitrary string also resolves inherited keys such as "constructor"
  // or "toString", which would be returned instead of raising the error.
  const operatorMap: ReadonlyMap<string, string> = new Map([
    ["==", "$eq"],
    ["<", "$lt"],
    ["<=", "$lte"],
    [">", "$gt"],
    [">=", "$gte"],
    ["!=", "$ne"],
    ["in", "$in"],
    ["nin", "$nin"],
  ]);
  const mqlOperator = operatorMap.get(operator);
  if (!mqlOperator) throw new Error(`Unsupported operator: ${operator}`);
  return mqlOperator;
}
|
||||
|
||||
/**
 * Converts metadata filters into a MongoDB filter document.
 *
 * - No filters (or an empty filter list) yields `{}`.
 * - A single filter yields `{ key: { $op: value } }`.
 * - Several filters are wrapped in `$and` or `$or` according to the
 *   condition; a missing condition is treated as AND.
 *
 * @param filters The metadata filters to convert.
 * @returns The MongoDB filter document.
 * @throws Error if several filters are combined with a condition other than
 *   AND or OR, or if a filter uses an unsupported operator.
 */
function toMongoDBFilter(filters?: MetadataFilters): Record<string, unknown> {
  // An empty list must not reach `$and` / `$or`: those would receive an
  // empty array.
  if (!filters || filters.filters.length === 0) return {};

  const createFilterObject = (mf: MetadataFilter) => ({
    [mf.key]: {
      [mapLcMqlFilterOperators(mf.operator)]: mf.value,
    },
  });

  if (filters.filters.length === 1) {
    return createFilterObject(filters.filters[0]!);
  }

  // Default to AND when no condition is given, like the other stores do.
  const condition = filters.condition ?? FilterCondition.AND;

  if (condition === FilterCondition.AND) {
    return { $and: filters.filters.map(createFilterObject) };
  }

  if (condition === FilterCondition.OR) {
    return { $or: filters.filters.map(createFilterObject) };
  }

  throw new Error("filters condition not recognized. Must be AND or OR");
}
|
||||
|
||||
/**
 * Vector store that uses MongoDB Atlas for storage and vector search.
 * This store uses the $vectorSearch aggregation stage to perform vector similarity search.
 */
export class MongoDBAtlasVectorSearch extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  dbName: string;
  collectionName: string;
  autoCreateIndex: boolean;
  embeddingDefinition: Record<string, unknown>;
  indexedMetadataFields: string[];

  /**
   * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
   */
  mongodbClient: MongoClient;

  /**
   * Name of the vector index. If invalid, Mongo will silently ignore this issue and return 0 results.
   *
   * Default: "default"
   */
  indexName: string;

  /**
   * Name of the key containing the embedding vector.
   *
   * Default: "embedding"
   */
  embeddingKey: string;

  /**
   * Name of the key containing the node id.
   *
   * Default: "id"
   */
  idKey: string;

  /**
   * Name of the key containing the node text.
   *
   * Default: "text"
   */
  textKey: string;

  /**
   * Name of the key containing the node metadata.
   *
   * Default: "metadata"
   */
  metadataKey: string;

  /**
   * Options to pass to the insertMany function when adding nodes.
   */
  insertOptions?: BulkWriteOptions | undefined;

  /**
   * Function to determine the number of candidates to retrieve for a given query.
   * In case your results are not good, you might tune this value.
   *
   * {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ | Run Vector Search Queries}
   *
   * {@link https://arxiv.org/abs/1603.09320 | Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs}
   *
   *
   * Default: query.similarityTopK * 10
   */
  numCandidates: (query: VectorStoreQuery) => number;
  private collection?: Collection;

  /**
   * @param init Store settings. `dbName` and `collectionName` are required.
   *   When `mongodbClient` is omitted, a client is created from the
   *   `MONGODB_URI` environment variable. `embeddingDefinition` is merged
   *   over the default embedding definition.
   * @throws Error if no client is passed and `MONGODB_URI` is not set.
   */
  constructor(
    init: Partial<MongoDBAtlasVectorSearch> & {
      dbName: string;
      collectionName: string;
      embedModel?: BaseEmbedding;
      autoCreateIndex?: boolean;
      indexedMetadataFields?: string[];
      embeddingDefinition?: Record<string, unknown>;
    } & VectorStoreBaseParams,
  ) {
    super(init);
    if (init.mongodbClient) {
      this.mongodbClient = init.mongodbClient;
    } else {
      const mongoUri = getEnv("MONGODB_URI");
      if (!mongoUri) {
        throw new Error(
          "Must specify MONGODB_URI via env variable if not directly passing in client.",
        );
      }
      this.mongodbClient = new MongoClient(mongoUri);
    }

    this.dbName = init.dbName ?? "default_db";
    this.collectionName = init.collectionName ?? "default_collection";
    this.autoCreateIndex = init.autoCreateIndex ?? true;
    this.indexedMetadataFields = init.indexedMetadataFields ?? [];
    this.embeddingDefinition = {
      ...DEFAULT_EMBEDDING_DEFINITION,
      ...(init.embeddingDefinition ?? {}),
    };
    this.indexName = init.indexName ?? "default";
    this.embeddingKey = init.embeddingKey ?? "embedding";
    this.idKey = init.idKey ?? "id";
    this.textKey = init.textKey ?? "text";
    this.metadataKey = init.metadataKey ?? "metadata";
    this.numCandidates =
      init.numCandidates ?? ((query) => query.similarityTopK * 10);
    this.insertOptions = init.insertOptions;
  }

  /**
   * Returns the collection, creating it on the first call.
   *
   * When `autoCreateIndex` is enabled, the existing search indexes are listed
   * on every call and the index named `indexName` is created if it is
   * missing. The index maps the embedding field plus one `token` field per
   * entry of `indexedMetadataFields`.
   *
   * @returns The MongoDB collection used by this store.
   */
  async ensureCollection(): Promise<Collection> {
    if (!this.collection) {
      const collection = await this.mongodbClient
        .db(this.dbName)
        .createCollection(this.collectionName);

      this.collection = collection;
    }

    if (this.autoCreateIndex) {
      const searchIndexes = await this.collection.listSearchIndexes().toArray();
      const indexExists = searchIndexes.some(
        (index) => index.name === this.indexName,
      );
      if (!indexExists) {
        const additionalDefinition: Record<string, { type: string }> = {};
        this.indexedMetadataFields.forEach((field) => {
          additionalDefinition[field] = { type: "token" };
        });
        await this.collection.createSearchIndex({
          name: this.indexName,
          definition: {
            mappings: {
              dynamic: true,
              fields: {
                // Index the field the vectors are actually written to and
                // searched on (`embeddingKey`), not the literal "embedding".
                [this.embeddingKey]: this.embeddingDefinition,
                ...additionalDefinition,
              },
            },
          },
        });
      }
    }

    return this.collection;
  }

  /**
   * Add nodes to the vector store.
   *
   * @param nodes Nodes to add to the vector store
   * @returns List of node ids that were added
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!nodes || nodes.length === 0) {
      return [];
    }
    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.textKey,
        this.flatMetadata,
      );

      // Include the specified metadata fields in the top level of the document (to help filter)
      const populatedMetadata: Record<string, unknown> = {};
      for (const field of this.indexedMetadataFields) {
        populatedMetadata[field] = metadata[field];
      }

      return {
        [this.idKey]: node.id_,
        [this.embeddingKey]: node.getEmbedding(),
        [this.textKey]: node.getContent(MetadataMode.NONE) || "",
        [this.metadataKey]: metadata,
        ...populatedMetadata,
      };
    });

    const collection = await this.ensureCollection();
    await collection.insertMany(dataToInsert, this.insertOptions);
    return nodes.map((node) => node.id_);
  }

  /**
   * Delete nodes from the vector store with the given refDocId.
   *
   * @param refDocId The refDocId of the nodes to delete
   * @param deleteOptions Options to pass to the deleteMany function
   */
  async delete(refDocId: string, deleteOptions?: object): Promise<void> {
    const collection = await this.ensureCollection();
    await collection.deleteMany(
      {
        [`${this.metadataKey}.ref_doc_id`]: refDocId,
      },
      deleteOptions,
    );
  }

  /** Returns the MongoClient used by this store. */
  client() {
    return this.mongodbClient;
  }

  /**
   * Perform a vector similarity search query.
   *
   * @param query The query to run
   * @param options Unused.
   * @returns List of nodes and their similarities
   */
  async query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult> {
    const params: Record<string, unknown> = {
      queryVector: query.queryEmbedding,
      path: this.embeddingKey,
      numCandidates: this.numCandidates(query),
      limit: query.similarityTopK,
      index: this.indexName,
    };

    if (query.filters) {
      params.filter = toMongoDBFilter(query.filters);
    }

    const queryField = { $vectorSearch: params };
    const pipeline = [
      queryField,
      {
        // Attach the search score and drop the stored vector from the output.
        $project: {
          score: { $meta: "vectorSearchScore" },
          [this.embeddingKey]: 0,
        },
      },
    ];

    const collection = await this.ensureCollection();
    const cursor = collection.aggregate(pipeline);

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const res of cursor) {
      const text = res[this.textKey];
      const score = res.score;
      const id = res[this.idKey];
      const metadata = res[this.metadataKey];

      const node = metadataDictToNode(metadata);
      node.setContent(text);

      ids.push(id);
      nodes.push(node);
      similarities.push(score);
    }

    return {
      nodes,
      similarities,
      ids,
    };
  }
}
|
||||
export * from "@llamaindex/mongodb";
|
||||
|
||||
@@ -1,595 +1,7 @@
|
||||
import type pg from "pg";
|
||||
|
||||
import type { IsomorphicDB } from "@llamaindex/core/vector-store";
|
||||
import type { VercelPool } from "@vercel/postgres";
|
||||
import type { Sql } from "postgres";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilterValue,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
|
||||
import { escapeLikeString } from "./utils.js";
|
||||
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import { DEFAULT_COLLECTION } from "@llamaindex/core/global";
|
||||
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
|
||||
import { Document, MetadataMode } from "@llamaindex/core/schema";
|
||||
|
||||
// todo: create adapter for postgres client
/**
 * Wraps a Vercel Postgres pool in the IsomorphicDB interface.
 *
 * Queries resolve to the `rows` of the pool's query result. `begin` issues
 * BEGIN / COMMIT around the callback and ROLLBACK when the callback throws.
 *
 * NOTE(review): the transaction statements are sent through `client.query`
 * on the pool object itself — confirm they are guaranteed to run on the same
 * connection.
 */
function fromVercelPool(client: VercelPool): IsomorphicDB {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async function runQuery(sql: string, params?: any[]): Promise<any[]> {
    const outcome = await client.query(sql, params);
    return outcome.rows;
  }

  return {
    query: runQuery,
    async begin(fn) {
      await client.query("BEGIN");
      try {
        const value = await fn(runQuery);
        await client.query("COMMIT");
        return value;
      } catch (failure) {
        // Undo the partial work, then surface the original error.
        await client.query("ROLLBACK");
        throw failure;
      }
    },
    async connect() {
      await client.connect();
    },
    async close() {
      return client.end();
    },
    onCloseEvent(fn) {
      // The pool's "remove" event is what triggers the close callback.
      client.on("remove", () => {
        fn();
      });
    },
  };
}
|
||||
|
||||
/**
 * Wraps a `postgres` (postgres.js) client in the IsomorphicDB interface.
 *
 * Statements are executed with `unsafe(sql, params)`. `begin` runs the
 * callback inside `client.begin`, handing it a query function bound to the
 * scoped client. `connect` and `onCloseEvent` do nothing for this client.
 */
function fromPostgres(client: Sql): IsomorphicDB {
  return {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    async query(sql: string, params?: any[]): Promise<any[]> {
      return client.unsafe(sql, params);
    },
    async begin(fn) {
      // Captured from inside the transaction callback and returned afterwards.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      let captured: any;
      await client.begin(async (transaction) => {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        async function scopedQuery(sql: string, params?: any[]): Promise<any[]> {
          return transaction.unsafe(sql, params);
        }
        captured = await fn(scopedQuery);
      });
      return captured;
    },
    async connect() {
      // nothing to do: there is no explicit connect step for this client
    },
    async close() {
      return client.end();
    },
    onCloseEvent() {
      // no close event
    },
  };
}
|
||||
|
||||
/**
 * Wraps a `pg` client (standalone or checked out of a pool) in the
 * IsomorphicDB interface.
 *
 * Queries resolve to the `rows` of the pg result. `begin` issues
 * BEGIN / COMMIT around the callback and ROLLBACK when the callback throws.
 *
 * @param client A pg client or a pooled client.
 * @returns The adapter around `client`.
 */
function fromPG(client: pg.Client | pg.PoolClient): IsomorphicDB {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const queryFn = async (sql: string, params?: any[]): Promise<any[]> => {
    return (await client.query(sql, params)).rows;
  };
  return {
    query: queryFn,
    begin: async (fn) => {
      await client.query("BEGIN");
      try {
        const result = await fn(queryFn);
        await client.query("COMMIT");
        return result;
      } catch (e) {
        // Undo the partial work, then surface the original error.
        await client.query("ROLLBACK");
        throw e;
      }
    },
    connect: () => client.connect(),
    close: async () => {
      // Check for `release` first: a pooled client is a regular client with
      // `release` attached, so testing for `end` first would end the
      // connection instead of handing it back to its pool.
      if ("release" in client) {
        client.release();
      } else if ("end" in client) {
        await client.end();
      }
    },
    onCloseEvent: (fn) => {
      client.on("end", fn);
    },
  };
}
|
||||
|
||||
/** Schema used by PGVectorStore when no `schemaName` is configured. */
export const PGVECTOR_SCHEMA = "public";
/** Table used by PGVectorStore when no `tableName` is configured. */
export const PGVECTOR_TABLE = "llamaindex_embedding";
/** Vector column dimension used by PGVectorStore when no `dimensions` is configured. */
export const DEFAULT_DIMENSIONS = 1536;
|
||||
|
||||
/** Settings shared by every way of configuring a PGVectorStore. */
type PGVectorStoreBaseConfig = {
  /** Schema holding the table. Defaults to PGVECTOR_SCHEMA. */
  schemaName?: string | undefined;
  /** Table holding the vector records. Defaults to PGVECTOR_TABLE. */
  tableName?: string | undefined;
  /** Dimension of the `embeddings` vector column. Defaults to DEFAULT_DIMENSIONS. */
  dimensions?: number | undefined;
  /** Embedding model; presumably consumed by the base vector store — confirm. */
  embedModel?: BaseEmbedding | undefined;
  /** Whether the schema, table and indexes are created when the database is obtained. Defaults to true. */
  performSetup?: boolean | undefined;
};
|
||||
|
||||
/**
 * Configuration of a PGVectorStore: the shared settings plus exactly one way
 * of obtaining the database client.
 */
export type PGVectorStoreConfig = VectorStoreBaseParams &
  PGVectorStoreBaseConfig &
  (
    | {
        /**
         * Client configuration options for the pg client.
         * The store creates and connects its own pg client from them on first use.
         *
         * {@link https://node-postgres.com/apis/client#new-client PostgresSQL Client API}
         */
        clientConfig: pg.ClientConfig;
      }
    | {
        /**
         * Whether the store should connect the given client itself.
         * When omitted or true, the store calls connect on first use; when
         * false, the client is treated as already connected.
         */
        shouldConnect?: boolean | undefined;
        /**
         * A pg client or pool client instance.
         * If provided, make sure it is not connected to the database yet, or it will throw an error.
         */
        client: pg.Client | pg.PoolClient;
      }
    | {
        /**
         * No need to connect to the database, the client is already connected.
         */
        shouldConnect?: false;
        /** A `postgres` client or a Vercel Postgres pool. */
        client: Sql | VercelPool;
      }
  );
|
||||
|
||||
/**
|
||||
* Provides support for writing and querying vector data in Postgres.
|
||||
* Note: Can't be used with data created using the Python version of the vector store (https://docs.llamaindex.ai/en/stable/examples/vector_stores/postgres/)
|
||||
*/
|
||||
export class PGVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
private collection: string = DEFAULT_COLLECTION;
|
||||
private readonly schemaName: string = PGVECTOR_SCHEMA;
|
||||
private readonly tableName: string = PGVECTOR_TABLE;
|
||||
private readonly dimensions: number = DEFAULT_DIMENSIONS;
|
||||
|
||||
private isDBConnected: boolean = false;
|
||||
private db: IsomorphicDB | null = null;
|
||||
private readonly clientConfig: pg.ClientConfig | null = null;
|
||||
private readonly performSetup: boolean = true;
|
||||
|
||||
/**
 * Creates the store from either a pg client configuration or an existing
 * client.
 *
 * With `clientConfig`, the connection is created lazily on first use.
 * With `client`, the matching adapter is selected by inspecting the object:
 * a Vercel pool and a `postgres` client are treated as already connected; any
 * other client is treated as connected only when `shouldConnect` is false.
 *
 * NOTE(review): the structural Vercel check (has `connect` and `query`, lacks
 * `unsafe`) looks like it would also match a plain pg client — confirm that
 * this is intended.
 *
 * @param config Store configuration.
 */
constructor(config: PGVectorStoreConfig) {
  super(config);
  this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
  this.tableName = config?.tableName ?? PGVECTOR_TABLE;
  this.dimensions = config?.dimensions ?? DEFAULT_DIMENSIONS;
  this.performSetup = config?.performSetup ?? true;

  if ("clientConfig" in config) {
    // The client itself is created later, when the database is first needed.
    this.clientConfig = config.clientConfig;
    return;
  }

  const suppliedClient = config.client;
  const looksLikeVercelPool =
    suppliedClient.constructor.name.includes("Vercel") ||
    (!!(suppliedClient as VercelPool).connect &&
      !!(suppliedClient as VercelPool).query &&
      !(suppliedClient as Sql).unsafe);

  if (looksLikeVercelPool) {
    this.isDBConnected = true;
    this.db = fromVercelPool(suppliedClient as unknown as VercelPool);
    return;
  }

  if (typeof suppliedClient === "function") {
    // A `postgres` client is a callable.
    this.isDBConnected = true;
    this.db = fromPostgres(suppliedClient as Sql);
    return;
  }

  this.isDBConnected =
    config.shouldConnect === undefined ? false : !config.shouldConnect;
  this.db = fromPG(suppliedClient as pg.Client | pg.PoolClient);
}
|
||||
|
||||
/**
 * Setter for the collection property.
 * Using a collection allows for simple segregation of vector data,
 * e.g. by user, source, or access-level.
 * The value is stored with every record added afterwards and is used by
 * clearCollection and delete; delete ignores it when it is an empty string.
 * @param coll Name for the collection.
 */
setCollection(coll: string) {
  this.collection = coll;
}
|
||||
|
||||
/**
 * Getter for the collection property.
 * Using a collection allows for simple segregation of vector data,
 * e.g. by user, source, or access-level.
 * @returns The currently-set collection value. Until setCollection is called
 * this is DEFAULT_COLLECTION.
 */
getCollection(): string {
  return this.collection;
}
|
||||
|
||||
/**
 * Returns the database adapter, creating and connecting it when needed.
 *
 * - Without an adapter, a pg Client is created from `clientConfig`,
 *   connected, the `vector` extension is ensured and the pgvector types are
 *   registered.
 * - With an adapter that is not marked as connected, `connect()` is called.
 * - When `performSetup` is enabled, the schema check runs on every call.
 *
 * NOTE(review): the close listener below is registered on every call, not
 * only when a connection is established — confirm listeners do not pile up
 * on long-lived clients.
 *
 * @returns The connected database adapter.
 */
private async getDb(): Promise<IsomorphicDB> {
  if (!this.db) {
    // Loaded lazily so `pg` is only required when the store owns the client.
    const pg = await import("pg");
    const { Client } = pg.default ? pg.default : pg;

    const { registerTypes } = await import("pgvector/pg");
    // Create DB connection
    // Connection options come from the configured clientConfig
    const db = new Client({
      ...this.clientConfig,
    });

    await db.connect();
    this.isDBConnected = true;

    // Check vector extension
    await db.query("CREATE EXTENSION IF NOT EXISTS vector");
    await registerTypes(db);

    // All good? Keep the connection reference
    this.db = fromPG(db);
  }

  // An adapter supplied through the constructor may still need connecting.
  if (this.db && !this.isDBConnected) {
    await this.db.connect();
    this.isDBConnected = true;
  }

  // Track connection loss so a later call connects again.
  this.db.onCloseEvent(() => {
    this.isDBConnected = false;
  });

  if (this.performSetup) {
    // Check schema, table(s), index(es)
    await this.checkSchema(this.db);
  }

  return this.db;
}
|
||||
|
||||
/**
 * Creates the schema, the embeddings table and its two lookup indexes if
 * they do not exist yet.
 *
 * @param db The database adapter to run the statements on.
 * @returns The same adapter.
 */
private async checkSchema(db: IsomorphicDB) {
  const qualifiedTable = `${this.schemaName}.${this.tableName}`;

  await db.query(`CREATE SCHEMA IF NOT EXISTS ${this.schemaName}`);

  const createTableStatement = [
    `CREATE TABLE IF NOT EXISTS ${qualifiedTable}(`,
    "id uuid DEFAULT gen_random_uuid() PRIMARY KEY,",
    "external_id VARCHAR,",
    "collection VARCHAR,",
    "document TEXT,",
    "metadata JSONB DEFAULT '{}',",
    `embeddings VECTOR(${this.dimensions})`,
    ")",
  ].join("\n");
  await db.query(createTableStatement);

  // One plain index per lookup column, created in this order.
  for (const column of ["external_id", "collection"]) {
    await db.query(
      `CREATE INDEX IF NOT EXISTS idx_${this.tableName}_${column} ON ${qualifiedTable} (${column});`,
    );
  }

  // TODO add IVFFlat or HNSW indexing?
  return db;
}
|
||||
|
||||
/**
 * Returns the database adapter used by this store, connecting it first if
 * necessary.
 * When setup is enabled, this also checks and creates the schema,
 * the destination table and indexes if not found.
 * @returns A promise for the connected database adapter; it rejects with the error encountered while connecting/setting up.
 */
client() {
  return this.getDb();
}
|
||||
|
||||
/**
 * Removes every vector record that belongs to the current collection.
 * NOTE: The collection is the one controlled by setCollection/getCollection.
 * @returns The rows returned by the delete statement.
 */
async clearCollection() {
  const statement = [
    `DELETE FROM ${this.schemaName}.${this.tableName}`,
    "WHERE collection = $1",
  ].join("\n");

  const db = await this.getDb();
  return db.query(statement, [this.collection]);
}
|
||||
|
||||
/**
 * Builds one parameter tuple per node, in the column order
 * (id, external_id, collection, document, metadata, embeddings).
 *
 * A `create_date` is stamped onto the node's metadata when it has none.
 *
 * @param embeddingResults The nodes to convert.
 * @returns One six-element array per node.
 */
private getDataToInsert(embeddingResults: BaseNode<Metadata>[]) {
  return embeddingResults.map((node) => {
    const recordId = node.id_.length ? node.id_ : null;

    const metadata = node.metadata || {};
    if (!metadata.create_date) {
      metadata.create_date = new Date();
    }

    const document = node.getContent(MetadataMode.NONE);
    // Text form of the vector: "[v1,v2,...]".
    const vectorLiteral = `[${node.getEmbedding().join(",")}]`;

    return [
      // fixme: why id is null?
      recordId!,
      "",
      this.collection,
      document,
      metadata,
      vectorLiteral,
    ];
  });
}
|
||||
|
||||
/**
 * Inserts the given nodes as vector records, updating records whose id
 * already exists.
 * NOTE: Records are stored under the collection controlled by setCollection/getCollection.
 * @param embeddingResults The nodes to store; each must provide an embedding.
 * @returns The ids of the inserted or updated records (empty for empty input).
 */
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
  if (embeddingResults.length === 0) {
    console.warn("Empty list sent to PGVectorStore::add");
    return [];
  }

  const db = await this.getDb();

  return db.begin(async (query) => {
    const rows = this.getDataToInsert(embeddingResults);

    // Every row binds six consecutive parameters: ($1, ..., $6), ($7, ..., $12), ...
    const columnsPerRow = 6;
    const placeholders = rows
      .map((_row, rowIndex) => {
        const firstSlot = rowIndex * columnsPerRow + 1;
        const slots = Array.from(
          { length: columnsPerRow },
          (_unused, offset) => `$${firstSlot + offset}`,
        );
        return `(${slots.join(", ")})`;
      })
      .join(", ");

    const statement = [
      "",
      `INSERT INTO ${this.schemaName}.${this.tableName}`,
      "(id, external_id, collection, document, metadata, embeddings)",
      `VALUES ${placeholders}`,
      "ON CONFLICT (id) DO UPDATE SET",
      "external_id = EXCLUDED.external_id,",
      "collection = EXCLUDED.collection,",
      "document = EXCLUDED.document,",
      "metadata = EXCLUDED.metadata,",
      "embeddings = EXCLUDED.embeddings",
      "RETURNING id",
      "",
    ].join("\n");

    const returned = await query(statement, rows.flat());
    return returned.map((row) => row.id as string);
  });
}
|
||||
|
||||
/**
 * Deletes a single record from the database by id.
 * NOTE: When the collection (see setCollection/getCollection) is not empty,
 * only a record of that collection is deleted.
 * @param refDocId Unique identifier for the record to delete.
 * @param deleteKwargs Required by VectorStore interface. Currently ignored.
 * @returns Promise that resolves if the delete query did not throw an error.
 */
async delete(refDocId: string, deleteKwargs?: object): Promise<void> {
  const db = await this.getDb();

  // Read the collection once, after the await, so the SQL text and the
  // parameter list always agree even if the collection changes while the
  // database is being obtained.
  const collection = this.collection;
  const scopedToCollection = collection.length > 0;

  const collectionCriteria = scopedToCollection ? "AND collection = $2" : "";
  const sql = `DELETE FROM ${this.schemaName}.${this.tableName}
WHERE id = $1 ${collectionCriteria}`;
  const params = scopedToCollection ? [refDocId, collection] : [refDocId];

  await db.query(sql, params);
}
|
||||
|
||||
/**
 * Maps a filter condition to the SQL keyword used to join filter clauses.
 * Only an explicit OR yields "OR"; AND and any other value yield "AND".
 */
private toPostgresCondition(condition: `${FilterCondition}`) {
  return condition === FilterCondition.OR ? "OR" : "AND";
}
|
||||
|
||||
/**
 * Maps a metadata filter operator to the Postgres operator text that is
 * spliced into the WHERE clause.
 *
 * IN and NIN are array comparisons whose right-hand side is appended by the
 * caller as `($n)`:
 *   - IN  -> `= ANY($n)`   (equal to at least one element)
 *   - NIN -> `!= ALL($n)`  (different from every element)
 * `!= ANY` would be true as soon as one element differs, which is not a
 * "not in" test.
 *
 * @param operator The filter operator to translate.
 * @returns The SQL operator; operators without a mapping fall back to "=".
 */
private toPostgresOperator(operator: `${FilterOperator}`) {
  switch (operator) {
    case FilterOperator.EQ:
      return "=";
    case FilterOperator.GT:
      return ">";
    case FilterOperator.LT:
      return "<";
    case FilterOperator.NE:
      return "!=";
    case FilterOperator.GTE:
      return ">=";
    case FilterOperator.LTE:
      return "<=";
    case FilterOperator.IN:
      return "= ANY";
    case FilterOperator.NIN:
      return "!= ALL";
    case FilterOperator.CONTAINS:
      return "@>";
    case FilterOperator.ANY:
      return "?|";
    case FilterOperator.ALL:
      return "?&";
    default:
      // fallback to "="
      return "=";
  }
}
|
||||
|
||||
/**
 * Builds the SQL fragment and bind value for a single metadata filter.
 *
 * The filter value is always passed as a bind parameter. The metadata key
 * cannot be bound in these expressions, so it is embedded as a SQL string
 * literal with embedded single quotes doubled; this keeps a key containing
 * `'` from terminating the literal and altering the statement.
 *
 * @param filter The metadata filter to translate.
 * @param paramIndex 1-based index of the positional parameter this clause may use.
 * @returns `clause` is the SQL text; `param` is the value to bind at
 *   `paramIndex`, or `undefined` when the clause references no parameter.
 */
private buildFilterClause(
  filter: MetadataFilter,
  paramIndex: number,
): {
  clause: string;
  param: string | string[] | number | number[] | undefined;
} {
  // Escape for use inside '...' (assumes standard_conforming_strings, the
  // Postgres default, where '' is the only escape needed).
  const key = filter.key.replace(/'/g, "''");
  const placeholder = `$${paramIndex}`;

  if (
    filter.operator === FilterOperator.IN ||
    filter.operator === FilterOperator.NIN
  ) {
    return {
      clause: `metadata->>'${key}' ${this.toPostgresOperator(filter.operator)}(${placeholder})`,
      param: filter.value,
    };
  }

  if (
    filter.operator === FilterOperator.ALL ||
    filter.operator === FilterOperator.ANY
  ) {
    return {
      clause: `metadata->'${key}' ${this.toPostgresOperator(filter.operator)} ${placeholder}::text[]`,
      param: filter.value,
    };
  }

  if (filter.operator === FilterOperator.CONTAINS) {
    // The value is wrapped in a one-element JSON array for the jsonb test.
    return {
      clause: `metadata->'${key}' ${this.toPostgresOperator(filter.operator)} ${placeholder}::jsonb`,
      param: JSON.stringify([filter.value]),
    };
  }

  if (filter.operator === FilterOperator.IS_EMPTY) {
    // Matches a missing key, a null value, an empty string or an empty array.
    return {
      clause: `(NOT (metadata ? '${key}') OR metadata->>'${key}' IS NULL OR metadata->>'${key}' = '' OR metadata->'${key}' = '[]'::jsonb)`,
      param: undefined,
    };
  }

  if (filter.operator === FilterOperator.TEXT_MATCH) {
    const escapedValue = escapeLikeString(filter.value as string);
    return {
      clause: `metadata->>'${key}' LIKE ${placeholder}`,
      param: `%${escapedValue}%`,
    };
  }

  // if value is number, coerce metadata value to float
  if (typeof filter.value === "number") {
    return {
      clause: `(metadata->>'${key}')::float ${this.toPostgresOperator(filter.operator)} ${placeholder}`,
      param: filter.value,
    };
  }

  return {
    clause: `metadata->>'${key}' ${this.toPostgresOperator(filter.operator)} ${placeholder}`,
    param: filter.value,
  };
}
|
||||
|
||||
/**
 * Query the vector store for the closest matching data to the query embeddings.
 *
 * Rows are ordered by the `<=>` distance between the stored embeddings and
 * the query embedding; the reported similarity is `1 - distance`.
 * When a collection is set, only rows of that collection are considered.
 *
 * @param query The VectorStoreQuery to be used
 * @param options Required by VectorStore interface. Currently ignored.
 * @returns Zero or more Document instances with data from the vector store.
 */
async query(
  query: VectorStoreQuery,
  options?: object,
): Promise<VectorStoreQueryResult> {
  // TODO QUERY TYPES:
  //    Distance:       SELECT embedding <=> $1 AS distance FROM items;
  //    Inner Product:  SELECT (embedding <#> $1) * -1 AS inner_product FROM items;
  //    Cosine Sim:     SELECT 1 - (embedding <=> $1) AS cosine_similarity FROM items;

  const embedding = "[" + query.queryEmbedding?.join(",") + "]";
  const max = query.similarityTopK ?? 2;
  const hasCollection = Boolean(this.collection.length);

  // $1 is always the embedding; $2 is the collection when one is active.
  const whereClauses = hasCollection ? ["collection = $2"] : [];
  const params: Array<MetadataFilterValue> = hasCollection
    ? [embedding, this.collection]
    : [embedding];

  const filterClauses: string[] = [];
  query.filters?.filters.forEach((filter) => {
    const paramIndex = params.length + 1;
    const { clause, param } = this.buildFilterClause(filter, paramIndex);
    filterClauses.push(clause);
    // Only `undefined` means "this clause uses no placeholder". A truthiness
    // test would drop legitimate values such as 0 or "" and leave the
    // clause's $n without a bound value.
    if (param !== undefined) {
      params.push(param);
    }
  });

  if (filterClauses.length > 0) {
    const condition = this.toPostgresCondition(
      query.filters?.condition ?? FilterCondition.AND,
    );
    whereClauses.push(`(${filterClauses.join(` ${condition} `)})`);
  }

  const where =
    whereClauses.length > 0 ? `WHERE ${whereClauses.join(" AND ")}` : "";

  const sql = `SELECT
      v.*,
      embeddings <=> $1 s
    FROM ${this.schemaName}.${this.tableName} v
    ${where}
    ORDER BY s
    LIMIT ${max}
  `;

  const db = await this.getDb();
  const results = await db.query(sql, params);

  const nodes = results.map((row) => {
    return new Document({
      id_: row.id,
      text: row.document,
      metadata: row.metadata,
      // The driver may hand the vector back as its text form.
      embedding:
        typeof row.embeddings === "string"
          ? JSON.parse(row.embeddings)
          : row.embeddings,
    });
  });

  return {
    nodes: nodes,
    similarities: results.map((row) => 1 - row.s),
    ids: results.map((row) => row.id),
  };
}
|
||||
|
||||
/**
 * No-op kept to satisfy the VectorStore interface; the path is ignored.
 * @param persistPath Unused.
 * @returns An already-resolved Promise.
 */
persist(persistPath: string): Promise<void> {
  const done: Promise<void> = Promise.resolve();
  return done;
}
|
||||
}
|
||||
export {
|
||||
DEFAULT_DIMENSIONS,
|
||||
PGVECTOR_SCHEMA,
|
||||
PGVECTOR_TABLE,
|
||||
PGVectorStore,
|
||||
type PGVectorStoreConfig,
|
||||
} from "@llamaindex/postgres";
|
||||
|
||||
@@ -1,293 +1 @@
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
|
||||
import type { BaseNode, Metadata } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type {
|
||||
FetchResponse,
|
||||
Index,
|
||||
PineconeRecord,
|
||||
QueryOptions,
|
||||
ScoredPineconeRecord,
|
||||
} from "@pinecone-database/pinecone";
|
||||
import { type Pinecone } from "@pinecone-database/pinecone";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
/**
 * Constructor options for PineconeVectorStore, on top of the base
 * vector-store options. Every field is optional.
 */
type PineconeParams = VectorStoreBaseParams & {
  /** Name of the Pinecone index to use. */
  indexName?: string;
  /** Number of records sent per upsert call. */
  chunkSize?: number;
  /** Namespace inside the index. */
  namespace?: string;
  /** Metadata key under which the node text is looked up. */
  textKey?: string;
  /** Pinecone API key. */
  apiKey?: string;
};
|
||||
|
||||
/**
 * Provides support for writing and querying vector data in Pinecone.
 *
 * Settings come from the constructor params first and fall back to the
 * environment: PINECONE_API_KEY (required), PINECONE_INDEX_NAME ("llama"),
 * PINECONE_NAMESPACE ("") and PINECONE_CHUNK_SIZE (100).
 */
export class PineconeVectorStore extends BaseVectorStore {
  storesText: boolean = true;

  /*
    FROM @pinecone-database/pinecone:
      PINECONE_API_KEY="your_api_key"
      PINECONE_ENVIRONMENT="your_environment"
    Our addition:
      PINECONE_INDEX_NAME="llama"
      PINECONE_CHUNK_SIZE=100
  */
  db?: Pinecone;
  indexName: string;
  namespace: string;
  chunkSize: number;
  textKey: string;

  apiKey: string;

  /**
   * @param params Optional overrides for the environment-derived settings.
   * @throws Error when no API key is given and PINECONE_API_KEY is unset.
   */
  constructor(params?: PineconeParams) {
    super(params);
    this.indexName =
      params?.indexName ?? getEnv("PINECONE_INDEX_NAME") ?? "llama";
    this.namespace = params?.namespace ?? getEnv("PINECONE_NAMESPACE") ?? "";
    this.chunkSize =
      params?.chunkSize ??
      Number.parseInt(getEnv("PINECONE_CHUNK_SIZE") ?? "100");
    this.textKey = params?.textKey ?? "text";
    const apiKey = params?.apiKey ?? getEnv("PINECONE_API_KEY");
    if (!apiKey) {
      throw new Error("PINECONE_API_KEY is required");
    }
    this.apiKey = apiKey;
  }

  /** Lazily imports the Pinecone SDK and creates the client on first use. */
  private async getDb(): Promise<Pinecone> {
    if (!this.db) {
      const { Pinecone } = await import("@pinecone-database/pinecone");
      this.db = new Pinecone({
        apiKey: this.apiKey,
      });
    }

    return this.db;
  }

  /**
   * Returns the Pinecone client, creating it on first call.
   * No index is checked or created here.
   * @returns Promise of the Pinecone client.
   */
  client() {
    return this.getDb();
  }

  /**
   * Returns a handle to the configured index, scoped to the configured namespace.
   */
  async index() {
    const db: Pinecone = await this.getDb();
    return db.index(this.indexName).namespace(this.namespace);
  }

  /**
   * Delete all records for the current index.
   * NOTE: This operation is not supported by Pinecone for "Starter" (free) indexes.
   * @returns The result of the delete query.
   */
  async clearIndex() {
    const idx = await this.index();
    return await idx.deleteAll();
  }

  /**
   * Adds vector record(s) to the table.
   * @TODO Does not create or insert sparse vectors.
   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
   * @returns Due to limitations in the Pinecone client, does not return the upserted ID list, only a Promise resolve/reject.
   * @throws Error("Failed to save chunk") as soon as one chunk cannot be upserted.
   */
  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
    if (embeddingResults.length === 0) {
      return [];
    }

    const idx: Index = await this.index();
    const records = embeddingResults.map((node) => this.nodeToRecord(node));

    // A chunk size of 0 or NaN (e.g. an unparsable PINECONE_CHUNK_SIZE) would
    // never advance the loop or would send empty chunks; use the default.
    const step =
      Number.isFinite(this.chunkSize) && this.chunkSize >= 1
        ? Math.floor(this.chunkSize)
        : 100;

    for (let start = 0; start < records.length; start += step) {
      const chunk = records.slice(start, start + step);
      const saved = await this.saveChunk(idx, chunk);
      if (!saved) {
        throw new Error("Failed to save chunk");
      }
    }
    return [];
  }

  /**
   * Upserts one chunk of records.
   * @returns true on success; false (after logging the error) on failure.
   */
  protected async saveChunk(idx: Index, chunk: PineconeRecord[]) {
    try {
      await idx.upsert(chunk);
      return true;
    } catch (err) {
      const msg = `${err}`;
      console.log(msg, err);
      return false;
    }
  }

  /**
   * Deletes a single record from the index by id.
   * NOTE: `refDocId` is passed to Pinecone as the record id, within the
   * configured index and namespace.
   * @param refDocId Unique identifier for the record to delete.
   * @param deleteKwargs Required by VectorStore interface. Currently ignored.
   * @returns Promise that resolves if the delete query did not throw an error.
   */
  async delete(refDocId: string, deleteKwargs?: object): Promise<void> {
    const idx = await this.index();
    return idx.deleteOne(refDocId);
  }

  /**
   * Query the vector store for the closest matching data to the query embeddings
   * @TODO QUERY TYPES
   * @param query The VectorStoreQuery to be used
   * @param _options Required by VectorStore interface. Currently ignored.
   * @returns Zero or more Document instances with data from the vector store.
   *   `nodes`, `similarities` and `ids` are index-aligned and follow the
   *   order of the matches returned by Pinecone.
   */
  async query(
    query: VectorStoreQuery,
    _options?: object,
  ): Promise<VectorStoreQueryResult> {
    const filter = this.toPineconeFilter(query.filters);

    const defaultOptions: QueryOptions = {
      vector: query.queryEmbedding!,
      topK: query.similarityTopK,
      includeValues: true,
      includeMetadata: true,
    };

    if (filter) {
      defaultOptions.filter = filter;
    }

    const idx = await this.index();
    const results = await idx.query(defaultOptions);
    const matches = results.matches;

    if (matches.length === 0) {
      return { nodes: [], similarities: [], ids: [] };
    }

    const records: FetchResponse = await idx.fetch(
      matches.map((match) => match.id),
    );

    // fetch() returns a map keyed by id, so iterating its values gives no
    // guarantee of matching the order (or length) of `matches`. Walk the
    // matches and look each record up by id so nodes[i] always belongs to
    // similarities[i] and ids[i]; if a record is absent from the fetch
    // response, use the match itself (it was requested with values and
    // metadata included).
    const nodes = matches.map((match) => {
      const row = records.records[match.id] ?? match;
      return metadataDictToNode(row.metadata ?? {}, {
        fallback: {
          id: row.id,
          text: this.textFromResultRow(row),
          metadata: this.metaWithoutText(row.metadata ?? {}),
          embedding: row.values,
        },
      });
    });

    return {
      nodes: nodes,
      // `??` keeps a genuine score of 0; only a missing score becomes 999.
      similarities: matches.map((match) => match.score ?? 999),
      ids: matches.map((match) => match.id),
    };
  }

  /**
   * Required by VectorStore interface. Currently ignored.
   * @param persistPath
   * @returns Resolved Promise.
   */
  persist(persistPath: string): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Translates standard metadata filters into a Pinecone filter object.
   * @param stdFilters Filters to convert; `null`/`undefined` yields `undefined`.
   * @returns `{}` for an empty filter list, the single converted filter when
   *   there is exactly one, otherwise `{ $and: [...] }` or `{ $or: [...] }`.
   * @throws Error for a condition or operator that has no Pinecone equivalent.
   */
  toPineconeFilter(stdFilters?: MetadataFilters): object | undefined {
    if (stdFilters == null) return undefined;

    const transformCondition = (
      condition: `${FilterCondition}` = "and",
    ): string => {
      if (condition === "and") return "$and";
      if (condition === "or") return "$or";
      throw new Error(`Filter condition ${condition} not supported`);
    };

    const transformOperator = (operator: `${FilterOperator}`): string => {
      switch (operator) {
        case "!=":
          return "$ne";
        case "==":
          return "$eq";
        case ">":
          return "$gt";
        case "<":
          return "$lt";
        case ">=":
          return "$gte";
        case "<=":
          return "$lte";
        case "in":
          return "$in";
        case "nin":
          return "$nin";
        default:
          throw new Error(`Filter operator ${operator} not supported`);
      }
    };

    const convertFilterItem = (filter: MetadataFilter) => {
      return {
        [filter.key]: {
          [transformOperator(filter.operator)]: filter.value,
        },
      };
    };

    const convertFilter = (filter: MetadataFilters): object => {
      const filtersList = filter.filters
        .map((f) => convertFilterItem(f))
        .filter((f) => Object.keys(f).length > 0);

      if (filtersList.length === 0) return {};
      if (filtersList.length === 1) return filtersList[0] ?? {};

      const condition = transformCondition(filter.condition);
      return { [condition]: filtersList };
    };

    return convertFilter(stdFilters);
  }

  /** Reads the node text from the record metadata under `textKey` ("" if absent). */
  textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
    return row.metadata?.[this.textKey] ?? "";
  }

  /** Returns a copy of `meta` without the `textKey` entry. */
  metaWithoutText(meta: Metadata) {
    return Object.keys(meta)
      .filter((key) => key !== this.textKey)
      .reduce<Record<string, unknown>>((acc, key: string) => {
        acc[key] = meta[key];
        return acc;
      }, {});
  }

  /** Converts a node into the record shape expected by Pinecone upsert. */
  nodeToRecord(node: BaseNode<Metadata>) {
    const id = node.id_.length ? node.id_ : null;
    return {
      // fixme: why id is null?
      id: id!,
      values: node.getEmbedding(),
      metadata: nodeToMetadata(node),
    };
  }
}
|
||||
export * from "@llamaindex/pinecone";
|
||||
|
||||
@@ -1,414 +1 @@
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterCondition,
|
||||
FilterOperator,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
|
||||
import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
|
||||
import { QdrantClient } from "@qdrant/js-client-rest";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
/** Filter object accepted by the Qdrant REST client. */
type QdrantFilter = Schemas["Filter"];
/** Type of the `must` member of a Qdrant filter. */
type QdrantMustConditions = Schemas["Filter"]["must"];

/** One point as sent to Qdrant on upsert. */
type PointStruct = {
  id: string;
  payload: Record<string, string>;
  vector: number[];
};

/** Constructor options for QdrantVectorStore, on top of the base options. */
type QdrantParams = VectorStoreBaseParams & {
  collectionName?: string;
  client?: QdrantClient;
  url?: string;
  apiKey?: string;
  batchSize?: number;
};

/** Shape of a single search hit as consumed by this store. */
type QuerySearchResult = {
  id: string;
  score: number;
  payload: Record<string, unknown>;
  vector: number[] | null;
  version: number;
};
|
||||
|
||||
/**
 * Qdrant vector store.
 */
export class QdrantVectorStore extends BaseVectorStore {
  storesText: boolean = true;

  batchSize: number;
  collectionName: string;

  private db: QdrantClient;
  private collectionInitialized: boolean = false;

  /**
   * Creates a new QdrantVectorStore.
   * Either `client` or `url` must be supplied.
   * @param collectionName Qdrant collection name (default "default")
   * @param client Qdrant client
   * @param url Qdrant URL
   * @param apiKey Qdrant API key
   * @param batchSize Number of vectors to upload in a single batch (default 100)
   * @param embedModel Embedding model
   * @throws Error when neither `client` nor `url` is given.
   */
  constructor({
    collectionName,
    client,
    url,
    apiKey,
    batchSize,
    ...init
  }: QdrantParams) {
    super(init);
    if (!client && !url) {
      throw new Error("QdrantVectorStore requires url and collectionName");
    }

    if (client) {
      this.db = client;
    } else {
      this.db = new QdrantClient(<QdrantClientParams>{
        url: url,
        apiKey: apiKey,
      });
    }

    this.collectionName = collectionName ?? "default";
    this.batchSize = batchSize ?? 100;
  }

  /**
   * Returns the Qdrant client.
   * @returns Qdrant client
   */
  client() {
    return this.db;
  }

  /**
   * Creates a collection in Qdrant using cosine distance.
   * @param collectionName Qdrant collection name
   * @param vectorSize Dimensionality of the vectors
   */
  async createCollection(collectionName: string, vectorSize: number) {
    await this.db.createCollection(collectionName, {
      vectors: {
        size: vectorSize,
        distance: "Cosine",
      },
    });
  }

  /**
   * Checks whether the collection exists in Qdrant.
   * Any error raised by the lookup is reported as "does not exist".
   * @param collectionName Qdrant collection name
   * @returns true when the collection could be fetched, false otherwise
   */
  async collectionExists(collectionName: string): Promise<boolean> {
    try {
      await this.db.getCollection(collectionName);
      return true;
    } catch (e) {
      return false;
    }
  }

  /**
   * Initializes the collection in Qdrant, creating it if it does not exist.
   * @param vectorSize Dimensionality of the vectors
   */
  async initializeCollection(vectorSize: number) {
    const exists = await this.collectionExists(this.collectionName);
    if (!exists) {
      await this.createCollection(this.collectionName, vectorSize);
    }
    this.collectionInitialized = true;
  }

  /**
   * Builds a list of points from the given nodes.
   *
   * Exactly one point is produced per node, in input order. (An earlier
   * nested-batch loop advanced its index in both the inner and the outer
   * loop and therefore skipped one node after every full batch.)
   *
   * @param nodes Nodes to convert; each must carry an embedding.
   * @returns The points and, index-aligned, their ids.
   */
  async buildPoints(nodes: BaseNode[]): Promise<{
    points: PointStruct[];
    ids: string[];
  }> {
    const points: PointStruct[] = [];
    const ids: string[] = [];

    for (const node of nodes) {
      const vector = node.getEmbedding();
      const payload = nodeToMetadata(node);

      points.push({
        id: node.id_,
        payload: payload,
        vector: vector,
      });
      ids.push(node.id_);
    }

    return {
      points: points,
      ids: ids,
    };
  }

  /**
   * Adds the given nodes to the vector store.
   * On first use the collection is created if needed, sized from the first
   * node's embedding. Points are upserted sequentially in batches.
   * @param embeddingResults List of nodes
   * @returns List of node IDs
   */
  async add(embeddingResults: BaseNode[]): Promise<string[]> {
    if (embeddingResults.length > 0 && !this.collectionInitialized) {
      await this.initializeCollection(
        embeddingResults[0]!.getEmbedding().length,
      );
    }

    const { points, ids } = await this.buildPoints(embeddingResults);

    // A batch size of 0 or NaN would never advance the loop; use the default.
    const step =
      Number.isFinite(this.batchSize) && this.batchSize >= 1
        ? Math.floor(this.batchSize)
        : 100;

    for (let start = 0; start < points.length; start += step) {
      await this.db.upsert(this.collectionName, {
        points: points.slice(start, start + step),
      });
    }

    return ids;
  }

  /**
   * Deletes every point whose `doc_id` payload field equals `refDocId`.
   * @param refDocId Node ID
   */
  async delete(refDocId: string): Promise<void> {
    const mustFilter = [
      {
        key: "doc_id",
        match: {
          value: refDocId,
        },
      },
    ];

    await this.db.delete(this.collectionName, {
      filter: {
        must: mustFilter,
      },
    });
  }

  /**
   * Converts the result of a query to a VectorStoreQueryResult.
   * @param response Query response
   * @returns VectorStoreQueryResult
   */
  private parseToQueryResult(
    response: Array<QuerySearchResult>,
  ): VectorStoreQueryResult {
    const nodes = [];
    const similarities = [];
    const ids = [];

    for (const item of response) {
      nodes.push(metadataDictToNode(item.payload));
      ids.push(item.id);
      similarities.push(item.score);
    }

    return {
      nodes: nodes,
      similarities: similarities,
      ids: ids,
    };
  }

  /**
   * Queries the vector store for the closest matching data to the query embeddings.
   * @param query The VectorStoreQuery to be used
   * @param options May carry a `qdrant_filters` property; when present and
   *   truthy it is used as the filter instead of one built from `query`.
   * @returns Zero or more Document instances with data from the vector store.
   * @throws Error when `query.queryEmbedding` is missing.
   */
  async query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult> {
    const qdrantFilters =
      options && "qdrant_filters" in options
        ? options.qdrant_filters
        : undefined;

    let queryFilters: QdrantFilter | undefined;

    if (!query.queryEmbedding) {
      throw new Error("No query embedding provided");
    }

    if (qdrantFilters) {
      queryFilters = qdrantFilters;
    } else {
      queryFilters = buildQueryFilter(query);
    }

    const result = (await this.db.search(this.collectionName, {
      vector: query.queryEmbedding,
      limit: query.similarityTopK,
      ...(queryFilters && { filter: queryFilters }),
    })) as Array<QuerySearchResult>;

    return this.parseToQueryResult(result);
  }
}
|
||||
|
||||
/**
 * Qdrant filter builder.
 * Produces a `must` filter combining a `doc_id` match (when `docIds` is set)
 * with the converted metadata filters (when any are present).
 * @param query The VectorStoreQuery to be used
 * @returns `undefined` when the query has no docIds, queryStr or filters;
 *   otherwise the filter object.
 */
function buildQueryFilter(query: VectorStoreQuery): QdrantFilter | undefined {
  const hasCriteria = Boolean(
    query.docIds || query.queryStr || query.filters,
  );
  if (!hasCriteria) {
    return undefined;
  }

  const mustConditions: QdrantMustConditions = [];

  if (query.docIds) {
    const docIdCondition = {
      key: "doc_id",
      match: { any: query.docIds },
    };
    mustConditions.push(docIdCondition);
  }

  const metadataFilters = toQdrantMetadataFilters(query.filters);
  if (metadataFilters) {
    mustConditions.push(metadataFilters);
  }

  return { must: mustConditions };
}
|
||||
|
||||
/**
 * Converts metadata filters to Qdrant-compatible filters.
 * Operators without a branch below contribute no condition.
 * @param subFilters The metadata filters to be converted
 * @returns A QdrantFilter object (conditions under `should` for OR, under
 *   `must` otherwise) or undefined if no filters are provided
 */
function toQdrantMetadataFilters(
  subFilters?: MetadataFilters,
): QdrantFilter | undefined {
  if (!subFilters?.filters.length) return undefined;

  const conditions: QdrantMustConditions = [];

  for (const subfilter of subFilters.filters) {
    switch (subfilter.operator) {
      case FilterOperator.EQ:
        if (typeof subfilter.value === "number") {
          // Numeric equality is expressed as a closed range [value, value].
          conditions.push({
            key: subfilter.key,
            range: {
              gte: subfilter.value,
              lte: subfilter.value,
            },
          });
        } else {
          conditions.push({
            key: subfilter.key,
            match: { value: subfilter.value },
          });
        }
        break;

      case FilterOperator.LT:
        conditions.push({
          key: subfilter.key,
          range: { lt: subfilter.value },
        });
        break;

      case FilterOperator.GT:
        conditions.push({
          key: subfilter.key,
          range: { gt: subfilter.value },
        });
        break;

      case FilterOperator.GTE:
        conditions.push({
          key: subfilter.key,
          range: { gte: subfilter.value },
        });
        break;

      case FilterOperator.LTE:
        conditions.push({
          key: subfilter.key,
          range: { lte: subfilter.value },
        });
        break;

      case FilterOperator.TEXT_MATCH:
        conditions.push({
          key: subfilter.key,
          match: { text: subfilter.value },
        });
        break;

      case FilterOperator.NE:
        conditions.push({
          key: subfilter.key,
          match: { except: [subfilter.value] },
        });
        break;

      case FilterOperator.IN: {
        // A non-array value is treated as a comma-separated list.
        const values = Array.isArray(subfilter.value)
          ? subfilter.value.map(String)
          : String(subfilter.value).split(",");
        conditions.push({
          key: subfilter.key,
          match: { any: values },
        });
        break;
      }

      case FilterOperator.NIN: {
        // A non-array value is treated as a comma-separated list.
        const values = Array.isArray(subfilter.value)
          ? subfilter.value.map(String)
          : String(subfilter.value).split(",");
        conditions.push({
          key: subfilter.key,
          match: { except: values },
        });
        break;
      }

      case FilterOperator.IS_EMPTY:
        conditions.push({
          is_empty: { key: subfilter.key },
        });
        break;

      default:
        break;
    }
  }

  const filter: QdrantFilter = {};
  if (subFilters.condition === FilterCondition.OR) {
    filter.should = conditions;
  } else {
    filter.must = conditions;
  }

  return filter;
}
|
||||
export * from "@llamaindex/qdrant";
|
||||
|
||||
@@ -5,23 +5,21 @@ import {
|
||||
} from "@llamaindex/core/embeddings";
|
||||
import { DEFAULT_PERSIST_DIR } from "@llamaindex/core/global";
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { fs, path } from "@llamaindex/env";
|
||||
import { exists } from "../storage/FileSystem.js";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
FilterOperator,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
parsePrimitiveValue,
|
||||
VectorStoreQueryMode,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
parsePrimitiveValue,
|
||||
} from "./utils.js";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { fs, path } from "@llamaindex/env";
|
||||
import { exists } from "../storage/FileSystem.js";
|
||||
|
||||
const LEARNER_MODES = new Set<VectorStoreQueryMode>([
|
||||
VectorStoreQueryMode.SVM,
|
||||
|
||||
@@ -1,236 +1 @@
|
||||
import {
|
||||
BaseVectorStore,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
|
||||
import type { BaseNode, Metadata, TextNode } from "@llamaindex/core/schema";
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import { Index } from "@upstash/vector";
|
||||
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
|
||||
|
||||
/**
 * Constructor options for UpstashVectorStore, on top of the base
 * vector-store options. Every field is optional.
 */
type UpstashParams = VectorStoreBaseParams & {
  /** Namespace to use. */
  namespace?: string;
  /** Upstash Vector REST token. */
  token?: string;
  /** Upstash Vector REST URL. */
  endpoint?: string;
  /** Maximum number of vectors upserted in one call. */
  maxBatchSize?: number;
};
|
||||
|
||||
/**
|
||||
* Provides support for writing and querying vector data in Upstash.
|
||||
*/
|
||||
export class UpstashVectorStore extends BaseVectorStore {
|
||||
storesText: boolean = true;
|
||||
|
||||
private db: Index;
|
||||
private maxBatchSize: number;
|
||||
namespace: string;
|
||||
|
||||
/**
 * @param namespace namespace to use. Default is "".
 * @param token upstash vector token. if not set, the `UPSTASH_VECTOR_REST_TOKEN` env variable is used.
 * @param endpoint upstash vector endpoint. If not set, the `UPSTASH_VECTOR_REST_URL` env variable is used.
 * @param maxBatchSize maximum number of vectors upserted at once. Default is 1000.
 * @throws Error when no token or no endpoint can be resolved.
 *
 * @example
 * ```ts
 * const vectorStore = new UpstashVectorStore({ namespace: "my-namespace" })
 * ```
 */
constructor(params?: UpstashParams) {
  super(params);

  const token = params?.token ?? getEnv("UPSTASH_VECTOR_REST_TOKEN");
  const endpoint = params?.endpoint ?? getEnv("UPSTASH_VECTOR_REST_URL");

  this.namespace = params?.namespace ?? "";
  this.maxBatchSize = params?.maxBatchSize ?? 1000;

  // Both credentials are mandatory; the token is checked first.
  if (!token) {
    throw new Error(
      "Must specify UPSTASH_VECTOR_REST_TOKEN via env variable.",
    );
  }
  if (!endpoint) {
    throw new Error("Must specify UPSTASH_VECTOR_REST_URL via env variable.");
  }

  this.db = new Index({ token, url: endpoint });
}
|
||||
|
||||
/**
 * Returns the Upstash index client, constructing one with no arguments
 * from a dynamically imported SDK if none is set yet.
 */
private async getDb(): Promise<Index> {
  if (this.db) {
    return this.db;
  }

  const { Index } = await import("@upstash/vector");
  this.db = new Index();
  return this.db;
}
|
||||
|
||||
/**
 * Gives access to the underlying Upstash index client.
 * @returns Promise of the index client held by this store.
 */
client(): Promise<Index> {
  const db = this.getDb();
  return db;
}
|
||||
|
||||
/**
 * Adds vector record(s) to the index.
 * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
 * @returns ids of the upserted records (empty ids are left out)
 * @throws Error("Failed to save chunk") when any batch does not report "OK".
 */
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
  if (embeddingResults.length == 0) {
    return [];
  }

  const records = embeddingResults.map(this.nodeToRecord);

  const status = await this.upsertInBatches(records);
  if (status !== "OK") {
    throw new Error("Failed to save chunk");
  }

  const recordIds = records.map((record) => record.id);
  return recordIds.filter((id): id is string => !!id);
}
|
||||
|
||||
/**
 * Upserts plain-text records; each record carries the node text in its `data`
 * field instead of a vector, leaving the embedding to Upstash.
 * @param text The text nodes to insert; each is converted with `textNodeToRecord`.
 * @returns ids of the stored records (inferred from the `id_` field of the nodes)
 * @throws Error when any batch upsert does not report "OK".
 */
async addPlainText(text: TextNode<Metadata>[]): Promise<string[]> {
  if (!text.length) {
    return [];
  }

  const records = text.map((node) => this.textNodeToRecord(node));
  const status = await this.upsertInBatches(records);
  if (status !== "OK") {
    throw new Error("Failed to save chunk");
  }

  // Only non-empty ids are reported back to the caller.
  const ids: string[] = [];
  for (const record of records) {
    if (record.id) {
      ids.push(record.id);
    }
  }
  return ids;
}
|
||||
|
||||
/**
 * Splits the records into chunks of at most `maxBatchSize` and upserts all
 * chunks concurrently into the configured namespace.
 * @param nodes records produced by `nodeToRecord` or `textNodeToRecord`
 * @returns "OK" when every chunk reported "OK", otherwise "NOT-OK"
 */
private async upsertInBatches(
  nodes:
    | ReturnType<UpstashVectorStore["textNodeToRecord"]>[]
    | ReturnType<UpstashVectorStore["nodeToRecord"]>[],
) {
  const pending: Promise<string>[] = [];
  let offset = 0;
  while (offset < nodes.length) {
    const chunk = nodes.slice(offset, offset + this.maxBatchSize);
    pending.push(this.db.upsert(chunk, { namespace: this.namespace }));
    offset += this.maxBatchSize;
  }

  const statuses = await Promise.all(pending);
  const anyFailed = statuses.some((status) => status !== "OK");
  return anyFailed ? "NOT-OK" : "OK";
}
|
||||
|
||||
/**
 * Deletes a single record from this store's namespace.
 * @param refDocId Identifier handed to the Upstash client as the id of the record to delete.
 * @returns Promise that resolves if the delete call did not throw an error.
 */
async delete(refDocId: string): Promise<void> {
  const scoped = this.db.namespace(this.namespace);
  await scoped.delete(refDocId);
}
|
||||
|
||||
/**
 * Deletes several records from this store's namespace in one call.
 * @param refDocId Identifiers handed to the Upstash client as the ids of the records to delete.
 * @returns Promise that resolves if the delete call did not throw an error.
 */
async deleteMany(refDocId: string[]): Promise<void> {
  const scoped = this.db.namespace(this.namespace);
  await scoped.delete(refDocId);
}
|
||||
|
||||
/**
 * Query the vector store for the closest matching data to the query embeddings
 * @param query The VectorStoreQuery to be used. `queryEmbedding`, `similarityTopK`
 *   and `filters` are the fields read here.
 * @param _options Required by VectorStore interface. Currently ignored.
 * @returns The matching nodes together with their similarity scores and ids.
 */
async query(
  query: VectorStoreQuery,
  _options?: object,
): Promise<VectorStoreQueryResult> {
  const filter = this.toUpstashFilter(query.filters);

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const queryOptions: any = {
    vector: query.queryEmbedding,
    topK: query.similarityTopK,
    includeVectors: true,
    includeMetadata: true,
    filter,
  };

  const results = await this.db.query(queryOptions, {
    namespace: this.namespace,
  });

  // Rebuild nodes from the stored metadata; the raw id / metadata / vector are
  // supplied as a fallback to `metadataDictToNode`.
  const nodes = results.map((result) =>
    metadataDictToNode(result.metadata ?? {}, {
      fallback: {
        id: result.id,
        metadata: result.metadata,
        embedding: result.vector,
      },
    }),
  );

  return {
    nodes,
    // `??` rather than `||`: a score of exactly 0 is a real value and must not
    // be replaced by the 999 placeholder used for a missing score.
    similarities: results.map((row) => row.score ?? 999),
    ids: results.map((row) => String(row.id)),
  };
}
|
||||
|
||||
/**
 * Renders one metadata filter as `<key> <operator> <value>`, separated by
 * single spaces.
 *
 * NOTE(review): string values are inserted verbatim, without quotes. Upstash's
 * filter syntax appears to expect quoted string literals; confirm whether
 * callers are expected to pre-quote them.
 */
toFilterString(filter: MetadataFilter) {
  const { key, operator, value } = filter;
  return [key, operator, String(value)].join(" ");
}
|
||||
|
||||
/**
 * Converts standard metadata filters into a single Upstash filter string.
 *
 * Each filter is rendered with `toFilterString` and the parts are joined with
 * the filters' `condition` ("and" when unset). The caller's filter objects are
 * left untouched.
 *
 * @param stdFilters filters to convert
 * @returns the filter string, or `undefined` when there is nothing to filter on
 */
toUpstashFilter(stdFilters?: MetadataFilters) {
  if (!stdFilters?.filters?.length) return;

  const filterStrings = stdFilters.filters.map((item) => {
    if (item.operator !== "==") {
      return this.toFilterString(item);
    }
    // Work on a copy so the query object passed in by the caller keeps its
    // original "==" operator.
    const translated = { ...item };
    //@ts-expect-error Upstash equal operator uses only one equal sign, so we have to replace it.
    translated.operator = "=";
    return this.toFilterString(translated);
  });

  return filterStrings.join(` ${stdFilters.condition ?? "and"} `);
}
|
||||
|
||||
/**
 * Builds the upsert record for an embedded node: its id, embedding vector and
 * metadata.
 */
nodeToRecord(node: BaseNode<Metadata>) {
  // fixme: why id is possibly null?
  // An empty `id_` becomes null here and is then asserted non-null.
  const id = (node.id_.length ? node.id_ : null)!;
  const vector = node.getEmbedding();
  const metadata = nodeToMetadata(node);
  return { id, vector, metadata };
}
|
||||
|
||||
/**
 * Builds the upsert record for a plain-text node: its id, raw text (as `data`)
 * and metadata.
 */
textNodeToRecord(node: TextNode<Metadata>) {
  // fixme: why id is possibly null?
  // An empty `id_` becomes null here and is then asserted non-null.
  const id = (node.id_.length ? node.id_ : null)!;
  const data = node.text;
  const metadata = nodeToMetadata(node);
  return { id, data, metadata };
}
|
||||
}
|
||||
export * from "@llamaindex/upstash";
|
||||
|
||||
@@ -1,354 +1 @@
|
||||
import { BaseNode, MetadataMode, type Metadata } from "@llamaindex/core/schema";
|
||||
import weaviate, {
|
||||
Filters,
|
||||
type Collection,
|
||||
type DeleteManyOptions,
|
||||
type FilterValue,
|
||||
type WeaviateClient,
|
||||
type WeaviateNonGenericObject,
|
||||
} from "weaviate-client";
|
||||
|
||||
import { getEnv } from "@llamaindex/env";
|
||||
import type { BaseHybridOptions } from "weaviate-client";
|
||||
import {
|
||||
BaseVectorStore,
|
||||
VectorStoreQueryMode,
|
||||
type MetadataFilter,
|
||||
type MetadataFilters,
|
||||
type VectorStoreBaseParams,
|
||||
type VectorStoreQuery,
|
||||
type VectorStoreQueryResult,
|
||||
} from "./types.js";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
parseArrayValue,
|
||||
parseNumberValue,
|
||||
} from "./utils.js";
|
||||
|
||||
/** Text properties declared on a collection when this store creates it. */
const NODE_SCHEMA = [
  { name: "text", dataType: ["text"], description: "Text property" },
  {
    name: "ref_doc_id",
    dataType: ["text"],
    description: "The ref_doc_id of the Node",
  },
  { name: "node_info", dataType: ["text"], description: "node_info (in JSON)" },
  {
    name: "relationships",
    dataType: ["text"],
    description: "The relationships of the node (in JSON)",
  },
];

/** Result-metadata field that carries the similarity value for each query mode. */
const SIMILARITY_KEYS: Record<string, "distance" | "score"> = {
  [VectorStoreQueryMode.DEFAULT]: "distance",
  [VectorStoreQueryMode.HYBRID]: "score",
};
|
||||
|
||||
/**
 * Translates a single metadata filter into a Weaviate filter on the property
 * named by `filter.key`.
 *
 * Comparison operators parse the value with `parseNumberValue`; "any" / "all"
 * parse it with `parseArrayValue` and compare against the stringified elements.
 *
 * @throws Error for any operator not handled below.
 */
const buildFilterItem = (
  collection: Collection,
  filter: MetadataFilter,
): FilterValue => {
  const { key, operator, value } = filter;
  // Created lazily so that an unsupported operator throws before the
  // collection is touched.
  const property = () => collection.filter.byProperty(key);

  if (operator === "==") return property().equal(value);
  if (operator === "!=") return property().notEqual(value);
  if (operator === ">") return property().greaterThan(parseNumberValue(value));
  if (operator === "<") return property().lessThan(parseNumberValue(value));
  if (operator === ">=") {
    return property().greaterOrEqual(parseNumberValue(value));
  }
  if (operator === "<=") {
    return property().lessOrEqual(parseNumberValue(value));
  }
  if (operator === "any") {
    return property().containsAny(parseArrayValue(value).map(String));
  }
  if (operator === "all") {
    return property().containsAll(parseArrayValue(value).map(String));
  }

  throw new Error(`Operator ${operator} is not supported.`);
};
|
||||
|
||||
/**
 * Converts standard metadata filters into one Weaviate filter.
 *
 * @returns `undefined` when there are no filters, the lone filter when there is
 *   exactly one, otherwise all filters combined with the requested condition
 *   ("and" when none is given).
 */
const toWeaviateFilter = (
  collection: Collection,
  standardFilters?: MetadataFilters,
): FilterValue | undefined => {
  if (standardFilters == null || standardFilters.filters.length === 0) {
    return undefined;
  }

  const converted: FilterValue[] = [];
  for (const filter of standardFilters.filters) {
    converted.push(buildFilterItem(collection, filter));
  }

  if (converted.length === 1) {
    return converted[0]!;
  }

  const combine = Filters[standardFilters.condition ?? "and"];
  return combine(...converted);
};
|
||||
|
||||
/**
 * Vector store backed by a Weaviate collection.
 *
 * A ready-made `WeaviateClient` can be injected; otherwise a Weaviate Cloud
 * connection is opened lazily from `cloudOptions` or the
 * `WEAVIATE_CLUSTER_URL` / `WEAVIATE_API_KEY` environment variables.
 */
export class WeaviateVectorStore extends BaseVectorStore {
  public storesText: boolean = true;
  private flatMetadata: boolean = true;

  private weaviateClient?: WeaviateClient;
  // Only assigned when no client is injected; read only by `getClient`.
  private clusterURL!: string;
  private apiKey!: string;
  private indexName: string;

  private idKey: string;
  private contentKey: string;
  private embeddingKey: string;
  private metadataKey: string;

  /**
   * @param init optional settings: an existing `weaviateClient`, or
   *   `cloudOptions` (cluster URL / API key); `indexName` (default
   *   "LlamaIndex") and the key names used when writing objects.
   * @throws Error when no client is given and the cluster URL or API key is
   *   missing, or when `indexName` does not start with a capital letter.
   */
  constructor(
    init?: VectorStoreBaseParams & {
      weaviateClient?: WeaviateClient;
      cloudOptions?: {
        clusterURL?: string;
        apiKey?: string;
      };
      indexName?: string;
      idKey?: string;
      contentKey?: string;
      metadataKey?: string;
      embeddingKey?: string;
    },
  ) {
    super(init);

    if (init?.weaviateClient) {
      // Use the provided client
      this.weaviateClient = init.weaviateClient;
    } else {
      // Load client cloud options from config or env
      const clusterURL =
        init?.cloudOptions?.clusterURL ?? getEnv("WEAVIATE_CLUSTER_URL");
      const apiKey = init?.cloudOptions?.apiKey ?? getEnv("WEAVIATE_API_KEY");
      if (!clusterURL || !apiKey) {
        throw new Error(
          "Must specify WEAVIATE_CLUSTER_URL and WEAVIATE_API_KEY via env variable.",
        );
      }
      this.clusterURL = clusterURL;
      this.apiKey = apiKey;
    }

    this.checkIndexName(init?.indexName);
    this.indexName = init?.indexName ?? "LlamaIndex";
    this.idKey = init?.idKey ?? "id";
    this.contentKey = init?.contentKey ?? "text";
    this.embeddingKey = init?.embeddingKey ?? "vectors";
    this.metadataKey = init?.metadataKey ?? "node_info";
  }

  /** Returns the Weaviate client, connecting first if necessary. */
  public client() {
    return this.getClient();
  }

  /**
   * Inserts nodes into the collection, creating the collection if it does not
   * exist yet.
   * @param nodes nodes to store, including their embeddings
   * @returns the uuids reported by Weaviate for the inserted objects
   */
  public async add(nodes: BaseNode<Metadata>[]): Promise<string[]> {
    const collection = await this.ensureCollection({ createIfNotExists: true });

    const result = await collection.data.insertMany(
      nodes.map((node) => {
        const metadata = nodeToMetadata(
          node,
          true,
          this.contentKey,
          this.flatMetadata,
        );
        return {
          [this.idKey]: node.id_,
          [this.embeddingKey]: node.getEmbedding(),
          properties: {
            ...metadata,
            [this.contentKey]: node.getContent(MetadataMode.NONE),
            [this.metadataKey]: JSON.stringify(metadata),
            relationships: JSON.stringify({ ref_doc_id: metadata.ref_doc_id }),
          },
        };
      }),
    );

    return Object.values(result.uuids);
  }

  /**
   * Deletes every object whose `ref_doc_id` property matches `refDocId`.
   *
   * NOTE(review): the match uses `like`, not `equal`; confirm that pattern
   * matching (rather than exact comparison) is intended for document ids.
   *
   * @param refDocId reference document id to delete
   * @param deleteOptions forwarded to the Weaviate `deleteMany` call
   */
  public async delete(
    refDocId: string,
    deleteOptions?: DeleteManyOptions<boolean>,
  ): Promise<void> {
    const collection = await this.ensureCollection();
    await collection.data.deleteMany(
      collection.filter.byProperty("ref_doc_id").like(refDocId),
      deleteOptions,
    );
  }

  /**
   * Runs a hybrid query against the collection.
   *
   * `docIds` and `filters` are both honored: when both are present they are
   * combined with a logical AND.
   *
   * @param query the query; `queryStr` is required and is passed to Weaviate's
   *   hybrid search
   * @returns up to `similarityTopK` nodes with their similarities and uuids
   */
  public async query(
    query: VectorStoreQuery & {
      queryStr: string;
    },
  ): Promise<VectorStoreQueryResult> {
    const collection = await this.ensureCollection();
    // Reuse the collection obtained above instead of re-checking its existence.
    const allProperties = await this.getAllProperties(collection);

    // Collect every restriction first so that one cannot overwrite another.
    const restrictions: FilterValue[] = [];
    if (query.docIds) {
      restrictions.push(
        collection.filter.byProperty("doc_id").containsAny(query.docIds),
      );
    }
    const metadataFilter = toWeaviateFilter(collection, query.filters);
    if (metadataFilter) {
      restrictions.push(metadataFilter);
    }
    const filters: FilterValue | undefined =
      restrictions.length > 1 ? Filters.and(...restrictions) : restrictions[0];

    const hybridOptions: BaseHybridOptions<undefined> = {
      returnMetadata: Object.values(SIMILARITY_KEYS),
      returnProperties: allProperties,
      includeVector: true,
    };
    const alpha = this.getQueryAlpha(query);
    if (query.queryEmbedding) {
      hybridOptions.vector = query.queryEmbedding;
    }
    if (query.similarityTopK) {
      hybridOptions.limit = query.similarityTopK;
    }
    // Compare against undefined: an explicit alpha of 0 is a valid setting and
    // must be forwarded rather than silently dropped.
    if (alpha !== undefined) {
      hybridOptions.alpha = alpha;
    }
    if (filters) {
      hybridOptions.filters = filters;
    }

    const queryResult = await collection.query.hybrid(
      query.queryStr,
      hybridOptions,
    );

    const entries = queryResult.objects;

    const similarityKey = SIMILARITY_KEYS[query.mode];
    const nodes: BaseNode<Metadata>[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];

    entries.forEach((entry, index) => {
      if (index < query.similarityTopK && entry.metadata) {
        const node = metadataDictToNode(entry.properties);
        node.setContent(entry.properties[this.contentKey]);
        nodes.push(node);
        ids.push(entry.uuid);
        similarities.push(this.getNodeSimilarity(entry, similarityKey));
      }
    });

    return {
      nodes,
      similarities,
      ids,
    };
  }

  /** Returns the cached client, or connects to Weaviate Cloud and caches it. */
  private async getClient(): Promise<WeaviateClient> {
    if (this.weaviateClient) return this.weaviateClient;
    const client = await weaviate.connectToWeaviateCloud(this.clusterURL, {
      authCredentials: new weaviate.ApiKey(this.apiKey),
    });
    this.weaviateClient = client;
    return client;
  }

  /**
   * Returns a handle to the collection named `indexName`.
   * @throws Error when the collection does not exist and `createIfNotExists`
   *   is false.
   */
  private async ensureCollection({ createIfNotExists = false } = {}) {
    const client = await this.getClient();
    const exists = await this.doesCollectionExist();
    if (!exists) {
      if (createIfNotExists) {
        await this.createCollection();
      } else {
        throw new Error(`Collection ${this.indexName} does not exist.`);
      }
    }
    return client.collections.get(this.indexName);
  }

  /** Asks Weaviate whether the collection named `indexName` exists. */
  private async doesCollectionExist() {
    const client = await this.getClient();
    return client.collections.exists(this.indexName);
  }

  /** Creates the collection from `NODE_SCHEMA`. */
  private async createCollection() {
    const client = await this.getClient();
    return await client.collections.createFromSchema({
      class: this.indexName,
      description: `Collection for ${this.indexName}`,
      properties: NODE_SCHEMA,
    });
  }

  /**
   * Picks the hybrid `alpha` for a query: undefined without a query embedding,
   * 1 in DEFAULT mode, and the query's own `alpha` in HYBRID mode when a query
   * string is present.
   */
  private getQueryAlpha(query: VectorStoreQuery): number | undefined {
    if (!query.queryEmbedding) return undefined;
    if (query.mode === VectorStoreQueryMode.DEFAULT) return 1;
    if (query.mode === VectorStoreQueryMode.HYBRID && query.queryStr)
      return query.alpha;
    return undefined;
  }

  /**
   * Lists the names of all properties configured on the collection.
   * @param collection optional already-resolved collection; when omitted the
   *   collection is resolved (and its existence checked) here.
   */
  private async getAllProperties(collection?: Collection): Promise<string[]> {
    const target = collection ?? (await this.ensureCollection());
    const properties = (await target.config.get()).properties;
    return properties.map((p) => p.name);
  }

  /** Rejects index names whose first character is not upper-case. */
  private checkIndexName(indexName?: string) {
    if (indexName && indexName[0] !== indexName[0]!.toUpperCase()) {
      throw new Error(
        "Index name must start with a capital letter, e.g. 'LlamaIndex'",
      );
    }
  }

  /**
   * Converts the value stored under `similarityKey` into a similarity as
   * `1 - value`; a missing value yields 1.
   *
   * NOTE(review): the same conversion is applied when the key is "score";
   * confirm that is intended for hybrid results.
   */
  private getNodeSimilarity(
    entry: WeaviateNonGenericObject,
    similarityKey: "distance" | "score" = "distance",
  ): number {
    const distance = entry.metadata?.[similarityKey];
    if (distance === undefined) return 1;
    // convert distance https://forum.weaviate.io/t/distance-vs-certainty-scores/258
    return 1 - distance;
  }
}
|
||||
export * from "@llamaindex/weaviate";
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
export * from "@llamaindex/core/vector-store";
|
||||
|
||||
export * from "./SimpleVectorStore.js";
|
||||
|
||||
export * from "./AstraDBVectorStore.js";
|
||||
export * from "./azure/AzureAISearchVectorStore.js";
|
||||
export * from "./AzureCosmosDBMongoVectorStore.js";
|
||||
export * from "./AzureCosmosDBNoSqlVectorStore.js";
|
||||
export * from "./AzureAISearchVectorStore.js";
|
||||
export * from "./ChromaVectorStore.js";
|
||||
export * from "./MilvusVectorStore.js";
|
||||
export * from "./MongoDBAtlasVectorStore.js";
|
||||
export * from "./PGVectorStore.js";
|
||||
export * from "./PineconeVectorStore.js";
|
||||
export * from "./QdrantVectorStore.js";
|
||||
export * from "./SimpleVectorStore.js";
|
||||
export * from "./types.js";
|
||||
export * from "./UpstashVectorStore.js";
|
||||
export * from "./WeaviateVectorStore.js";
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
import type { BaseEmbedding } from "@llamaindex/core/embeddings";
|
||||
import type { BaseNode, ModalityType } from "@llamaindex/core/schema";
|
||||
import { getEmbeddedModel } from "../internal/settings/EmbedModel.js";
|
||||
|
||||
/** Result of a vector store query. */
export interface VectorStoreQueryResult {
  /** Nodes that matched the query, when the store returns them. */
  nodes?: BaseNode[];
  /** Similarity value for each result. */
  similarities: number[];
  /** Id of each result. */
  ids: string[];
}
|
||||
|
||||
/** Query modes a vector store may be asked to run. */
export enum VectorStoreQueryMode {
  DEFAULT = "default",
  SPARSE = "sparse",
  HYBRID = "hybrid",

  // fit learners
  SVM = "svm",
  LOGISTIC_REGRESSION = "logistic_regression",
  LINEAR_REGRESSION = "linear_regression",

  // maximum marginal relevance
  MMR = "mmr",

  // for Azure AI Search
  SEMANTIC_HYBRID = "semantic_hybrid",
}
|
||||
|
||||
/** Comparison operators usable in a metadata filter. */
export enum FilterOperator {
  /** Default operator (string, number). */
  EQ = "==",
  /** In array (string or number). */
  IN = "in",
  /** Greater than (number). */
  GT = ">",
  /** Less than (number). */
  LT = "<",
  /** Not equal to (string, number). */
  NE = "!=",
  /** Greater than or equal to (number). */
  GTE = ">=",
  /** Less than or equal to (number). */
  LTE = "<=",
  /** Not in array (string or number). */
  NIN = "nin",
  /** Contains any (array of strings). */
  ANY = "any",
  /** Contains all (array of strings). */
  ALL = "all",
  /** Full text match (search for a specific substring, token or phrase within the text field). */
  TEXT_MATCH = "text_match",
  /** Metadata array contains value (string or number). */
  CONTAINS = "contains",
  /** The field does not exist or is empty (null or empty array). */
  IS_EMPTY = "is_empty",
}
|
||||
|
||||
/** Logical connective used to combine several metadata filters. */
export enum FilterCondition {
  AND = "and",
  OR = "or",
}
|
||||
|
||||
/** Value a metadata filter can compare against. */
export type MetadataFilterValue = string | number | string[] | number[];

/** A single comparison on one metadata key. */
export interface MetadataFilter {
  /** Metadata key the filter applies to. */
  key: string;
  /** Value to compare with; optional in the type. */
  value?: MetadataFilterValue;
  /** One of the `FilterOperator` values: ==, any, all,... */
  operator: `${FilterOperator}`;
}

/** A list of metadata filters plus the condition that combines them. */
export interface MetadataFilters {
  filters: MetadataFilter[];
  /** One of the `FilterCondition` values: and, or. */
  condition?: `${FilterCondition}`;
}
|
||||
|
||||
/** Describes one metadata field: its name, type and a description. */
export interface MetadataInfo {
  name: string;
  type: string;
  description: string;
}

/** Describes a vector store: its metadata fields and its content. */
export interface VectorStoreInfo {
  metadataInfo: MetadataInfo[];
  contentInfo: string;
}
|
||||
|
||||
/** Parameters of a vector store query. */
export interface VectorStoreQuery {
  /** Embedding of the query, when one is available. */
  queryEmbedding?: number[];
  /** Number of top results requested. */
  similarityTopK: number;
  /** Document ids associated with the query, if any. */
  docIds?: string[];
  /** Query text, if any. */
  queryStr?: string;
  /** Mode the store should run the query in. */
  mode: VectorStoreQueryMode;
  /** NOTE(review): presumably the hybrid-search weighting; semantics are store-specific, confirm. */
  alpha?: number;
  /** Metadata filters to apply. */
  filters?: MetadataFilters | undefined;
  /** NOTE(review): presumably used with the MMR mode; confirm against the stores that read it. */
  mmrThreshold?: number;
}
|
||||
|
||||
/** Supported types of vector stores (for each modality). */
export type VectorStoreByType = {
  [P in ModalityType]?: BaseVectorStore;
};

/** Constructor parameters shared by all vector stores. */
export type VectorStoreBaseParams = {
  /** Embedding model to use instead of the default one. */
  embeddingModel?: BaseEmbedding | undefined;
};
|
||||
|
||||
/**
 * Abstract base of all vector stores.
 *
 * @typeParam Client type returned by `client()`.
 */
export abstract class BaseVectorStore<Client = unknown> {
  /** Embedding model; falls back to `getEmbeddedModel()` when none is passed. */
  embedModel: BaseEmbedding;
  abstract storesText: boolean;
  isEmbeddingQuery?: boolean;

  protected constructor(params?: VectorStoreBaseParams) {
    const configured = params?.embeddingModel;
    this.embedModel = configured ?? getEmbeddedModel();
  }

  /** Returns the store's underlying client. */
  abstract client(): Client;

  /** Stores the given nodes and resolves to a list of ids. */
  abstract add(embeddingResults: BaseNode[]): Promise<string[]>;

  /** Deletes the data associated with `refDocId`. */
  abstract delete(refDocId: string, deleteOptions?: object): Promise<void>;

  /** Runs a query against the store. */
  abstract query(
    query: VectorStoreQuery,
    options?: object,
  ): Promise<VectorStoreQueryResult>;
}
|
||||
@@ -1,5 +1,13 @@
|
||||
# @llamaindex/core-test
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 34faf48: chore: move vector stores to their own packages
|
||||
- 4df1fe6: chore: migrate llamaindex llms and embeddings to their own packages
|
||||
- 1931bbc: refactor: @llamaindex/azure
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
import {
|
||||
storageContextFromDefaults,
|
||||
type StorageContext,
|
||||
} from "llamaindex/storage/StorageContext";
|
||||
import { storageContextFromDefaults, type StorageContext } from "llamaindex";
|
||||
import { existsSync, rmSync } from "node:fs";
|
||||
import { mkdtemp } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Document, MetadataMode } from "@llamaindex/core/schema";
|
||||
import {
|
||||
metadataDictToNode,
|
||||
nodeToMetadata,
|
||||
} from "llamaindex/vector-store/utils";
|
||||
} from "@llamaindex/core/vector-store";
|
||||
import { beforeEach, describe, expect, test } from "vitest";
|
||||
|
||||
describe("Testing VectorStore utils", () => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { BaseNode } from "@llamaindex/core/schema";
|
||||
import { AzureCosmosDBNoSqlVectorStore } from "llamaindex";
|
||||
import type { Mocked } from "vitest";
|
||||
import { AzureCosmosDBNoSqlVectorStore } from "../../src/vector-store.js";
|
||||
|
||||
export class TestableAzureCosmosDBNoSqlVectorStore extends AzureCosmosDBNoSqlVectorStore {
|
||||
public nodes: BaseNode[] = [];
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user