mirror of
https://github.com/Mintplex-Labs/langchainjs.git
synced 2026-07-01 12:17:38 -04:00
Feat: typesense vector store (#1244)
* chore(langchain): add typesense * feat(langchain/vectorstores): add typesense vectorstore * tests(langchain/vectorstore): add tests transform between docs & typesense records * docs(examples): add typesense vectorstore examples * feat(vectorstores/typesense): add fromTexts * docs(examples): add typesense client to example * docs(examples): add import and similarity search examples * chore(package): typesense as peerDependency * chore(create-entrypoints): add vectorstores/typesense * chore(package): typesense optional dep * lint(vectorstores/typesense): fix errors * fix(vectorstores/typesense): throw error instead of catch it * refactor(vectorstores/typesense): use asyncCaller in default import * refactor(vectorstores/typesense): impor types with import type * feat(vectorstores/typesense): add similaritySearchVectorWithScore & similaritySearchWithScore methods * Fix entrypoints * Remove unnecessary TypeSense initialisation * fix(examples/typesense): add missing properties * feat(typesense): add documentation * fix deleted entry points by error * fix deleted entry points by error * Some fixes * Fix bug * refactor(typesense): remove modifySearchParams, use filters * fix on docs * feat(typesense): addVectors method implemented * fix(typesense): change test after change * Updates * Update docs --------- Co-authored-by: Tat Dat Duong <david@duong.cz> Co-authored-by: Nuno Campos <nuno@boringbits.io>
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
# Typesense
|
||||
|
||||
Vector store that utilizes the Typesense search engine.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```typescript
|
||||
import { Typesense, TypesenseConfig } from "langchain/vectorstores/typesense";
|
||||
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
|
||||
import { Client } from "typesense";
|
||||
import { Document } from "langchain/document";
|
||||
|
||||
const vectorTypesenseClient = new Client({
|
||||
nodes: [
|
||||
{
|
||||
// Ideally should come from your .env file
|
||||
host: "...",
|
||||
port: 123,
|
||||
protocol: "https",
|
||||
},
|
||||
],
|
||||
// Ideally should come from your .env file
|
||||
apiKey: "...",
|
||||
numRetries: 3,
|
||||
connectionTimeoutSeconds: 60,
|
||||
});
|
||||
|
||||
const typesenseVectorStoreConfig = {
|
||||
// Typesense client
|
||||
typesenseClient: vectorTypesenseClient,
|
||||
// Name of the collection to store the vectors in
|
||||
schemaName: "your_schema_name",
|
||||
// Optional column names to be used in Typesense
|
||||
columnNames: {
|
||||
// "vec" is the default name for the vector column in Typesense but you can change it to whatever you want
|
||||
vector: "vec",
|
||||
// "text" is the default name for the text column in Typesense but you can change it to whatever you want
|
||||
pageContent: "text",
|
||||
// Names of the columns that you will save in your typesense schema and need to be retrieved as metadata when searching
|
||||
metadataColumnNames: ["foo", "bar", "baz"],
|
||||
},
|
||||
// Optional search parameters to be passed to Typesense when searching
|
||||
searchParams: {
|
||||
q: "*",
|
||||
filter_by: "foo:[fooo]",
|
||||
query_by: "",
|
||||
},
|
||||
// You can override the default Typesense import function if you want to do something more complex
|
||||
// Default import function:
|
||||
// async importToTypesense<
|
||||
// T extends Record<string, unknown> = Record<string, unknown>
|
||||
// >(data: T[], collectionName: string) {
|
||||
// const chunkSize = 2000;
|
||||
// for (let i = 0; i < data.length; i += chunkSize) {
|
||||
// const chunk = data.slice(i, i + chunkSize);
|
||||
|
||||
// await this.caller.call(async () => {
|
||||
// await this.client
|
||||
// .collections<T>(collectionName)
|
||||
// .documents()
|
||||
// .import(chunk, { action: "emplace", dirty_values: "drop" });
|
||||
// });
|
||||
// }
|
||||
// }
|
||||
import: async (data, collectionName) => {
|
||||
await vectorTypesenseClient
|
||||
.collections(collectionName)
|
||||
.documents()
|
||||
.import(data, { action: "emplace", dirty_values: "drop" });
|
||||
},
|
||||
} satisfies TypesenseConfig;
|
||||
|
||||
/**
|
||||
* Creates a Typesense vector store from a list of documents.
|
||||
* Will update documents if there is a document with the same id, at least with the default import function.
|
||||
* @param documents list of documents to create the vector store from
|
||||
* @returns Typesense vector store
|
||||
*/
|
||||
const createVectorStoreWithTypesense = async (documents: Document[] = []) =>
|
||||
Typesense.fromDocuments(
|
||||
documents,
|
||||
new OpenAIEmbeddings(),
|
||||
typesenseVectorStoreConfig
|
||||
);
|
||||
|
||||
/**
|
||||
* Returns a Typesense vector store from an existing index.
|
||||
* @returns Typesense vector store
|
||||
*/
|
||||
const getVectorStoreWithTypesense = async () =>
|
||||
new Typesense(new OpenAIEmbeddings(), typesenseVectorStoreConfig);
|
||||
|
||||
// Do a similarity search
|
||||
const vectorStore = await getVectorStoreWithTypesense();
|
||||
const documents = await vectorStore.similaritySearch("hello world");
|
||||
|
||||
// Add filters based on metadata with the search parameters of Typesense
|
||||
// Excludes documents whose author is JK Rowling: if both Joe Rowling and JK Rowling exist, only Joe Rowling is returned
|
||||
vectorStore.similaritySearch("Rowling", undefined, {
|
||||
filter_by: "author:!=JK Rowling",
|
||||
});
|
||||
|
||||
// Delete a document
|
||||
vectorStore.deleteDocuments(["document_id_1", "document_id_2"]);
|
||||
```
|
||||
|
||||
### Constructor
|
||||
|
||||
Before starting, create a schema in Typesense with an id, a field for the vector and a field for the text. Add as many other fields as needed for the metadata.
|
||||
|
||||
- `constructor(embeddings: Embeddings, config: TypesenseConfig)`: Constructs a new instance of the `Typesense` class.
|
||||
- `embeddings`: An instance of the `Embeddings` class used for embedding documents.
|
||||
- `config`: Configuration object for the Typesense vector store.
|
||||
- `typesenseClient`: Typesense client instance.
|
||||
- `schemaName`: Name of the Typesense schema in which documents will be stored and searched.
|
||||
- `searchParams` (optional): Typesense search parameters. Default is `{ q: '*', per_page: 5, query_by: '' }`.
|
||||
- `columnNames` (optional): Column names configuration.
|
||||
- `vector` (optional): Vector column name. Default is `'vec'`.
|
||||
- `pageContent` (optional): Page content column name. Default is `'text'`.
|
||||
- `metadataColumnNames` (optional): Metadata column names. Default is an empty array `[]`.
|
||||
- `import` (optional): Replace the default import function for importing data to Typesense. This can affect the functionality of updating documents.
|
||||
|
||||
### Methods
|
||||
|
||||
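- `async addDocuments(documents: Document[]): Promise<void>`: Adds documents to the vector store. The documents will be updated if there is a document with the same ID. Only the page content, the vector and the metadata keys listed in `metadataColumnNames` are written to Typesense, so include `id` in `metadataColumnNames` if you rely on ID-based updates.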
|
||||
- `static async fromDocuments(docs: Document[], embeddings: Embeddings, config: TypesenseConfig): Promise<Typesense>`: Creates a Typesense vector store from a list of documents. Documents are added to the vector store during construction.
|
||||
- `static async fromTexts(texts: string[], metadatas: object[], embeddings: Embeddings, config: TypesenseConfig): Promise<Typesense>`: Creates a Typesense vector store from a list of texts and associated metadata. Texts are converted to documents and added to the vector store during construction.
|
||||
- `async similaritySearch(query: string, k?: number, filter?: Record<string, unknown>): Promise<Document[]>`: Searches for similar documents based on a query. Returns an array of similar documents.
|
||||
- `async deleteDocuments(documentIds: string[]): Promise<void>`: Deletes documents from the vector store based on their IDs.
|
||||
@@ -48,6 +48,7 @@
|
||||
"redis": "^4.6.6",
|
||||
"sqlite3": "^5.1.4",
|
||||
"typeorm": "^0.3.12",
|
||||
"typesense": "^1.5.3",
|
||||
"weaviate-ts-client": "^1.0.0",
|
||||
"zod": "^3.21.4"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
import { Typesense, TypesenseConfig } from "langchain/vectorstores/typesense";
|
||||
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
|
||||
import { Client } from "typesense";
|
||||
import { Document } from "langchain/document";
|
||||
|
||||
const vectorTypesenseClient = new Client({
  nodes: [
    {
      // Ideally should come from your .env file
      host: "...",
      port: 123,
      protocol: "https",
    },
  ],
  // Ideally should come from your .env file
  apiKey: "...",
  numRetries: 3,
  connectionTimeoutSeconds: 60,
});

const typesenseVectorStoreConfig = {
  // Typesense client
  typesenseClient: vectorTypesenseClient,
  // Name of the collection to store the vectors in
  schemaName: "your_schema_name",
  // Optional column names to be used in Typesense
  columnNames: {
    // "vec" is the default name for the vector column in Typesense but you can change it to whatever you want
    vector: "vec",
    // "text" is the default name for the text column in Typesense but you can change it to whatever you want
    pageContent: "text",
    // Names of the columns that you will save in your typesense schema and need to be retrieved as metadata when searching
    metadataColumnNames: ["foo", "bar", "baz"],
  },
  // Optional search parameters to be passed to Typesense when searching
  searchParams: {
    q: "*",
    filter_by: "foo:[fooo]",
    query_by: "",
  },
  // You can override the default Typesense import function if you want to do something more complex
  // Default import function:
  // async importToTypesense<
  //   T extends Record<string, unknown> = Record<string, unknown>
  // >(data: T[], collectionName: string) {
  //   const chunkSize = 2000;
  //   for (let i = 0; i < data.length; i += chunkSize) {
  //     const chunk = data.slice(i, i + chunkSize);
  //
  //     await this.caller.call(async () => {
  //       await this.client
  //         .collections<T>(collectionName)
  //         .documents()
  //         .import(chunk, { action: "emplace", dirty_values: "drop" });
  //     });
  //   }
  // }
  import: async (data, collectionName) => {
    await vectorTypesenseClient
      .collections(collectionName)
      .documents()
      .import(data, { action: "emplace", dirty_values: "drop" });
  },
} satisfies TypesenseConfig;

/**
 * Creates a Typesense vector store from a list of documents.
 * Will update documents if there is a document with the same id, at least with the default import function.
 * @param documents list of documents to create the vector store from
 * @returns Typesense vector store
 */
const createVectorStoreWithTypesense = async (documents: Document[] = []) =>
  Typesense.fromDocuments(
    documents,
    new OpenAIEmbeddings(),
    typesenseVectorStoreConfig
  );

/**
 * Returns a Typesense vector store from an existing index.
 * @returns Typesense vector store
 */
const getVectorStoreWithTypesense = async () =>
  new Typesense(new OpenAIEmbeddings(), typesenseVectorStoreConfig);

// Do a similarity search
const vectorStore = await getVectorStoreWithTypesense();
const documents = await vectorStore.similaritySearch("hello world");

// Add filters based on metadata with the search parameters of Typesense.
// Excludes documents whose author is JK Rowling: if both Joe Rowling and JK Rowling exist, only Joe Rowling is returned.
// Awaited so that a failed search surfaces here instead of as an unhandled rejection.
await vectorStore.similaritySearch("Rowling", undefined, {
  filter_by: "author:!=JK Rowling",
});

// Delete documents by id (awaited so the deletion has completed, or thrown, before the script continues)
await vectorStore.deleteDocuments(["document_id_1", "document_id_2"]);
|
||||
@@ -154,6 +154,9 @@ vectorstores/myscale.d.ts
|
||||
vectorstores/redis.cjs
|
||||
vectorstores/redis.js
|
||||
vectorstores/redis.d.ts
|
||||
vectorstores/typesense.cjs
|
||||
vectorstores/typesense.js
|
||||
vectorstores/typesense.d.ts
|
||||
vectorstores/singlestore.cjs
|
||||
vectorstores/singlestore.js
|
||||
vectorstores/singlestore.d.ts
|
||||
|
||||
@@ -166,6 +166,9 @@
|
||||
"vectorstores/redis.cjs",
|
||||
"vectorstores/redis.js",
|
||||
"vectorstores/redis.d.ts",
|
||||
"vectorstores/typesense.cjs",
|
||||
"vectorstores/typesense.js",
|
||||
"vectorstores/typesense.d.ts",
|
||||
"vectorstores/singlestore.cjs",
|
||||
"vectorstores/singlestore.js",
|
||||
"vectorstores/singlestore.d.ts",
|
||||
@@ -508,6 +511,7 @@
|
||||
"ts-jest": "^29.1.0",
|
||||
"typeorm": "^0.3.12",
|
||||
"typescript": "^5.0.0",
|
||||
"typesense": "^1.5.3",
|
||||
"weaviate-ts-client": "^1.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
@@ -557,6 +561,7 @@
|
||||
"replicate": "^0.9.0",
|
||||
"srt-parser-2": "^1.2.2",
|
||||
"typeorm": "^0.3.12",
|
||||
"typesense": "^1.5.3",
|
||||
"weaviate-ts-client": "^1.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
@@ -698,6 +703,9 @@
|
||||
"typeorm": {
|
||||
"optional": true
|
||||
},
|
||||
"typesense": {
|
||||
"optional": true
|
||||
},
|
||||
"weaviate-ts-client": {
|
||||
"optional": true
|
||||
}
|
||||
@@ -1012,6 +1020,11 @@
|
||||
"import": "./vectorstores/redis.js",
|
||||
"require": "./vectorstores/redis.cjs"
|
||||
},
|
||||
"./vectorstores/typesense": {
|
||||
"types": "./vectorstores/typesense.d.ts",
|
||||
"import": "./vectorstores/typesense.js",
|
||||
"require": "./vectorstores/typesense.cjs"
|
||||
},
|
||||
"./vectorstores/singlestore": {
|
||||
"types": "./vectorstores/singlestore.d.ts",
|
||||
"import": "./vectorstores/singlestore.js",
|
||||
|
||||
@@ -68,6 +68,7 @@ const entrypoints = {
|
||||
"vectorstores/typeorm": "vectorstores/typeorm",
|
||||
"vectorstores/myscale": "vectorstores/myscale",
|
||||
"vectorstores/redis": "vectorstores/redis",
|
||||
"vectorstores/typesense": "vectorstores/typesense",
|
||||
"vectorstores/singlestore": "vectorstores/singlestore",
|
||||
"vectorstores/tigris": "vectorstores/tigris",
|
||||
// text_splitter
|
||||
@@ -210,6 +211,7 @@ const requiresOptionalDependency = [
|
||||
"vectorstores/myscale",
|
||||
"vectorstores/redis",
|
||||
"vectorstores/singlestore",
|
||||
"vectorstores/typesense",
|
||||
"vectorstores/tigris",
|
||||
"memory/zep",
|
||||
"document_loaders/web/apify_dataset",
|
||||
|
||||
@@ -33,6 +33,7 @@ export const optionalImportEntrypoints = [
|
||||
"langchain/vectorstores/typeorm",
|
||||
"langchain/vectorstores/myscale",
|
||||
"langchain/vectorstores/redis",
|
||||
"langchain/vectorstores/typesense",
|
||||
"langchain/vectorstores/singlestore",
|
||||
"langchain/vectorstores/tigris",
|
||||
"langchain/memory/zep",
|
||||
|
||||
Vendored
+3
@@ -97,6 +97,9 @@ export interface OptionalImportMap {
|
||||
"langchain/vectorstores/redis"?:
|
||||
| typeof import("../vectorstores/redis.js")
|
||||
| Promise<typeof import("../vectorstores/redis.js")>;
|
||||
"langchain/vectorstores/typesense"?:
|
||||
| typeof import("../vectorstores/typesense.js")
|
||||
| Promise<typeof import("../vectorstores/typesense.js")>;
|
||||
"langchain/vectorstores/singlestore"?:
|
||||
| typeof import("../vectorstores/singlestore.js")
|
||||
| Promise<typeof import("../vectorstores/singlestore.js")>;
|
||||
|
||||
@@ -168,6 +168,7 @@ export type AgentFinish = {
|
||||
returnValues: Record<string, any>;
|
||||
log: string;
|
||||
};
|
||||
|
||||
export type AgentStep = {
|
||||
action: AgentAction;
|
||||
observation: string;
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
import { Client } from "typesense";
|
||||
import { Document } from "../../document.js";
|
||||
import { FakeEmbeddings } from "../../embeddings/fake.js";
|
||||
import { Typesense } from "../typesense.js";
|
||||
|
||||
test("documentsToTypesenseRecords should return the correct typesense records", async () => {
|
||||
const embeddings = new FakeEmbeddings();
|
||||
const vectorstore = new Typesense(embeddings, {
|
||||
schemaName: "test",
|
||||
typesenseClient: {} as unknown as Client,
|
||||
columnNames: {
|
||||
vector: "vec",
|
||||
pageContent: "text",
|
||||
metadataColumnNames: ["foo", "bar", "baz"],
|
||||
},
|
||||
});
|
||||
|
||||
const documents: Document[] = [
|
||||
{
|
||||
metadata: {
|
||||
id: "1",
|
||||
foo: "fooo",
|
||||
bar: "barr",
|
||||
baz: "bazz",
|
||||
},
|
||||
pageContent: "hello world",
|
||||
},
|
||||
{
|
||||
metadata: {
|
||||
id: "2",
|
||||
foo: "foooo",
|
||||
bar: "barrr",
|
||||
baz: "bazzz",
|
||||
},
|
||||
pageContent: "hello world 2",
|
||||
},
|
||||
];
|
||||
|
||||
const expected = [
|
||||
{
|
||||
text: "hello world",
|
||||
foo: "fooo",
|
||||
bar: "barr",
|
||||
baz: "bazz",
|
||||
vec: await embeddings.embedQuery("hello world"),
|
||||
},
|
||||
{
|
||||
text: "hello world 2",
|
||||
foo: "foooo",
|
||||
bar: "barrr",
|
||||
baz: "bazzz",
|
||||
vec: await embeddings.embedQuery("hello world 2"),
|
||||
},
|
||||
];
|
||||
|
||||
expect(
|
||||
await vectorstore._documentsToTypesenseRecords(
|
||||
documents,
|
||||
await embeddings.embedDocuments(["hello world", "hello world 2"])
|
||||
)
|
||||
).toEqual(expected);
|
||||
});
|
||||
|
||||
test("typesenseRecordsToDocuments should return the correct langchain documents", async () => {
|
||||
const embeddings = new FakeEmbeddings();
|
||||
const vectorstore = new Typesense(embeddings, {
|
||||
schemaName: "test",
|
||||
typesenseClient: {} as unknown as Client,
|
||||
columnNames: {
|
||||
vector: "vec",
|
||||
pageContent: "text",
|
||||
metadataColumnNames: ["foo", "bar", "baz"],
|
||||
},
|
||||
});
|
||||
|
||||
const typesenseRecords = [
|
||||
{
|
||||
text: "hello world",
|
||||
foo: "fooo",
|
||||
bar: "barr",
|
||||
baz: "bazz",
|
||||
vec: await embeddings.embedQuery("hello world"),
|
||||
},
|
||||
{
|
||||
text: "hello world 2",
|
||||
foo: "foooo",
|
||||
bar: "barrr",
|
||||
baz: "bazzz",
|
||||
vec: await embeddings.embedQuery("hello world 2"),
|
||||
},
|
||||
];
|
||||
|
||||
const expected = [
|
||||
{
|
||||
metadata: {
|
||||
foo: "fooo",
|
||||
bar: "barr",
|
||||
baz: "bazz",
|
||||
},
|
||||
pageContent: "hello world",
|
||||
},
|
||||
{
|
||||
metadata: {
|
||||
foo: "foooo",
|
||||
bar: "barrr",
|
||||
baz: "bazzz",
|
||||
},
|
||||
pageContent: "hello world 2",
|
||||
},
|
||||
];
|
||||
|
||||
expect(vectorstore._typesenseRecordsToDocuments(typesenseRecords)).toEqual(
|
||||
expected
|
||||
);
|
||||
});
|
||||
@@ -0,0 +1,299 @@
|
||||
import type { Client } from "typesense";
|
||||
import type { MultiSearchRequestSchema } from "typesense/lib/Typesense/MultiSearch.js";
|
||||
import type { Document } from "../document.js";
|
||||
import { Embeddings } from "../embeddings/base.js";
|
||||
import { VectorStore } from "./base.js";
|
||||
import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
|
||||
|
||||
/**
 * Typesense vector store configuration.
 *
 * Extends `AsyncCallerParams`: the `Typesense` constructor hands this whole
 * object to `new AsyncCaller(config)`, and that caller wraps the default
 * import routine.
 */
export interface TypesenseConfig extends AsyncCallerParams {
  /**
   * Typesense client.
   */
  typesenseClient: Client;
  /**
   * Typesense schema name in which documents will be stored and searched.
   * It is used as the collection name for imports, searches and deletions.
   */
  schemaName: string;
  /**
   * Typesense search parameters.
   * When provided, this object replaces the default object entirely
   * (it is not merged with it).
   * @default { q: '*', per_page: 5, query_by: '' }
   */
  searchParams?: MultiSearchRequestSchema;
  /**
   * Column names.
   */
  columnNames?: {
    /**
     * Vector column name.
     * @default 'vec'
     */
    vector?: string;
    /**
     * Page content column name.
     * @default 'text'
     */
    pageContent?: string;
    /**
     * Metadata column names.
     * Only these metadata keys are copied between documents and
     * Typesense records; other metadata keys are ignored.
     * @default []
     */
    metadataColumnNames?: string[];
  };
  /**
   * Replace default import function.
   * Default import function will update documents if there is a document with the same id.
   * @param data records to import
   * @param collectionName name of the collection receiving the records
   */
  import?<T extends Record<string, unknown> = Record<string, unknown>>(
    data: T[],
    collectionName: string
  ): Promise<void>;
}
|
||||
|
||||
/**
 * Typesense vector store.
 *
 * Each document is stored as one Typesense record made of the page content
 * column, the vector column and the configured metadata columns. Searches
 * are sent through the client's multi-search endpoint.
 */
export class Typesense extends VectorStore {
  declare FilterType: Partial<MultiSearchRequestSchema>;

  private client: Client;

  private schemaName: string;

  private searchParams: MultiSearchRequestSchema;

  private vectorColumnName: string;

  private pageContentColumnName: string;

  private metadataColumnNames: string[];

  private caller: AsyncCaller;

  private import: (
    data: Record<string, unknown>[],
    collectionName: string
  ) => Promise<void>;

  /**
   * @param embeddings embeddings used to vectorize documents in addDocuments
   * @param config Typesense configuration; also passed to the AsyncCaller
   */
  constructor(embeddings: Embeddings, config: TypesenseConfig) {
    super(embeddings, config);

    // Assign config values to class properties.
    this.client = config.typesenseClient;
    this.schemaName = config.schemaName;
    // A user-supplied searchParams object replaces the defaults as a whole.
    this.searchParams = config.searchParams || {
      q: "*",
      per_page: 5,
      query_by: "",
    };
    this.vectorColumnName = config.columnNames?.vector || "vec";
    this.pageContentColumnName = config.columnNames?.pageContent || "text";
    this.metadataColumnNames = config.columnNames?.metadataColumnNames || [];

    // Assign import function. The default one is bound because it reads
    // this.caller and this.client when it eventually runs.
    this.import = config.import || this.importToTypesense.bind(this);

    this.caller = new AsyncCaller(config);
  }

  /**
   * Default function to import data to typesense.
   * Sends the records in chunks of 2000 through the async caller, using the
   * "emplace" action and dropping dirty values.
   * @param data records to import
   * @param collectionName collection receiving the records
   */
  private async importToTypesense<
    T extends Record<string, unknown> = Record<string, unknown>
  >(data: T[], collectionName: string) {
    const chunkSize = 2000;
    for (let i = 0; i < data.length; i += chunkSize) {
      const chunk = data.slice(i, i + chunkSize);

      await this.caller.call(async () => {
        await this.client
          .collections<T>(collectionName)
          .documents()
          .import(chunk, { action: "emplace", dirty_values: "drop" });
      });
    }
  }

  /**
   * Transform documents to Typesense records.
   * Only the metadata keys listed in metadataColumnNames are copied, so a
   * document id reaches Typesense only when "id" is one of those names.
   * @param documents documents to convert
   * @param vectors one embedding per document, in the same order
   * @returns Typesense records.
   * @throws Error when fewer vectors than documents are supplied
   */
  _documentsToTypesenseRecords(
    documents: Document[],
    vectors: number[][]
  ): Record<string, unknown>[] {
    // Fail fast instead of emitting records whose vector is undefined.
    if (vectors.length < documents.length) {
      throw new Error(
        `Expected one vector per document, got ${vectors.length} vectors for ${documents.length} documents.`
      );
    }

    return documents.map((doc, index) => {
      const record: Record<string, unknown> = {
        [this.pageContentColumnName]: doc.pageContent,
        [this.vectorColumnName]: vectors[index],
      };

      // Metadata columns are written last, so they win on a name clash.
      for (const metadataColumnName of this.metadataColumnNames) {
        record[metadataColumnName] = doc.metadata[metadataColumnName];
      }

      return record;
    });
  }

  /**
   * Transform the Typesense records to documents.
   * @param typesenseRecords records returned by Typesense, or undefined
   * @returns documents; an empty array when no records are given
   */
  _typesenseRecordsToDocuments(
    typesenseRecords: Record<string, unknown>[] | undefined
  ): Document[] {
    if (!typesenseRecords) {
      return [];
    }

    return typesenseRecords.map((hit) => {
      const metadata: Record<string, unknown> = {};
      for (const metadataColumnName of this.metadataColumnNames) {
        metadata[metadataColumnName] = hit[metadataColumnName];
      }

      const document: Document = {
        pageContent: (hit[this.pageContentColumnName] as string) || "",
        metadata,
      };
      return document;
    });
  }

  /**
   * Add documents to the vector store.
   * The default import function uses the "emplace" action; for that to
   * update an existing record, the id must be written to the record, which
   * requires "id" to be listed in metadataColumnNames.
   * Metadata will be added in the columns of the schema based on metadataColumnNames.
   * @param documents Documents to add.
   */
  async addDocuments(documents: Document[]) {
    const vectors = await this.embeddings.embedDocuments(
      documents.map((doc) => doc.pageContent)
    );
    const typesenseDocuments = this._documentsToTypesenseRecords(
      documents,
      vectors
    );
    await this.import(typesenseDocuments, this.schemaName);
  }

  /**
   * Add documents with precomputed vectors to the vector store.
   * @param vectors one embedding per document, in the same order
   * @param documents documents to add
   */
  async addVectors(vectors: number[][], documents: Document[]) {
    const typesenseDocuments = this._documentsToTypesenseRecords(
      documents,
      vectors
    );
    await this.import(typesenseDocuments, this.schemaName);
  }

  /**
   * Search for similar documents with their similarity score.
   * This implementation does not read a score from the Typesense response,
   * so every returned document carries 0 as its score.
   * @param vectorPrompt vector to search for
   * @param k amount of results to return; falls back to searchParams.per_page, then 5
   * @param filter extra search parameters, applied on top of searchParams
   * @returns similar documents with their similarity score
   */
  async similaritySearchVectorWithScore(
    vectorPrompt: number[],
    k?: number,
    filter: this["FilterType"] = {}
  ) {
    const amount = k || this.searchParams.per_page || 5;
    const vector_query = `${this.vectorColumnName}:([${vectorPrompt.join(
      ","
    )}], k:${amount})`;
    const typesenseResponse = await this.client.multiSearch.perform(
      {
        searches: [
          {
            ...this.searchParams,
            ...filter,
            // These three always override whatever the params/filter carry.
            per_page: amount,
            vector_query,
            collection: this.schemaName,
          },
        ],
      },
      {}
    );
    // Guard against a response without any result entry.
    const results = typesenseResponse.results[0]?.hits;
    const hits = results?.map((hit) => hit.document) as
      | Record<string, unknown>[]
      | undefined;

    return this._typesenseRecordsToDocuments(hits).map(
      (doc) => [doc, 0] as [Document<Record<string, unknown>>, number]
    );
  }

  /**
   * Delete documents from the vector store.
   * Does nothing when the list is empty, so no empty filter is ever sent.
   * @param documentIds ids of the documents to delete
   */
  async deleteDocuments(documentIds: string[]) {
    if (documentIds.length === 0) {
      return;
    }

    await this.client
      .collections(this.schemaName)
      .documents()
      .delete({
        // Typesense's multi-value filter form: id: [id1,id2,...]
        filter_by: `id: [${documentIds.join(",")}]`,
      });
  }

  /**
   * Create a vector store from documents.
   * @param docs documents
   * @param embeddings embeddings
   * @param config Typesense configuration
   * @returns Typesense vector store
   * @warning You can omit this method, and only use the constructor and addDocuments.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    config: TypesenseConfig
  ): Promise<Typesense> {
    const instance = new Typesense(embeddings, config);
    await instance.addDocuments(docs);

    return instance;
  }

  /**
   * Create a vector store from texts.
   * @param texts texts to store, one document per text
   * @param metadatas one metadata object per text, or a single object shared by every text
   * @param embeddings embeddings
   * @param config Typesense configuration
   * @returns Typesense vector store
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: Embeddings,
    config: TypesenseConfig
  ) {
    const instance = new Typesense(embeddings, config);
    const documents: Document[] = texts.map((text, i) => ({
      pageContent: text,
      // A missing entry in the array falls back to empty metadata.
      metadata: (Array.isArray(metadatas) ? metadatas[i] : metadatas) || {},
    }));
    await instance.addDocuments(documents);

    return instance;
  }
}
|
||||
@@ -81,6 +81,7 @@
|
||||
"src/vectorstores/typeorm.ts",
|
||||
"src/vectorstores/myscale.ts",
|
||||
"src/vectorstores/redis.ts",
|
||||
"src/vectorstores/typesense.ts",
|
||||
"src/vectorstores/singlestore.ts",
|
||||
"src/vectorstores/tigris.ts",
|
||||
"src/text_splitter.ts",
|
||||
|
||||
@@ -14278,6 +14278,7 @@ __metadata:
|
||||
tsx: ^3.12.3
|
||||
typeorm: ^0.3.12
|
||||
typescript: ^5.0.0
|
||||
typesense: ^1.5.3
|
||||
weaviate-ts-client: ^1.0.0
|
||||
zod: ^3.21.4
|
||||
languageName: unknown
|
||||
@@ -18680,6 +18681,7 @@ __metadata:
|
||||
ts-jest: ^29.1.0
|
||||
typeorm: ^0.3.12
|
||||
typescript: ^5.0.0
|
||||
typesense: ^1.5.3
|
||||
uuid: ^9.0.0
|
||||
weaviate-ts-client: ^1.0.0
|
||||
yaml: ^2.2.1
|
||||
@@ -18732,6 +18734,7 @@ __metadata:
|
||||
replicate: ^0.9.0
|
||||
srt-parser-2: ^1.2.2
|
||||
typeorm: ^0.3.12
|
||||
typesense: ^1.5.3
|
||||
weaviate-ts-client: ^1.0.0
|
||||
peerDependenciesMeta:
|
||||
"@aws-sdk/client-dynamodb":
|
||||
@@ -18826,6 +18829,8 @@ __metadata:
|
||||
optional: true
|
||||
typeorm:
|
||||
optional: true
|
||||
typesense:
|
||||
optional: true
|
||||
weaviate-ts-client:
|
||||
optional: true
|
||||
languageName: unknown
|
||||
@@ -19203,6 +19208,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"loglevel@npm:^1.8.0":
|
||||
version: 1.8.1
|
||||
resolution: "loglevel@npm:1.8.1"
|
||||
checksum: a1a62db40291aaeaef2f612334c49e531bff71cc1d01a2acab689ab80d59e092f852ab164a5aedc1a752fdc46b7b162cb097d8a9eb2cf0b299511106c29af61d
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"long@npm:4.0.0, long@npm:^4.0.0":
|
||||
version: 4.0.0
|
||||
resolution: "long@npm:4.0.0"
|
||||
@@ -26674,6 +26686,18 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"typesense@npm:^1.5.3":
|
||||
version: 1.5.3
|
||||
resolution: "typesense@npm:1.5.3"
|
||||
dependencies:
|
||||
axios: ^0.26.0
|
||||
loglevel: ^1.8.0
|
||||
peerDependencies:
|
||||
"@babel/runtime": ^7.17.2
|
||||
checksum: 6315139ca0be12f558a8ffb78e8055497941ba625fd61729ebe05d4af7d69524d89d1bd56a4a3f38df32fe41216ab6fde2e97bcddddce8af8328d5cddba85f82
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"ua-parser-js@npm:^0.7.30":
|
||||
version: 0.7.34
|
||||
resolution: "ua-parser-js@npm:0.7.34"
|
||||
|
||||
Reference in New Issue
Block a user