Merge branch 'main' into vwp/eval_chains

This commit is contained in:
vowelparrot
2023-06-14 21:43:53 -07:00
40 changed files with 1225 additions and 61 deletions
@@ -0,0 +1,13 @@
---
hide_table_of_contents: true
sidebar_position: 5
---
import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/agents/openai.ts";
# OpenAI Agent for Chat Models
This example covers how to use an agent that uses OpenAI's Function Calling functionality to pick the tool and args to call. This agent only works with specific OpenAI Chat Models; see the [OpenAI documentation](https://platform.openai.com/docs/guides/gpt/function-calling) for more information.
<CodeBlock language="typescript">{Example}</CodeBlock>
@@ -0,0 +1,129 @@
# Typesense
Vector store that utilizes the Typesense search engine.
### Basic Usage
```typescript
import { Typesense, TypesenseConfig } from "langchain/vectorstores/typesense";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { Client } from "typesense";
import { Document } from "langchain/document";
const vectorTypesenseClient = new Client({
nodes: [
{
// Ideally should come from your .env file
host: "...",
port: 123,
protocol: "https",
},
],
// Ideally should come from your .env file
apiKey: "...",
numRetries: 3,
connectionTimeoutSeconds: 60,
});
const typesenseVectorStoreConfig = {
// Typesense client
typesenseClient: vectorTypesenseClient,
// Name of the collection to store the vectors in
schemaName: "your_schema_name",
// Optional column names to be used in Typesense
columnNames: {
// "vec" is the default name for the vector column in Typesense but you can change it to whatever you want
vector: "vec",
// "text" is the default name for the text column in Typesense but you can change it to whatever you want
pageContent: "text",
// Names of the columns that you will save in your typesense schema and need to be retrieved as metadata when searching
metadataColumnNames: ["foo", "bar", "baz"],
},
// Optional search parameters to be passed to Typesense when searching
searchParams: {
q: "*",
filter_by: "foo:[fooo]",
query_by: "",
},
// You can override the default Typesense import function if you want to do something more complex
// Default import function:
// async importToTypesense<
// T extends Record<string, unknown> = Record<string, unknown>
// >(data: T[], collectionName: string) {
// const chunkSize = 2000;
// for (let i = 0; i < data.length; i += chunkSize) {
// const chunk = data.slice(i, i + chunkSize);
// await this.caller.call(async () => {
// await this.client
// .collections<T>(collectionName)
// .documents()
// .import(chunk, { action: "emplace", dirty_values: "drop" });
// });
// }
// }
import: async (data, collectionName) => {
await vectorTypesenseClient
.collections(collectionName)
.documents()
.import(data, { action: "emplace", dirty_values: "drop" });
},
} satisfies TypesenseConfig;
/**
* Creates a Typesense vector store from a list of documents.
* Will update documents if there is a document with the same id, at least with the default import function.
* @param documents list of documents to create the vector store from
* @returns Typesense vector store
*/
const createVectorStoreWithTypesense = async (documents: Document[] = []) =>
Typesense.fromDocuments(
documents,
new OpenAIEmbeddings(),
typesenseVectorStoreConfig
);
/**
* Returns a Typesense vector store from an existing index.
* @returns Typesense vector store
*/
const getVectorStoreWithTypesense = async () =>
new Typesense(new OpenAIEmbeddings(), typesenseVectorStoreConfig);
// Do a similarity search
const vectorStore = await getVectorStoreWithTypesense();
const documents = await vectorStore.similaritySearch("hello world");
// Add filters based on metadata with the search parameters of Typesense.
// This excludes documents with author "JK Rowling": if both Joe Rowling and
// JK Rowling exist, only Joe Rowling will be returned.
// The call returns a promise, so await it to surface any search error here.
await vectorStore.similaritySearch("Rowling", undefined, {
  filter_by: "author:!=JK Rowling",
});
// Delete documents by id (awaited so a failed deletion is not silently dropped)
await vectorStore.deleteDocuments(["document_id_1", "document_id_2"]);
```
### Constructor
Before starting, create a schema in Typesense with an id, a field for the vector and a field for the text. Add as many other fields as needed for the metadata.
- `constructor(embeddings: Embeddings, config: TypesenseConfig)`: Constructs a new instance of the `Typesense` class.
- `embeddings`: An instance of the `Embeddings` class used for embedding documents.
- `config`: Configuration object for the Typesense vector store.
- `typesenseClient`: Typesense client instance.
- `schemaName`: Name of the Typesense schema in which documents will be stored and searched.
- `searchParams` (optional): Typesense search parameters. Default is `{ q: '*', per_page: 5, query_by: '' }`.
- `columnNames` (optional): Column names configuration.
- `vector` (optional): Vector column name. Default is `'vec'`.
- `pageContent` (optional): Page content column name. Default is `'text'`.
- `metadataColumnNames` (optional): Metadata column names. Default is an empty array `[]`.
- `import` (optional): Replace the default import function for importing data to Typesense. This can affect the functionality of updating documents.
### Methods
- `async addDocuments(documents: Document[]): Promise<void>`: Adds documents to the vector store. The documents will be updated if there is a document with the same ID.
- `static async fromDocuments(docs: Document[], embeddings: Embeddings, config: TypesenseConfig): Promise<Typesense>`: Creates a Typesense vector store from a list of documents. Documents are added to the vector store during construction.
- `static async fromTexts(texts: string[], metadatas: object[], embeddings: Embeddings, config: TypesenseConfig): Promise<Typesense>`: Creates a Typesense vector store from a list of texts and associated metadata. Texts are converted to documents and added to the vector store during construction.
- `async similaritySearch(query: string, k?: number, filter?: Record<string, unknown>): Promise<Document[]>`: Searches for similar documents based on a query. Returns an array of similar documents.
- `async deleteDocuments(documentIds: string[]): Promise<void>`: Deletes documents from the vector store based on their IDs.
@@ -15,6 +15,7 @@ LangChain offers a number of LLM implementations that integrate with various mod
import { OpenAI } from "langchain/llms/openai";
const model = new OpenAI({
modelName: "text-davinci-003", // Defaults to "text-davinci-003" if no model provided.
temperature: 0.9,
openAIApiKey: "YOUR-API-KEY", // In Node.js defaults to process.env.OPENAI_API_KEY
});
+5 -1
View File
@@ -4,7 +4,7 @@ import CodeBlock from "@theme/CodeBlock";
LangChain provides a callback system that allows you to hook into the various stages of your LLM application. This is useful for logging, [monitoring](../tracing), [streaming](../../modules/models/llms/additional_functionality#streaming-responses), and other tasks.
You can subscribe to these events by using the `callbacks` argument available throughout the API. This method accepts a list of handler objects, which are expected to implement one or more of the methods described in the [API docs](../../api/callbacks/interfaces/CallbackHandlerMethods).
You can subscribe to these events by using the `callbacks` argument available throughout the API. This method accepts a list of handler objects, which are expected to implement [one or more of the methods described in the API docs](../../api/callbacks/interfaces/CallbackHandlerMethods).
## Dive deeper
@@ -45,6 +45,10 @@ import VerboseExample from "@examples/callbacks/docs_verbose.ts";
- Constructor callbacks are most useful for use cases such as logging, monitoring, etc., which are _not specific to a single request_, but rather to the entire chain. For example, if you want to log all the requests made to an LLMChain, you would pass a handler to the constructor.
- Request callbacks are most useful for use cases such as streaming, where you want to stream the output of a single request to a specific websocket connection, or other similar use cases. For example, if you want to stream the output of a single request to a websocket, you would pass a handler to the `call()` method.
## Tags
You can add tags to your callbacks by passing a `tags` argument to the `call()`/`run()`/`apply()` methods. This is useful for filtering your logs, e.g. if you want to log all requests made to a specific LLMChain, you can add a tag, and then filter your logs by that tag. You can pass tags to both constructor and request callbacks; see the examples above for details. These tags are then passed to the `tags` argument of the "start" callback methods, i.e. [`handleLLMStart`](../../api/callbacks/interfaces/CallbackHandlerMethods#handlellmstart), [`handleChatModelStart`](../../api/callbacks/interfaces/CallbackHandlerMethods#handlechatmodelstart), [`handleChainStart`](../../api/callbacks/interfaces/CallbackHandlerMethods#handlechainstart), [`handleToolStart`](../../api/callbacks/interfaces/CallbackHandlerMethods#handletoolstart).
## Backgrounding callbacks
By default, callbacks run inline with your chain/LLM run. This means that if you have a slow callback you can see an impact on the overall latency of your runs. You can make callbacks not be awaited by setting the environment variable `LANGCHAIN_CALLBACKS_BACKGROUND=true`. This will cause the callbacks to be run in the background, and will not impact the overall latency of your runs. When you do this you might need to await all pending callbacks before exiting your application. You can do this with the following method:
+1 -1
View File
@@ -21,7 +21,7 @@ function Imports({ imports }) {
</h4>
<ul style={{ paddingBottom: "1rem" }}>
{imports.map(({ imported, source, docs }) => (
<li>
<li key={imported}>
<a href={docs}>
<span>{imported}</span>
</a>{" "}
+1
View File
@@ -48,6 +48,7 @@
"redis": "^4.6.6",
"sqlite3": "^5.1.4",
"typeorm": "^0.3.12",
"typesense": "^1.5.3",
"weaviate-ts-client": "^1.0.0",
"zod": "^3.21.4"
},
+15
View File
@@ -0,0 +1,15 @@
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import { ChatOpenAI } from "langchain/chat_models/openai";
import { SerpAPI } from "langchain/tools";
import { Calculator } from "langchain/tools/calculator";
// Build an agent executor of type "openai-functions", giving it a calculator
// and a SerpAPI search tool to choose from.
const executor = await initializeAgentExecutorWithOptions(
[new Calculator(), new SerpAPI()],
// NOTE: the accompanying docs state that this agent only works with specific
// OpenAI chat models.
new ChatOpenAI({ modelName: "gpt-4-0613", temperature: 0 }),
{
agentType: "openai-functions",
verbose: true,
}
);
// Run a single question through the agent.
const result = await executor.run("What is the weather in New York?");
@@ -3,6 +3,8 @@ import { OpenAI } from "langchain/llms/openai";
const llm = new OpenAI({
temperature: 0,
// These tags will be attached to all calls made with this LLM.
tags: ["example", "callbacks", "constructor"],
// This handler will be used for all calls made with this LLM.
callbacks: [new ConsoleCallbackHandler()],
});
@@ -5,7 +5,10 @@ const llm = new OpenAI({
temperature: 0,
});
// This handler will be used only for this call.
const response = await llm.call("1 + 1 =", undefined, [
new ConsoleCallbackHandler(),
]);
const response = await llm.call(
"1 + 1 =",
// These tags will be attached only to this call to the LLM.
{ tags: ["example", "callbacks", "request"] },
// This handler will be used only for this call.
[new ConsoleCallbackHandler()]
);
@@ -39,7 +39,8 @@ export class MyCustomChain extends BaseChain implements MyCustomChainInputs {
const result = await this.llm.generatePrompt(
[promptValue],
{},
runManager?.getChild()
// This tag "a-tag" will be attached to this inner LLM call
runManager?.getChild("a-tag")
);
// If you want to log something about this run, you can do so by calling
@@ -0,0 +1,97 @@
import { Typesense, TypesenseConfig } from "langchain/vectorstores/typesense";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { Client } from "typesense";
import { Document } from "langchain/document";
const vectorTypesenseClient = new Client({
nodes: [
{
// Ideally should come from your .env file
host: "...",
port: 123,
protocol: "https",
},
],
// Ideally should come from your .env file
apiKey: "...",
numRetries: 3,
connectionTimeoutSeconds: 60,
});
const typesenseVectorStoreConfig = {
// Typesense client
typesenseClient: vectorTypesenseClient,
// Name of the collection to store the vectors in
schemaName: "your_schema_name",
// Optional column names to be used in Typesense
columnNames: {
// "vec" is the default name for the vector column in Typesense but you can change it to whatever you want
vector: "vec",
// "text" is the default name for the text column in Typesense but you can change it to whatever you want
pageContent: "text",
// Names of the columns that you will save in your typesense schema and need to be retrieved as metadata when searching
metadataColumnNames: ["foo", "bar", "baz"],
},
// Optional search parameters to be passed to Typesense when searching
searchParams: {
q: "*",
filter_by: "foo:[fooo]",
query_by: "",
},
// You can override the default Typesense import function if you want to do something more complex
// Default import function:
// async importToTypesense<
// T extends Record<string, unknown> = Record<string, unknown>
// >(data: T[], collectionName: string) {
// const chunkSize = 2000;
// for (let i = 0; i < data.length; i += chunkSize) {
// const chunk = data.slice(i, i + chunkSize);
// await this.caller.call(async () => {
// await this.client
// .collections<T>(collectionName)
// .documents()
// .import(chunk, { action: "emplace", dirty_values: "drop" });
// });
// }
// }
import: async (data, collectionName) => {
await vectorTypesenseClient
.collections(collectionName)
.documents()
.import(data, { action: "emplace", dirty_values: "drop" });
},
} satisfies TypesenseConfig;
/**
* Creates a Typesense vector store from a list of documents.
* Will update documents if there is a document with the same id, at least with the default import function.
* @param documents list of documents to create the vector store from
* @returns Typesense vector store
*/
const createVectorStoreWithTypesense = async (documents: Document[] = []) =>
Typesense.fromDocuments(
documents,
new OpenAIEmbeddings(),
typesenseVectorStoreConfig
);
/**
* Returns a Typesense vector store from an existing index.
* @returns Typesense vector store
*/
const getVectorStoreWithTypesense = async () =>
new Typesense(new OpenAIEmbeddings(), typesenseVectorStoreConfig);
// Do a similarity search
const vectorStore = await getVectorStoreWithTypesense();
const documents = await vectorStore.similaritySearch("hello world");
// Add filters based on metadata with the search parameters of Typesense.
// This excludes documents with author "JK Rowling": if both Joe Rowling and
// JK Rowling exist, only Joe Rowling will be returned.
// The call returns a promise, so await it to surface any search error here.
await vectorStore.similaritySearch("Rowling", undefined, {
  filter_by: "author:!=JK Rowling",
});
// Delete documents by id (awaited so a failed deletion is not silently dropped)
await vectorStore.deleteDocuments(["document_id_1", "document_id_2"]);
@@ -1,6 +1,73 @@
import { ChatOpenAI } from "langchain/chat_models/openai";
import { HumanChatMessage } from "langchain/schema";
import { SerpAPI } from "langchain/tools";
const model = new ChatOpenAI({
temperature: 0.9,
openAIApiKey: "YOUR-API-KEY", // In Node.js defaults to process.env.OPENAI_API_KEY
});
// You can also pass tools or functions to the model, learn more here
// https://platform.openai.com/docs/guides/gpt/function-calling
const modelForFunctionCalling = new ChatOpenAI({
modelName: "gpt-4-0613",
temperature: 0,
});
await modelForFunctionCalling.predictMessages(
[new HumanChatMessage("What is the weather in New York?")],
{ tools: [new SerpAPI()] }
// Tools will be automatically formatted as functions in the OpenAI format
);
/*
AIChatMessage {
text: '',
name: undefined,
additional_kwargs: {
function_call: {
name: 'search',
arguments: '{\n "input": "current weather in New York"\n}'
}
}
}
*/
await modelForFunctionCalling.predictMessages(
[new HumanChatMessage("What is the weather in New York?")],
{
functions: [
{
name: "get_current_weather",
description: "Get the current weather in a given location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
unit: { type: "string", enum: ["celsius", "fahrenheit"] },
},
required: ["location"],
},
},
],
// You can set the `function_call` arg to force the model to use a function
function_call: {
name: "get_current_weather",
},
}
);
/*
AIChatMessage {
text: '',
name: undefined,
additional_kwargs: {
function_call: {
name: 'get_current_weather',
arguments: '{\n "location": "New York"\n}'
}
}
}
*/
+3
View File
@@ -154,6 +154,9 @@ vectorstores/myscale.d.ts
vectorstores/redis.cjs
vectorstores/redis.js
vectorstores/redis.d.ts
vectorstores/typesense.cjs
vectorstores/typesense.js
vectorstores/typesense.d.ts
vectorstores/singlestore.cjs
vectorstores/singlestore.js
vectorstores/singlestore.d.ts
+15 -2
View File
@@ -1,6 +1,6 @@
{
"name": "langchain",
"version": "0.0.93",
"version": "0.0.95",
"description": "Typescript bindings for langchain",
"type": "module",
"engines": {
@@ -166,6 +166,9 @@
"vectorstores/redis.cjs",
"vectorstores/redis.js",
"vectorstores/redis.d.ts",
"vectorstores/typesense.cjs",
"vectorstores/typesense.js",
"vectorstores/typesense.d.ts",
"vectorstores/singlestore.cjs",
"vectorstores/singlestore.js",
"vectorstores/singlestore.d.ts",
@@ -508,6 +511,7 @@
"ts-jest": "^29.1.0",
"typeorm": "^0.3.12",
"typescript": "^5.0.0",
"typesense": "^1.5.3",
"weaviate-ts-client": "^1.0.0"
},
"peerDependencies": {
@@ -557,6 +561,7 @@
"replicate": "^0.9.0",
"srt-parser-2": "^1.2.2",
"typeorm": "^0.3.12",
"typesense": "^1.5.3",
"weaviate-ts-client": "^1.0.0"
},
"peerDependenciesMeta": {
@@ -698,6 +703,9 @@
"typeorm": {
"optional": true
},
"typesense": {
"optional": true
},
"weaviate-ts-client": {
"optional": true
}
@@ -715,7 +723,7 @@
"langchainplus-sdk": "^0.0.11",
"ml-distance": "^4.0.0",
"object-hash": "^3.0.0",
"openai": "^3.2.0",
"openai": "^3.3.0",
"p-queue": "^6.6.2",
"p-retry": "4",
"uuid": "^9.0.0",
@@ -1012,6 +1020,11 @@
"import": "./vectorstores/redis.js",
"require": "./vectorstores/redis.cjs"
},
"./vectorstores/typesense": {
"types": "./vectorstores/typesense.d.ts",
"import": "./vectorstores/typesense.js",
"require": "./vectorstores/typesense.cjs"
},
"./vectorstores/singlestore": {
"types": "./vectorstores/singlestore.d.ts",
"import": "./vectorstores/singlestore.js",
+2
View File
@@ -68,6 +68,7 @@ const entrypoints = {
"vectorstores/typeorm": "vectorstores/typeorm",
"vectorstores/myscale": "vectorstores/myscale",
"vectorstores/redis": "vectorstores/redis",
"vectorstores/typesense": "vectorstores/typesense",
"vectorstores/singlestore": "vectorstores/singlestore",
"vectorstores/tigris": "vectorstores/tigris",
// text_splitter
@@ -210,6 +211,7 @@ const requiresOptionalDependency = [
"vectorstores/myscale",
"vectorstores/redis",
"vectorstores/singlestore",
"vectorstores/typesense",
"vectorstores/tigris",
"memory/zep",
"document_loaders/web/apify_dataset",
+4 -1
View File
@@ -203,7 +203,7 @@ export interface AgentArgs {
export abstract class Agent extends BaseSingleActionAgent {
llmChain: LLMChain;
outputParser: AgentActionOutputParser;
outputParser: AgentActionOutputParser | undefined;
private _allowedTools?: string[] = undefined;
@@ -323,6 +323,9 @@ export abstract class Agent extends BaseSingleActionAgent {
}
const output = await this.llmChain.predict(newInputs, callbackManager);
if (!this.outputParser) {
throw new Error("Output parser not set");
}
return this.outputParser.parse(output, callbackManager);
}
+5
View File
@@ -70,3 +70,8 @@ export {
StructuredChatOutputParserArgs,
StructuredChatOutputParserWithRetries,
} from "./structured_chat/outputParser.js";
export {
OpenAIAgent,
OpenAIAgentInput,
OpenAIAgentCreatePromptArgs,
} from "./openai/index.js";
+25 -2
View File
@@ -7,6 +7,7 @@ import { ChatConversationalAgent } from "./chat_convo/index.js";
import { StructuredChatAgent } from "./structured_chat/index.js";
import { AgentExecutor, AgentExecutorInput } from "./executor.js";
import { ZeroShotAgent } from "./mrkl/index.js";
import { OpenAIAgent } from "./openai/index.js";
type AgentType =
| "zero-shot-react-description"
@@ -78,10 +79,14 @@ export type InitializeAgentExecutorOptions =
* @interface
*/
export type InitializeAgentExecutorOptionsStructured =
| {
| ({
agentType: "structured-chat-zero-shot-react-description";
agentArgs?: Parameters<typeof StructuredChatAgent.fromLLMAndTools>[2];
} & Omit<AgentExecutorInput, "agent" | "tools">;
} & Omit<AgentExecutorInput, "agent" | "tools">)
| ({
agentType: "openai-functions";
agentArgs?: Parameters<typeof OpenAIAgent.fromLLMAndTools>[2];
} & Omit<AgentExecutorInput, "agent" | "tools">);
/**
* Initialize an agent executor with options
@@ -147,6 +152,7 @@ export async function initializeAgentExecutorWithOptions(
returnMessages: true,
memoryKey: "chat_history",
inputKey: "input",
outputKey: "output",
}),
...rest,
});
@@ -162,6 +168,23 @@ export async function initializeAgentExecutorWithOptions(
});
return executor;
}
case "openai-functions": {
const { agentArgs, memory, ...rest } = options;
const executor = AgentExecutor.fromAgentAndTools({
agent: OpenAIAgent.fromLLMAndTools(llm, tools, agentArgs),
tools,
memory:
memory ??
new BufferMemory({
returnMessages: true,
memoryKey: "chat_history",
inputKey: "input",
outputKey: "output",
}),
...rest,
});
return executor;
}
default: {
throw new Error("Unknown agent type");
}
+165
View File
@@ -0,0 +1,165 @@
import { ChatCompletionRequestMessageFunctionCall } from "openai";
import { CallbackManager } from "../../callbacks/manager.js";
import { ChatOpenAI } from "../../chat_models/openai.js";
import { BasePromptTemplate } from "../../prompts/base.js";
import {
AIChatMessage,
AgentAction,
AgentFinish,
AgentStep,
BaseChatMessage,
FunctionChatMessage,
ChainValues,
} from "../../schema/index.js";
import { StructuredTool } from "../../tools/base.js";
import { Agent, AgentArgs } from "../agent.js";
import { AgentInput } from "../types.js";
import { PREFIX } from "./prompt.js";
import {
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
} from "../../prompts/chat.js";
import { BaseLanguageModel } from "../../base_language/index.js";
import { LLMChain } from "../../chains/llm_chain.js";
/**
 * Converts a chat model response into the agent's next step.
 *
 * If the message carries a `function_call` in its `additional_kwargs`, the call
 * becomes an action: the function name is the tool name and the JSON-encoded
 * `arguments` string is decoded into the tool input (an empty object when no
 * arguments are present). Otherwise the message text is returned as the final
 * output.
 *
 * @param message - Chat message returned by the model.
 * @returns An action describing the tool to call, or a finish step whose
 *   `returnValues.output` is the message text.
 * @throws SyntaxError if `function_call.arguments` is not valid JSON. The
 *   message names the function and includes the raw arguments to aid debugging.
 */
function parseOutput(message: BaseChatMessage): AgentAction | AgentFinish {
  if (message.additional_kwargs.function_call) {
    // eslint-disable-next-line prefer-destructuring
    const function_call: ChatCompletionRequestMessageFunctionCall =
      message.additional_kwargs.function_call;
    try {
      return {
        tool: function_call.name as string,
        toolInput: function_call.arguments
          ? JSON.parse(function_call.arguments)
          : {},
        log: message.text,
      };
    } catch (error) {
      // Only JSON.parse can throw above. Rethrow as the same error type, but
      // with enough context to tell which function call was malformed.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(
        `Failed to parse arguments for function call "${function_call.name}": ${reason}. Raw arguments: ${function_call.arguments}`
      );
    }
  } else {
    return { returnValues: { output: message.text }, log: message.text };
  }
}
/**
 * Constructor input for `OpenAIAgent`: the base `AgentInput` fields plus the
 * structured tools the agent passes to the chat model on each `plan` call.
 */
export interface OpenAIAgentInput extends AgentInput {
tools: StructuredTool[];
}
/** Options accepted by `OpenAIAgent.createPrompt`. */
export interface OpenAIAgentCreatePromptArgs {
/** Template for the system message at the start of the prompt; `createPrompt` falls back to `PREFIX` when omitted. */
prefix?: string;
}
/**
 * Agent that lets the chat model pick a tool through a function call.
 *
 * `plan` passes `tools` to the model as call options and converts the returned
 * message into an action or a finish step with `parseOutput`, so no output
 * parser is configured on the base class.
 */
export class OpenAIAgent extends Agent {
lc_namespace = ["langchain", "agents", "openai"];
/** Identifier for this agent type. */
_agentType() {
return "openai-functions" as const;
}
// The three text-style hooks below return fixed strings. They are not
// referenced by this class's own `constructScratchPad`; only `_stop` is read
// by `plan`.
observationPrefix() {
return "Observation: ";
}
llmPrefix() {
return "Thought:";
}
_stop(): string[] {
return ["Observation:"];
}
/** Tools handed to the chat model as call options in `plan`. */
tools: StructuredTool[];
/**
 * @param input - Agent input without an output parser; `outputParser` is
 *   explicitly set to `undefined` on the base class.
 */
constructor(input: Omit<OpenAIAgentInput, "outputParser">) {
super({ ...input, outputParser: undefined });
this.tools = input.tools;
}
/**
 * Builds the chat prompt: a system message from `prefix`, a `chat_history`
 * placeholder, the human `{input}` message, and an `agent_scratchpad`
 * placeholder, in that order.
 *
 * @param _tools - Unused by this implementation.
 * @param fields - Optional prompt arguments; `prefix` defaults to `PREFIX`.
 * @returns The assembled chat prompt template.
 */
static createPrompt(
_tools: StructuredTool[],
fields?: OpenAIAgentCreatePromptArgs
): BasePromptTemplate {
const { prefix = PREFIX } = fields || {};
return ChatPromptTemplate.fromPromptMessages([
SystemMessagePromptTemplate.fromTemplate(prefix),
new MessagesPlaceholder("chat_history"),
HumanMessagePromptTemplate.fromTemplate("{input}"),
new MessagesPlaceholder("agent_scratchpad"),
]);
}
/**
 * Creates an `OpenAIAgent` from a model and a set of tools.
 *
 * @param llm - Model to use; must report model type "base_chat_model" and LLM
 *   type "openai".
 * @param tools - Tools offered to the model; their names become the agent's
 *   allowed tools.
 * @param args - Optional prompt arguments and callbacks. The callbacks are
 *   passed to the underlying `LLMChain`.
 * @returns A new `OpenAIAgent`.
 * @throws Error if `llm` is not an OpenAI chat model.
 */
static fromLLMAndTools(
llm: BaseLanguageModel,
tools: StructuredTool[],
args?: OpenAIAgentCreatePromptArgs & Pick<AgentArgs, "callbacks">
) {
OpenAIAgent.validateTools(tools);
if (llm._modelType() !== "base_chat_model" || llm._llmType() !== "openai") {
throw new Error("OpenAIAgent requires an OpenAI chat model");
}
const prompt = OpenAIAgent.createPrompt(tools, args);
const chain = new LLMChain({
prompt,
llm,
callbacks: args?.callbacks,
});
return new OpenAIAgent({
llmChain: chain,
allowedTools: tools.map((t) => t.name),
tools,
});
}
/**
 * Turns the intermediate steps into chat messages for the scratchpad.
 *
 * Each step yields two messages: an AI message with empty text whose
 * `function_call` holds the tool name and the JSON-stringified tool input,
 * followed by a function message holding the observation under the tool name.
 *
 * @param steps - Steps taken so far.
 * @returns The flattened list of messages.
 */
async constructScratchPad(
steps: AgentStep[]
): Promise<string | BaseChatMessage[]> {
return steps.flatMap(({ action, observation }) => [
new AIChatMessage("", {
function_call: {
name: action.tool,
arguments: JSON.stringify(action.toolInput),
},
}),
new FunctionChatMessage(observation, action.tool),
]);
}
/**
 * Decides the next step by calling the chat model directly.
 *
 * @param steps - Steps taken so far, rendered into `agent_scratchpad`.
 * @param inputs - Chain inputs supplied by the caller.
 * @param callbackManager - Optional callback manager forwarded to the model call.
 * @returns The action or finish step produced by `parseOutput`.
 */
async plan(
steps: Array<AgentStep>,
inputs: ChainValues,
callbackManager?: CallbackManager
): Promise<AgentAction | AgentFinish> {
// Add scratchpad and stop to inputs
const thoughts = await this.constructScratchPad(steps);
const newInputs: ChainValues = {
...inputs,
agent_scratchpad: thoughts,
};
if (this._stop().length !== 0) {
newInputs.stop = this._stop();
}
// Split inputs between prompt and llm
const llm = this.llmChain.llm as ChatOpenAI;
const valuesForPrompt = { ...newInputs };
const valuesForLLM: (typeof llm)["CallOptions"] = {
tools: this.tools,
};
// NOTE(review): the membership test below uses the caller's `inputs`, not
// `newInputs`. The `stop` value added above is therefore not copied into
// `valuesForLLM` by this loop; only a caller-supplied call key is forwarded,
// using the caller's value. Confirm this is intended.
for (const key of this.llmChain.llm.callKeys) {
if (key in inputs) {
valuesForLLM[key as keyof (typeof llm)["CallOptions"]] = inputs[key];
delete valuesForPrompt[key];
}
}
const promptValue = await this.llmChain.prompt.formatPromptValue(
valuesForPrompt
);
// The model is called directly with the formatted chat messages so the
// returned message (including any function call) can be parsed here.
const message = await llm.predictMessages(
promptValue.toChatMessages(),
valuesForLLM,
callbackManager
);
return parseOutput(message);
}
}
+2
View File
@@ -0,0 +1,2 @@
/** Default system message text for the agent prompt. */
export const PREFIX = `You are a helpful AI assistant.`;
/** Empty suffix. NOTE(review): no use of this constant is visible here — confirm whether it is needed. */
export const SUFFIX = ``;
@@ -0,0 +1,63 @@
/* eslint-disable no-process-env */
import { test } from "@jest/globals";
import { initializeAgentExecutorWithOptions } from "../initialize.js";
import { Calculator } from "../../tools/calculator.js";
import { SerpAPI } from "../../tools/serpapi.js";
import { ChatOpenAI } from "../../chat_models/openai.js";
// Integration test: runs the "openai-functions" agent with a calculator and a
// SerpAPI tool, and logs the answers. It makes no assertions on the output.
test("OpenAIAgent", async () => {
const executor = await initializeAgentExecutorWithOptions(
[
new Calculator(),
new SerpAPI(process.env.SERPAPI_API_KEY, {
location: "Austin,Texas,United States",
hl: "en",
gl: "us",
}),
],
new ChatOpenAI({ modelName: "gpt-4-0613", temperature: 0 }),
{
agentType: "openai-functions",
verbose: true,
}
);
const result = await executor.run("What is the weather in New York?");
console.log(result);
// Follow-up question phrased relative to the first one ("that state").
// Presumably this relies on the executor's memory carrying the first turn — confirm.
const result2 = await executor.run(
"And what is the weather like in the capital of that state?"
);
console.log(result2);
});
// Integration test: same agent setup, but with a streaming chat model. It asks
// for intermediate steps, caps the run at 3 iterations, and logs the full
// result object. It makes no assertions on the output.
test("OpenAIAgent streaming", async () => {
const executor = await initializeAgentExecutorWithOptions(
[
new Calculator(),
new SerpAPI(process.env.SERPAPI_API_KEY, {
location: "Austin,Texas,United States",
hl: "en",
gl: "us",
}),
],
new ChatOpenAI({
modelName: "gpt-4-0613",
temperature: 0,
streaming: true,
}),
{
agentType: "openai-functions",
returnIntermediateSteps: true,
maxIterations: 3,
}
);
// `call` is used here rather than `run`, and the whole result object is logged.
const result = await executor.call({
input: "What is the weather in New York?",
});
console.log(result);
});
+1 -1
View File
@@ -5,7 +5,7 @@ import { BaseOutputParser } from "../schema/output_parser.js";
export interface AgentInput {
llmChain: LLMChain;
outputParser: AgentActionOutputParser;
outputParser: AgentActionOutputParser | undefined;
allowedTools?: string[];
}
@@ -100,7 +100,9 @@ test("Test Chat Model Run", async () => {
[
{
"data": {
"additional_kwargs": {},
"content": "Avast",
"name": undefined,
"role": undefined,
},
"type": "human",
+5 -2
View File
@@ -82,13 +82,16 @@ export abstract class BaseChain extends BaseLangChain implements ChainInputs {
input: any,
callbacks?: Callbacks
): Promise<string> {
const isKeylessInput = this.inputKeys.length <= 1;
const inputKeys = this.inputKeys.filter(
(k) => !this.memory?.memoryKeys.includes(k) ?? true
);
const isKeylessInput = inputKeys.length <= 1;
if (!isKeylessInput) {
throw new Error(
`Chain ${this._chainType()} expects multiple inputs, cannot use 'run' `
);
}
const values = this.inputKeys.length ? { [this.inputKeys[0]]: input } : {};
const values = inputKeys.length ? { [inputKeys[0]]: input } : {};
const returnValues = await this.call(values, callbacks);
const keys = Object.keys(returnValues);
+29 -20
View File
@@ -55,21 +55,26 @@ export class StuffDocumentsChain
}
/** @ignore */
async _call(
values: ChainValues,
runManager?: CallbackManagerForChainRun
): Promise<ChainValues> {
_prepInputs(values: ChainValues): ChainValues {
if (!(this.inputKey in values)) {
throw new Error(`Document key ${this.inputKey} not found.`);
}
const { [this.inputKey]: docs, ...rest } = values;
const texts = (docs as Document[]).map(({ pageContent }) => pageContent);
const text = texts.join("\n\n");
return {
...rest,
[this.documentVariableName]: text,
};
}
/** @ignore */
async _call(
values: ChainValues,
runManager?: CallbackManagerForChainRun
): Promise<ChainValues> {
const result = await this.llmChain.call(
{
...rest,
[this.documentVariableName]: text,
},
this._prepInputs(values),
runManager?.getChild("combine_documents")
);
return result;
@@ -105,7 +110,7 @@ export interface MapReduceDocumentsChainInput extends StuffDocumentsChainInput {
/** Ensures that the map step is taken regardless of max tokens */
ensureMapStep?: boolean;
/** Chain to use to combine results of applying llm_chain to documents. */
combineDocumentChain: BaseChain;
combineDocumentChain: StuffDocumentsChain;
/** Return the results of the map steps in the output. */
returnIntermediateSteps?: boolean;
}
@@ -141,7 +146,7 @@ export class MapReduceDocumentsChain
ensureMapStep = false;
combineDocumentChain: BaseChain;
combineDocumentChain: StuffDocumentsChain;
constructor(fields: MapReduceDocumentsChainInput) {
super(fields);
@@ -179,14 +184,15 @@ export class MapReduceDocumentsChain
const canSkipMapStep = i !== 0 || !this.ensureMapStep;
if (canSkipMapStep) {
// Calculate the total tokens required in the input
const promises = inputs.map(async (i) => {
const prompt = await this.llmChain.prompt.format(i);
return this.llmChain.llm.getNumTokens(prompt);
});
const length = await Promise.all(promises).then((results) =>
results.reduce((a, b) => a + b, 0)
);
const formatted =
await this.combineDocumentChain.llmChain.prompt.format(
this.combineDocumentChain._prepInputs({
[this.combineDocumentChain.inputKey]: currentDocs,
...rest,
})
);
const length =
await this.combineDocumentChain.llmChain.llm.getNumTokens(formatted);
const withinTokenLimit = length < this.maxTokens;
// If we can skip the map step, and we're within the token limit, we don't
@@ -223,7 +229,10 @@ export class MapReduceDocumentsChain
// Now, with the final result of all the inputs from the `llmChain`, we can
// run the `combineDocumentChain` over them.
const newInputs = { input_documents: currentDocs, ...rest };
const newInputs = {
[this.combineDocumentChain.inputKey]: currentDocs,
...rest,
};
const result = await this.combineDocumentChain.call(
newInputs,
runManager?.getChild("combine_documents")
@@ -251,7 +260,7 @@ export class MapReduceDocumentsChain
return new MapReduceDocumentsChain({
llmChain: await LLMChain.deserialize(data.llm_chain),
combineDocumentChain: await BaseChain.deserialize(
combineDocumentChain: await StuffDocumentsChain.deserialize(
data.combine_document_chain
),
});
+1 -1
View File
@@ -54,7 +54,7 @@ export type SerializedChatVectorDBQAChain = {
export type SerializedMapReduceDocumentsChain = {
_type: "map_reduce_documents_chain";
llm_chain?: SerializedLLMChain;
combine_document_chain?: SerializedBaseChain;
combine_document_chain?: SerializedStuffDocumentsChain;
};
export type SerializedRefineDocumentsChain = {
@@ -8,6 +8,7 @@ import {
} from "../../prompts/index.js";
import { LLMChain } from "../llm_chain.js";
import { loadChain } from "../load.js";
import { BufferMemory } from "../../memory/buffer_memory.js";
test("Test OpenAI", async () => {
const model = new OpenAI({ modelName: "text-ada-001" });
@@ -46,6 +47,21 @@ test("Test run method", async () => {
console.log({ res });
});
// Runs an LLMChain that has a BufferMemory attached through `run` with a
// single string input and logs whatever the model returns.
test("Test run method", async () => {
  const llm = new OpenAI({ modelName: "text-ada-001" });
  const template = "{history} Print {foo}";
  const prompt = new PromptTemplate({
    template,
    inputVariables: ["foo", "history"],
  });
  const memory = new BufferMemory();
  const chainWithMemory = new LLMChain({ prompt, llm, memory });

  const output = await chainWithMemory.run("my favorite color");
  console.log({ res: output });
});
test("Test apply", async () => {
const model = new OpenAI({ modelName: "text-ada-001" });
const prompt = new PromptTemplate({
+55 -14
View File
@@ -6,6 +6,9 @@ import {
CreateChatCompletionResponse,
ChatCompletionResponseMessageRoleEnum,
ChatCompletionRequestMessage,
ChatCompletionResponseMessage,
ChatCompletionFunctions,
CreateChatCompletionRequestFunctionCall,
} from "openai";
import { getEnvironmentVariable, isNode } from "../util/env.js";
import {
@@ -29,6 +32,8 @@ import {
import { getModelNameForTiktoken } from "../base_language/count_tokens.js";
import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
import { promptLayerTrackRequest } from "../util/prompt-layer.js";
import { StructuredTool } from "../tools/base.js";
import { formatToOpenAIFunction } from "../tools/convert_to_openai.js";
export { OpenAICallOptions, OpenAIChatInput, AzureOpenAIInput };
@@ -52,27 +57,36 @@ function messageTypeToOpenAIRole(
return "assistant";
case "human":
return "user";
case "function":
return "function";
default:
throw new Error(`Unknown message type: ${type}`);
}
}
function openAIResponseToChatMessage(
role: ChatCompletionResponseMessageRoleEnum | undefined,
text: string
message: ChatCompletionResponseMessage
): BaseChatMessage {
switch (role) {
switch (message.role) {
case "user":
return new HumanChatMessage(text);
return new HumanChatMessage(message.content || "");
case "assistant":
return new AIChatMessage(text);
return new AIChatMessage(message.content || "", {
function_call: message.function_call,
});
case "system":
return new SystemChatMessage(text);
return new SystemChatMessage(message.content || "");
default:
return new ChatMessage(text, role ?? "unknown");
return new ChatMessage(message.content || "", message.role ?? "unknown");
}
}
/**
 * Per-call options for `ChatOpenAI`: the base OpenAI call options plus the
 * function-calling fields below.
 */
export interface ChatOpenAICallOptions extends OpenAICallOptions {
/** Copied as-is onto the request's `function_call` parameter. */
function_call?: CreateChatCompletionRequestFunctionCall;
/** Function definitions for the request; when provided they take precedence over `tools`. */
functions?: ChatCompletionFunctions[];
/** Tools that are converted with `formatToOpenAIFunction` and sent as `functions` when `functions` is not provided. */
tools?: StructuredTool[];
}
/**
* Wrapper around OpenAI large language models that use the Chat endpoint.
*
@@ -95,10 +109,10 @@ export class ChatOpenAI
extends BaseChatModel
implements OpenAIChatInput, AzureOpenAIInput
{
declare CallOptions: OpenAICallOptions;
declare CallOptions: ChatOpenAICallOptions;
get callKeys(): (keyof OpenAICallOptions)[] {
return ["stop", "signal", "timeout", "options"];
get callKeys(): (keyof ChatOpenAICallOptions)[] {
return ["stop", "signal", "timeout", "options", "functions", "tools"];
}
lc_serializable = true;
@@ -284,6 +298,10 @@ export class ChatOpenAI
const params = this.invocationParams();
params.stop = options?.stop ?? params.stop;
params.functions =
options?.functions ??
(options?.tools ? options?.tools.map(formatToOpenAIFunction) : undefined);
params.function_call = options?.function_call;
const messagesMapped: ChatCompletionRequestMessage[] = messages.map(
(message) => ({
role: messageTypeToOpenAIRole(message._getType()),
@@ -323,7 +341,11 @@ export class ChatOpenAI
choices: Array<{
index: number;
finish_reason: string | null;
delta: { content?: string; role?: string };
delta: {
role?: string;
content?: string;
function_call?: { name: string; arguments: string };
};
}>;
};
@@ -357,17 +379,35 @@ export class ChatOpenAI
choice.message = {
role: part.delta
?.role as ChatCompletionResponseMessageRoleEnum,
content: part.delta?.content ?? "",
content: "",
};
}
if (
part.delta.function_call &&
!choice.message.function_call
) {
choice.message.function_call = {
name: "",
arguments: "",
};
}
choice.message.content += part.delta?.content ?? "";
if (choice.message.function_call) {
choice.message.function_call.name +=
part.delta?.function_call?.name ?? "";
choice.message.function_call.arguments +=
part.delta?.function_call?.arguments ?? "";
}
// TODO this should pass part.index to the callback
// when that's supported there
// eslint-disable-next-line no-void
void runManager?.handleLLMNewToken(
part.delta?.content ?? ""
);
// TODO we don't currently have a callback method for
// sending the function call arguments
}
}
@@ -421,11 +461,12 @@ export class ChatOpenAI
const generations: ChatGeneration[] = [];
for (const part of data.choices) {
const role = part.message?.role ?? undefined;
const text = part.message?.content ?? "";
generations.push({
text,
message: openAIResponseToChatMessage(role, text),
message: openAIResponseToChatMessage(
part.message ?? { role: "assistant" }
),
});
}
return {
+2 -2
View File
@@ -39,7 +39,7 @@ export class OpenAIEmbeddings
{
modelName = "text-embedding-ada-002";
batchSize = this.azureOpenAIApiKey ? 1 : 512;
batchSize = 512;
stripNewLines = true;
@@ -92,7 +92,7 @@ export class OpenAIEmbeddings
getEnvironmentVariable("AZURE_OPENAI_API_VERSION");
this.modelName = fields?.modelName ?? this.modelName;
this.batchSize = fields?.batchSize ?? this.batchSize;
this.batchSize = fields?.batchSize ?? azureApiKey ? 1 : this.batchSize;
this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
this.timeout = fields?.timeout;
+1
View File
@@ -33,6 +33,7 @@ export const optionalImportEntrypoints = [
"langchain/vectorstores/typeorm",
"langchain/vectorstores/myscale",
"langchain/vectorstores/redis",
"langchain/vectorstores/typesense",
"langchain/vectorstores/singlestore",
"langchain/vectorstores/tigris",
"langchain/memory/zep",
+3
View File
@@ -97,6 +97,9 @@ export interface OptionalImportMap {
"langchain/vectorstores/redis"?:
| typeof import("../vectorstores/redis.js")
| Promise<typeof import("../vectorstores/redis.js")>;
"langchain/vectorstores/typesense"?:
| typeof import("../vectorstores/typesense.js")
| Promise<typeof import("../vectorstores/typesense.js")>;
"langchain/vectorstores/singlestore"?:
| typeof import("../vectorstores/singlestore.js")
| Promise<typeof import("../vectorstores/singlestore.js")>;
+1 -1
View File
@@ -218,6 +218,6 @@ test("Test using partial", async () => {
expect(partialPrompt.inputVariables).toEqual(["bar"]);
expect(await partialPrompt.format({ bar: "baz" })).toMatchInlineSnapshot(
`"[{"type":"human","data":{"content":"foobaz"}}]"`
`"[{"type":"human","data":{"content":"foobaz","additional_kwargs":{}}}]"`
);
});
+21 -2
View File
@@ -52,6 +52,7 @@ export type LLMResult = {
/**
 * Payload of a serialized chat message, as produced by
 * `BaseChatMessage.toJSON()`.
 */
export interface StoredMessageData {
/** The text of the message. */
content: string;
/** The message's `role` when the message class has one, otherwise `undefined`. */
role: string | undefined;
/** The message's `name`, or `undefined` when none was set. */
name: string | undefined;
/** Additional keyword arguments carried by the message; may be absent. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
additional_kwargs?: Record<string, any>;
}
@@ -61,7 +62,7 @@ export interface StoredMessage {
data: StoredMessageData;
}
export type MessageType = "human" | "ai" | "generic" | "system";
export type MessageType = "human" | "ai" | "generic" | "system" | "function";
export abstract class BaseChatMessage {
/** The text of the message. */
@@ -70,11 +71,15 @@ export abstract class BaseChatMessage {
/** The name of the message sender in a multi-user chat. */
name?: string;
/** Additional keyword arguments */
additional_kwargs: Record<string, unknown> = {};
/** The type of the message. */
abstract _getType(): MessageType;
constructor(text: string) {
constructor(text: string, kwargs?: Record<string, unknown>) {
this.text = text;
this.additional_kwargs = kwargs || {};
}
toJSON(): StoredMessage {
@@ -83,6 +88,8 @@ export abstract class BaseChatMessage {
data: {
content: this.text,
role: "role" in this ? (this.role as string) : undefined,
name: this.name,
additional_kwargs: this.additional_kwargs,
},
};
}
@@ -106,6 +113,17 @@ export class SystemChatMessage extends BaseChatMessage {
}
}
/**
 * Chat message of type "function". It carries a text payload and the name
 * given at construction time.
 */
export class FunctionChatMessage extends BaseChatMessage {
/**
 * @param text Text of the message, handed to the base class constructor.
 * @param name Value stored on the message's `name` property.
 */
constructor(text: string, name: string) {
super(text);
this.name = name;
}
/** @returns The message type discriminator, always "function". */
_getType(): MessageType {
return "function";
}
}
export class ChatMessage extends BaseChatMessage {
role: string;
@@ -150,6 +168,7 @@ export type AgentFinish = {
returnValues: Record<string, any>;
log: string;
};
export type AgentStep = {
action: AgentAction;
observation: string;
+5 -1
View File
@@ -26,6 +26,7 @@ export function mapV1MessageToStoredMessage(
data: {
content: v1Message.text,
role: v1Message.role,
name: undefined,
},
};
}
@@ -40,7 +41,10 @@ export function mapStoredMessagesToChatMessages(
case "human":
return new HumanChatMessage(storedMessage.data.content);
case "ai":
return new AIChatMessage(storedMessage.data.content);
return new AIChatMessage(
storedMessage.data.content,
storedMessage.data.additional_kwargs
);
case "system":
return new SystemChatMessage(storedMessage.data.content);
case "chat":
+1
View File
@@ -23,6 +23,7 @@ test("mapV1MessageToStoredMessage", () => {
data: {
content: "Hello, world!",
role: "user",
name: undefined,
additional_kwargs: {
foo: "bar",
},
+14
View File
@@ -0,0 +1,14 @@
import { zodToJsonSchema } from "zod-to-json-schema";
import { ChatCompletionFunctions } from "openai";
import { StructuredTool } from "./base.js";
/**
 * Builds an OpenAI function definition from a structured tool.
 *
 * @param tool Tool whose `name`, `description` and zod `schema` are read.
 * @returns An object with the tool's name and description, and `parameters`
 * set to the result of running `zodToJsonSchema` on the tool's schema.
 */
export function formatToOpenAIFunction(
  tool: StructuredTool
): ChatCompletionFunctions {
  const { name, description, schema } = tool;
  const parameters = zodToJsonSchema(schema);
  return { name, description, parameters };
}
@@ -0,0 +1,115 @@
import { Client } from "typesense";
import { Document } from "../../document.js";
import { FakeEmbeddings } from "../../embeddings/fake.js";
import { Typesense } from "../typesense.js";
// Verifies that documents plus their vectors are flattened into Typesense
// records: page content under "text", the vector under "vec", and only the
// configured metadata columns (so `id` is not part of the expected records).
test("documentsToTypesenseRecords should return the correct typesense records", async () => {
  const fakeEmbeddings = new FakeEmbeddings();
  const store = new Typesense(fakeEmbeddings, {
    schemaName: "test",
    // The conversion under test never touches the client, so a stub is enough.
    typesenseClient: {} as unknown as Client,
    columnNames: {
      vector: "vec",
      pageContent: "text",
      metadataColumnNames: ["foo", "bar", "baz"],
    },
  });

  const firstDoc: Document = {
    pageContent: "hello world",
    metadata: { id: "1", foo: "fooo", bar: "barr", baz: "bazz" },
  };
  const secondDoc: Document = {
    pageContent: "hello world 2",
    metadata: { id: "2", foo: "foooo", bar: "barrr", baz: "bazzz" },
  };
  const inputDocs: Document[] = [firstDoc, secondDoc];

  const firstVector = await fakeEmbeddings.embedQuery("hello world");
  const secondVector = await fakeEmbeddings.embedQuery("hello world 2");
  const wantedRecords = [
    { text: "hello world", foo: "fooo", bar: "barr", baz: "bazz", vec: firstVector },
    {
      text: "hello world 2",
      foo: "foooo",
      bar: "barrr",
      baz: "bazzz",
      vec: secondVector,
    },
  ];

  const docVectors = await fakeEmbeddings.embedDocuments([
    "hello world",
    "hello world 2",
  ]);
  const actualRecords = await store._documentsToTypesenseRecords(
    inputDocs,
    docVectors
  );

  expect(actualRecords).toEqual(wantedRecords);
});
// Verifies the reverse mapping: Typesense records become documents whose
// page content comes from "text" and whose metadata holds only the configured
// metadata columns (the vector column is not carried over).
test("typesenseRecordsToDocuments should return the correct langchain documents", async () => {
  const fakeEmbeddings = new FakeEmbeddings();
  const store = new Typesense(fakeEmbeddings, {
    schemaName: "test",
    // The conversion under test never touches the client, so a stub is enough.
    typesenseClient: {} as unknown as Client,
    columnNames: {
      vector: "vec",
      pageContent: "text",
      metadataColumnNames: ["foo", "bar", "baz"],
    },
  });

  const firstVector = await fakeEmbeddings.embedQuery("hello world");
  const secondVector = await fakeEmbeddings.embedQuery("hello world 2");
  const storedRecords = [
    { text: "hello world", foo: "fooo", bar: "barr", baz: "bazz", vec: firstVector },
    {
      text: "hello world 2",
      foo: "foooo",
      bar: "barrr",
      baz: "bazzz",
      vec: secondVector,
    },
  ];

  const wantedDocs = [
    {
      pageContent: "hello world",
      metadata: { foo: "fooo", bar: "barr", baz: "bazz" },
    },
    {
      pageContent: "hello world 2",
      metadata: { foo: "foooo", bar: "barrr", baz: "bazzz" },
    },
  ];

  const actualDocs = store._typesenseRecordsToDocuments(storedRecords);
  expect(actualDocs).toEqual(wantedDocs);
});
+299
View File
@@ -0,0 +1,299 @@
import type { Client } from "typesense";
import type { MultiSearchRequestSchema } from "typesense/lib/Typesense/MultiSearch.js";
import type { Document } from "../document.js";
import { Embeddings } from "../embeddings/base.js";
import { VectorStore } from "./base.js";
import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
/**
 * Configuration for the Typesense vector store.
 *
 * Extends `AsyncCallerParams`; the whole config object is also handed to the
 * `AsyncCaller` that the store uses for its default import function.
 */
export interface TypesenseConfig extends AsyncCallerParams {
/**
 * Typesense client used for importing, searching and deleting documents.
 */
typesenseClient: Client;
/**
 * Name of the Typesense collection in which documents are stored and searched.
 */
schemaName: string;
/**
 * Base Typesense search parameters merged into every similarity search.
 * @default { q: '*', per_page: 5, query_by: '' }
 */
searchParams?: MultiSearchRequestSchema;
/**
 * Names of the Typesense columns the store reads and writes.
 */
columnNames?: {
/**
 * Name of the column holding the embedding vector.
 * @default 'vec'
 */
vector?: string;
/**
 * Name of the column holding the document's page content.
 * @default 'text'
 */
pageContent?: string;
/**
 * Metadata keys that are written as columns on import and read back as
 * document metadata on search. Metadata keys not listed here are ignored.
 * @default []
 */
metadataColumnNames?: string[];
};
/**
 * Replacement for the default import function.
 * The default implementation imports records in chunks of 2000 using the
 * "emplace" action with `dirty_values: "drop"`.
 * @param data Records to import.
 * @param collectionName Name of the collection to import into.
 */
import?<T extends Record<string, unknown> = Record<string, unknown>>(
data: T[],
collectionName: string
): Promise<void>;
}
/**
 * Typesense vector store.
 *
 * Documents are stored as flat Typesense records: one column for the page
 * content, one for the embedding vector, and one per configured metadata
 * column. Searches go through the multi-search endpoint using a
 * `vector_query`.
 */
export class Typesense extends VectorStore {
  declare FilterType: Partial<MultiSearchRequestSchema>;

  private client: Client;

  private schemaName: string;

  private searchParams: MultiSearchRequestSchema;

  private vectorColumnName: string;

  private pageContentColumnName: string;

  private metadataColumnNames: string[];

  private caller: AsyncCaller;

  private import: (
    data: Record<string, unknown>[],
    collectionName: string
  ) => Promise<void>;

  /**
   * @param embeddings Embeddings used to vectorize documents.
   * @param config Typesense configuration; omitted optional values fall back
   * to the defaults documented on `TypesenseConfig`.
   */
  constructor(embeddings: Embeddings, config: TypesenseConfig) {
    super(embeddings, config);

    // Assign config values to class properties.
    this.client = config.typesenseClient;
    this.schemaName = config.schemaName;
    this.searchParams = config.searchParams || {
      q: "*",
      per_page: 5,
      query_by: "",
    };
    this.vectorColumnName = config.columnNames?.vector || "vec";
    this.pageContentColumnName = config.columnNames?.pageContent || "text";
    this.metadataColumnNames = config.columnNames?.metadataColumnNames || [];

    // Assign import function: a user-supplied one wins over the default.
    this.import = config.import || this.importToTypesense.bind(this);

    this.caller = new AsyncCaller(config);
  }

  /**
   * Default function to import data to Typesense.
   * Records are sent sequentially in chunks of 2000 through the async caller,
   * using the "emplace" action and dropping dirty values.
   * @param data Records to import.
   * @param collectionName Name of the target collection.
   */
  private async importToTypesense<
    T extends Record<string, unknown> = Record<string, unknown>
  >(data: T[], collectionName: string) {
    const chunkSize = 2000;
    for (let i = 0; i < data.length; i += chunkSize) {
      const chunk = data.slice(i, i + chunkSize);

      await this.caller.call(async () => {
        await this.client
          .collections<T>(collectionName)
          .documents()
          .import(chunk, { action: "emplace", dirty_values: "drop" });
      });
    }
  }

  /**
   * Transform documents to Typesense records.
   * Only the metadata keys listed in `metadataColumnNames` are copied; a key
   * missing from a document's metadata yields `undefined` for that column.
   * @param documents Documents to convert.
   * @param vectors Embedding vectors, matched to `documents` by index.
   * @returns Typesense records.
   */
  _documentsToTypesenseRecords(
    documents: Document[],
    vectors: number[][]
  ): Record<string, unknown>[] {
    return documents.map((doc, index) => {
      const objectWithMetadatas: Record<string, unknown> = {};

      this.metadataColumnNames.forEach((metadataColumnName) => {
        objectWithMetadatas[metadataColumnName] =
          doc.metadata[metadataColumnName];
      });

      // Metadata columns are spread last, so a metadata column sharing a name
      // with the content or vector column overrides it.
      return {
        [this.pageContentColumnName]: doc.pageContent,
        [this.vectorColumnName]: vectors[index],
        ...objectWithMetadatas,
      };
    });
  }

  /**
   * Transform the Typesense records to documents.
   * @param typesenseRecords Records returned by Typesense; `undefined` is
   * treated as an empty list.
   * @returns Documents whose metadata contains only the configured metadata
   * columns and whose page content defaults to an empty string.
   */
  _typesenseRecordsToDocuments(
    typesenseRecords: Record<string, unknown>[] | undefined
  ): Document[] {
    const documents =
      typesenseRecords?.map((hit) => {
        const objectWithMetadatas: Record<string, unknown> = {};

        this.metadataColumnNames.forEach((metadataColumnName) => {
          objectWithMetadatas[metadataColumnName] = hit[metadataColumnName];
        });

        const document: Document = {
          pageContent: (hit[this.pageContentColumnName] as string) || "",
          metadata: objectWithMetadatas,
        };
        return document;
      }) || [];

    return documents;
  }

  /**
   * Add documents to the vector store.
   * The documents are embedded, converted to records and passed to the import
   * function. Metadata is written to the columns named in
   * `metadataColumnNames`; a document's `id` is therefore only sent to
   * Typesense when "id" is one of those columns.
   * @param documents Documents to add.
   */
  async addDocuments(documents: Document[]) {
    const typesenseDocuments = this._documentsToTypesenseRecords(
      documents,
      await this.embeddings.embedDocuments(
        documents.map((doc) => doc.pageContent)
      )
    );
    await this.import(typesenseDocuments, this.schemaName);
  }

  /**
   * Add documents with precomputed vectors to the vector store.
   * @param vectors Embedding vectors, matched to `documents` by index.
   * @param documents Documents to add.
   */
  async addVectors(vectors: number[][], documents: Document[]) {
    const typesenseDocuments = this._documentsToTypesenseRecords(
      documents,
      vectors
    );
    await this.import(typesenseDocuments, this.schemaName);
  }

  /**
   * Search for similar documents with their similarity score.
   * Every returned score is 0: this implementation does not read a score
   * from the Typesense response.
   * @param vectorPrompt vector to search for
   * @param k amount of results to return; falls back to
   * `searchParams.per_page`, then to 5
   * @param filter extra search parameters merged over the configured ones
   * @returns similar documents with their similarity score
   */
  async similaritySearchVectorWithScore(
    vectorPrompt: number[],
    k?: number,
    filter: this["FilterType"] = {}
  ) {
    const amount = k || this.searchParams.per_page || 5;
    const vector_query = `${this.vectorColumnName}:([${vectorPrompt}], k:${amount})`;
    const typesenseResponse = await this.client.multiSearch.perform(
      {
        searches: [
          {
            ...this.searchParams,
            ...filter,
            per_page: amount,
            vector_query,
            collection: this.schemaName,
          },
        ],
      },
      {}
    );
    // Guard against a response without any result entry instead of throwing
    // on `results[0]`.
    const results = typesenseResponse.results[0]?.hits;

    const hits = results?.map((hit) => hit.document) as
      | Record<string, unknown>[]
      | undefined;

    const documents = this._typesenseRecordsToDocuments(hits).map(
      (doc) => [doc, 0] as [Document<Record<string, unknown>>, number]
    );

    return documents;
  }

  /**
   * Delete documents from the vector store.
   * Does nothing when `documentIds` is empty.
   * @param documentIds ids of the documents to delete
   */
  async deleteDocuments(documentIds: string[]) {
    // An empty list would produce the invalid filter `id:=[]`.
    if (documentIds.length === 0) {
      return;
    }

    await this.client
      .collections(this.schemaName)
      .documents()
      .delete({
        // Typesense matches several values with the bracketed list syntax;
        // a bare comma-joined string would be read as one single id.
        filter_by: `id:=[${documentIds.join(",")}]`,
      });
  }

  /**
   * Create a vector store from documents.
   * @param docs documents
   * @param embeddings embeddings
   * @param config Typesense configuration
   * @returns Typesense vector store
   * @warning You can omit this method, and only use the constructor and addDocuments.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    config: TypesenseConfig
  ): Promise<Typesense> {
    const instance = new Typesense(embeddings, config);
    await instance.addDocuments(docs);

    return instance;
  }

  /**
   * Create a vector store from texts.
   * @param texts Page contents of the documents to create.
   * @param metadatas Metadata objects matched to `texts` by index; a missing
   * entry becomes an empty object.
   * @param embeddings embeddings
   * @param config Typesense configuration
   * @returns Typesense vector store
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[],
    embeddings: Embeddings,
    config: TypesenseConfig
  ) {
    const instance = new Typesense(embeddings, config);
    const documents: Document[] = texts.map((text, i) => ({
      pageContent: text,
      metadata: metadatas[i] || {},
    }));
    await instance.addDocuments(documents);

    return instance;
  }
}
+1
View File
@@ -81,6 +81,7 @@
"src/vectorstores/typeorm.ts",
"src/vectorstores/myscale.ts",
"src/vectorstores/redis.ts",
"src/vectorstores/typesense.ts",
"src/vectorstores/singlestore.ts",
"src/vectorstores/tigris.ts",
"src/text_splitter.ts",
+29 -5
View File
@@ -14278,6 +14278,7 @@ __metadata:
tsx: ^3.12.3
typeorm: ^0.3.12
typescript: ^5.0.0
typesense: ^1.5.3
weaviate-ts-client: ^1.0.0
zod: ^3.21.4
languageName: unknown
@@ -18660,7 +18661,7 @@ __metadata:
mongodb: ^5.2.0
mysql2: ^3.3.3
object-hash: ^3.0.0
openai: ^3.2.0
openai: ^3.3.0
p-queue: ^6.6.2
p-retry: 4
pdf-parse: 1.1.1
@@ -18680,6 +18681,7 @@ __metadata:
ts-jest: ^29.1.0
typeorm: ^0.3.12
typescript: ^5.0.0
typesense: ^1.5.3
uuid: ^9.0.0
weaviate-ts-client: ^1.0.0
yaml: ^2.2.1
@@ -18732,6 +18734,7 @@ __metadata:
replicate: ^0.9.0
srt-parser-2: ^1.2.2
typeorm: ^0.3.12
typesense: ^1.5.3
weaviate-ts-client: ^1.0.0
peerDependenciesMeta:
"@aws-sdk/client-dynamodb":
@@ -18826,6 +18829,8 @@ __metadata:
optional: true
typeorm:
optional: true
typesense:
optional: true
weaviate-ts-client:
optional: true
languageName: unknown
@@ -19203,6 +19208,13 @@ __metadata:
languageName: node
linkType: hard
"loglevel@npm:^1.8.0":
version: 1.8.1
resolution: "loglevel@npm:1.8.1"
checksum: a1a62db40291aaeaef2f612334c49e531bff71cc1d01a2acab689ab80d59e092f852ab164a5aedc1a752fdc46b7b162cb097d8a9eb2cf0b299511106c29af61d
languageName: node
linkType: hard
"long@npm:4.0.0, long@npm:^4.0.0":
version: 4.0.0
resolution: "long@npm:4.0.0"
@@ -20786,13 +20798,13 @@ __metadata:
languageName: node
linkType: hard
"openai@npm:^3.2.0":
version: 3.2.1
resolution: "openai@npm:3.2.1"
"openai@npm:^3.3.0":
version: 3.3.0
resolution: "openai@npm:3.3.0"
dependencies:
axios: ^0.26.0
form-data: ^4.0.0
checksum: ef3942e9b527cf27273c4355bb8fb9ebd94ae3a88c12eec0ac51c4ef0ad8c18864683759471597390816bcd822bdc9f2f1cea7a3eb1e432c9101f568f7c6d19a
checksum: 28ccff8c09b6f47828c9583bb3bafc38a8459c76ea10eb9e08ca880f65523c5a9cc6c5f3c7669dded6f4c93e7cf49dd5c4dbfd12732a0f958c923117740d677b
languageName: node
linkType: hard
@@ -26674,6 +26686,18 @@ __metadata:
languageName: node
linkType: hard
"typesense@npm:^1.5.3":
version: 1.5.3
resolution: "typesense@npm:1.5.3"
dependencies:
axios: ^0.26.0
loglevel: ^1.8.0
peerDependencies:
"@babel/runtime": ^7.17.2
checksum: 6315139ca0be12f558a8ffb78e8055497941ba625fd61729ebe05d4af7d69524d89d1bd56a4a3f38df32fe41216ab6fde2e97bcddddce8af8328d5cddba85f82
languageName: node
linkType: hard
"ua-parser-js@npm:^0.7.30":
version: 0.7.34
resolution: "ua-parser-js@npm:0.7.34"