mirror of
https://github.com/Mintplex-Labs/langchainjs.git
synced 2026-07-01 12:17:38 -04:00
New Feat: Added Weaviate self-query retriever (#1541)
* added weaviate self-query * Update weaviate_translator.ts Make the AllowedOperator consistent with the other types. * replaced neq with ne for translator * Removed in and nin as comparator * Update docs and entrypoint * Fix api key --------- Co-authored-by: jacoblee93 <jacoblee93@gmail.com> Co-authored-by: Nuno Campos <nuno@boringbits.io>
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
# Weaviate Self Query Retriever
|
||||
|
||||
This example shows how to use a self query retriever with a [Weaviate](https://weaviate.io/) vector store.
|
||||
|
||||
If you haven't already set up Weaviate, please [follow the instructions here](/docs/modules/indexes/vector_stores/integrations/weaviate.mdx).
|
||||
|
||||
## Usage
|
||||
|
||||
This example shows how to initialize a `SelfQueryRetriever` with a vector store:
|
||||
|
||||
import CodeBlock from "@theme/CodeBlock";
|
||||
import Example from "@examples/retrievers/weaviate_self_query.ts";
|
||||
|
||||
<CodeBlock language="typescript">{Example}</CodeBlock>
|
||||
@@ -1,4 +1,5 @@
|
||||
import { createClient } from "@supabase/supabase-js";
|
||||
|
||||
import { AttributeInfo } from "langchain/schema/query_constructor";
|
||||
import { Document } from "langchain/document";
|
||||
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
|
||||
@@ -83,9 +84,6 @@ const attributeInfo: AttributeInfo[] = [
|
||||
|
||||
/**
|
||||
* Next, we instantiate a vector store. This is where we store the embeddings of the documents.
|
||||
* We use the Pinecone vector store here, but you can use any vector store you want.
|
||||
* At this point we only support Chroma and Pinecone, but we will add more in the future.
|
||||
* We also need to provide an embeddings object. This is used to embed the documents.
|
||||
*/
|
||||
if (!process.env.SUPABASE_URL || !process.env.SUPABASE_PRIVATE_KEY) {
|
||||
throw new Error(
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
import weaviate from "weaviate-ts-client";
|
||||
|
||||
import { AttributeInfo } from "langchain/schema/query_constructor";
|
||||
import { Document } from "langchain/document";
|
||||
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
|
||||
import { SelfQueryRetriever } from "langchain/retrievers/self_query";
|
||||
import { OpenAI } from "langchain/llms/openai";
|
||||
import { WeaviateStore } from "langchain/vectorstores/weaviate";
|
||||
import { WeaviateTranslator } from "langchain/retrievers/self_query/weaviate";
|
||||
|
||||
/**
|
||||
* First, we create a bunch of documents. You can load your own documents here instead.
|
||||
* Each document has a pageContent and a metadata field. Make sure your metadata matches the AttributeInfo below.
|
||||
*/
|
||||
const docs = [
|
||||
new Document({
|
||||
pageContent:
|
||||
"A bunch of scientists bring back dinosaurs and mayhem breaks loose",
|
||||
metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
|
||||
metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
|
||||
metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"A bunch of normal-sized women are supremely wholesome and some men pine after them",
|
||||
metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent: "Toys come alive and have a blast doing so",
|
||||
metadata: { year: 1995, genre: "animated" },
|
||||
}),
|
||||
new Document({
|
||||
pageContent: "Three men walk into the Zone, three men walk out of the Zone",
|
||||
metadata: {
|
||||
year: 1979,
|
||||
director: "Andrei Tarkovsky",
|
||||
genre: "science fiction",
|
||||
rating: 9.9,
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
/**
|
||||
* Next, we define the attributes we want to be able to query on.
|
||||
* In this case, we want to be able to query on the genre, year, director, rating, and length of the movie.
|
||||
* We also provide a description of each attribute and the type of the attribute.
|
||||
* This is used to generate the query prompts.
|
||||
*/
|
||||
const attributeInfo: AttributeInfo[] = [
|
||||
{
|
||||
name: "genre",
|
||||
description: "The genre of the movie",
|
||||
type: "string or array of strings",
|
||||
},
|
||||
{
|
||||
name: "year",
|
||||
description: "The year the movie was released",
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
name: "director",
|
||||
description: "The director of the movie",
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
name: "rating",
|
||||
description: "The rating of the movie (1-10)",
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
name: "length",
|
||||
description: "The length of the movie in minutes",
|
||||
type: "number",
|
||||
},
|
||||
];
|
||||
|
||||
/**
|
||||
* Next, we instantiate a vector store. This is where we store the embeddings of the documents.
|
||||
*/
|
||||
const embeddings = new OpenAIEmbeddings();
|
||||
const llm = new OpenAI();
|
||||
const documentContents = "Brief summary of a movie";
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const client = (weaviate as any).client({
|
||||
scheme: process.env.WEAVIATE_SCHEME || "https",
|
||||
host: process.env.WEAVIATE_HOST || "localhost",
|
||||
apiKey: process.env.WEAVIATE_API_KEY
|
||||
? // eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
new (weaviate as any).ApiKey(process.env.WEAVIATE_API_KEY)
|
||||
: undefined,
|
||||
});
|
||||
|
||||
const vectorStore = await WeaviateStore.fromDocuments(docs, embeddings, {
|
||||
client,
|
||||
indexName: "Test",
|
||||
textKey: "text",
|
||||
metadataKeys: ["year", "director", "rating", "genre"],
|
||||
});
|
||||
const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
|
||||
llm,
|
||||
vectorStore,
|
||||
documentContents,
|
||||
attributeInfo,
|
||||
/**
|
||||
* We need to use a translator that translates the queries into a
|
||||
* filter format that the vector store can understand. LangChain provides one here.
|
||||
*/
|
||||
structuredQueryTranslator: new WeaviateTranslator(),
|
||||
});
|
||||
|
||||
/**
|
||||
* Now we can query the vector store.
|
||||
* We can ask questions like "Which movies are less than 90 minutes?" or "Which movies are rated higher than 8.5?".
|
||||
* We can also ask questions like "Which movies are either comedy or drama and are less than 90 minutes?".
|
||||
* The retriever will automatically convert these questions into queries that can be used to retrieve documents.
|
||||
*
|
||||
* Note that unlike other vector stores, you have to make sure all metadata keys are actually present in the database,
|
||||
* meaning that Weaviate will throw an error if the self query chain generates a query with a metadata key that does
|
||||
* not exist in your Weaviate database.
|
||||
*/
|
||||
const query1 = await selfQueryRetriever.getRelevantDocuments(
|
||||
"Which movies are rated higher than 8.5?"
|
||||
);
|
||||
const query2 = await selfQueryRetriever.getRelevantDocuments(
|
||||
"Which movies are directed by Greta Gerwig?"
|
||||
);
|
||||
console.log(query1, query2);
|
||||
@@ -334,6 +334,9 @@ retrievers/self_query/pinecone.d.ts
|
||||
retrievers/self_query/supabase.cjs
|
||||
retrievers/self_query/supabase.js
|
||||
retrievers/self_query/supabase.d.ts
|
||||
retrievers/self_query/weaviate.cjs
|
||||
retrievers/self_query/weaviate.js
|
||||
retrievers/self_query/weaviate.d.ts
|
||||
retrievers/vespa.cjs
|
||||
retrievers/vespa.js
|
||||
retrievers/vespa.d.ts
|
||||
|
||||
@@ -346,6 +346,9 @@
|
||||
"retrievers/self_query/supabase.cjs",
|
||||
"retrievers/self_query/supabase.js",
|
||||
"retrievers/self_query/supabase.d.ts",
|
||||
"retrievers/self_query/weaviate.cjs",
|
||||
"retrievers/self_query/weaviate.js",
|
||||
"retrievers/self_query/weaviate.d.ts",
|
||||
"retrievers/vespa.cjs",
|
||||
"retrievers/vespa.js",
|
||||
"retrievers/vespa.d.ts",
|
||||
@@ -1314,6 +1317,11 @@
|
||||
"import": "./retrievers/self_query/supabase.js",
|
||||
"require": "./retrievers/self_query/supabase.cjs"
|
||||
},
|
||||
"./retrievers/self_query/weaviate": {
|
||||
"types": "./retrievers/self_query/weaviate.d.ts",
|
||||
"import": "./retrievers/self_query/weaviate.js",
|
||||
"require": "./retrievers/self_query/weaviate.cjs"
|
||||
},
|
||||
"./retrievers/vespa": {
|
||||
"types": "./retrievers/vespa.d.ts",
|
||||
"import": "./retrievers/vespa.js",
|
||||
|
||||
@@ -140,6 +140,7 @@ const entrypoints = {
|
||||
"retrievers/self_query/functional": "retrievers/self_query/functional",
|
||||
"retrievers/self_query/pinecone": "retrievers/self_query/pinecone",
|
||||
"retrievers/self_query/supabase": "retrievers/self_query/supabase",
|
||||
"retrievers/self_query/weaviate": "retrievers/self_query/weaviate",
|
||||
"retrievers/vespa": "retrievers/vespa",
|
||||
// cache
|
||||
cache: "cache/index",
|
||||
@@ -245,6 +246,7 @@ const requiresOptionalDependency = [
|
||||
"retrievers/self_query/functional",
|
||||
"retrievers/self_query/pinecone",
|
||||
"retrievers/self_query/supabase",
|
||||
"retrievers/self_query/weaviate",
|
||||
"output_parsers/expression",
|
||||
"chains/query_constructor",
|
||||
"chains/query_constructor/ir",
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
/* eslint-disable no-process-env */
|
||||
import { test } from "@jest/globals";
|
||||
import weaviate from "weaviate-ts-client";
|
||||
import { Document } from "../../../document.js";
|
||||
import { AttributeInfo } from "../../../schema/query_constructor.js";
|
||||
import { OpenAIEmbeddings } from "../../../embeddings/openai.js";
|
||||
import { SelfQueryRetriever } from "../index.js";
|
||||
import { OpenAI } from "../../../llms/openai.js";
|
||||
import { WeaviateStore } from "../../../vectorstores/weaviate.js";
|
||||
import { WeaviateTranslator } from "../weaviate.js";
|
||||
|
||||
test("Weaviate Self Query Retriever Test", async () => {
|
||||
const docs = [
|
||||
new Document({
|
||||
pageContent:
|
||||
"A bunch of scientists bring back dinosaurs and mayhem breaks loose",
|
||||
metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
|
||||
metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
|
||||
metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"A bunch of normal-sized women are supremely wholesome and some men pine after them",
|
||||
metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
|
||||
}),
|
||||
new Document({
|
||||
pageContent: "Toys come alive and have a blast doing so",
|
||||
metadata: { year: 1995, genre: "animated" },
|
||||
}),
|
||||
new Document({
|
||||
pageContent:
|
||||
"Three men walk into the Zone, three men walk out of the Zone",
|
||||
metadata: {
|
||||
year: 1979,
|
||||
director: "Andrei Tarkovsky",
|
||||
genre: "science fiction",
|
||||
rating: 9.9,
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
const attributeInfo: AttributeInfo[] = [
|
||||
{
|
||||
name: "genre",
|
||||
description: "The genre of the movie",
|
||||
type: "string or array of strings",
|
||||
},
|
||||
{
|
||||
name: "year",
|
||||
description: "The year the movie was released",
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
name: "director",
|
||||
description: "The director of the movie",
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
name: "rating",
|
||||
description: "The rating of the movie (1-10)",
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
name: "length",
|
||||
description: "The length of the movie in minutes",
|
||||
type: "number",
|
||||
},
|
||||
];
|
||||
|
||||
const embeddings = new OpenAIEmbeddings();
|
||||
const llm = new OpenAI({
|
||||
modelName: "gpt-3.5-turbo",
|
||||
});
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const client = (weaviate as any).client({
|
||||
scheme: process.env.WEAVIATE_SCHEME || "https",
|
||||
host: process.env.WEAVIATE_HOST || "localhost",
|
||||
apiKey: process.env.WEAVIATE_API_KEY
|
||||
? // eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
new (weaviate as any).ApiKey(process.env.WEAVIATE_API_KEY)
|
||||
: undefined,
|
||||
});
|
||||
|
||||
const documentContents = "Brief summary of a movie";
|
||||
const vectorStore = await WeaviateStore.fromDocuments(docs, embeddings, {
|
||||
client,
|
||||
indexName: "Test",
|
||||
textKey: "text",
|
||||
metadataKeys: ["year", "director", "rating", "genre"],
|
||||
});
|
||||
const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
|
||||
llm,
|
||||
vectorStore,
|
||||
documentContents,
|
||||
attributeInfo,
|
||||
structuredQueryTranslator: new WeaviateTranslator(),
|
||||
});
|
||||
|
||||
const query2 = await selfQueryRetriever.getRelevantDocuments(
|
||||
"Which movies are rated higher than 8.5?"
|
||||
);
|
||||
const query3 = await selfQueryRetriever.getRelevantDocuments(
|
||||
"Which movies are directed by Greta Gerwig?"
|
||||
);
|
||||
console.log(query2, query3);
|
||||
});
|
||||
@@ -0,0 +1,156 @@
|
||||
import {
|
||||
Comparator,
|
||||
Comparators,
|
||||
Comparison,
|
||||
NOT,
|
||||
Operation,
|
||||
Operator,
|
||||
Operators,
|
||||
StructuredQuery,
|
||||
Visitor,
|
||||
} from "../../chains/query_constructor/ir.js";
|
||||
import { BaseTranslator } from "./base.js";
|
||||
|
||||
/**
 * Logical operators the translator can map to a Weaviate operator name.
 * `NOT` is excluded because the translator has no mapping for it.
 */
type AllowedOperator = Exclude<Operator, NOT>;

/**
 * The value fields a translated comparison may carry, keyed by field name.
 * The translator emits `valueText` for strings, `valueInt` for integral
 * numbers and `valueNumber` for non-integral numbers.
 */
type WeaviateOperatorValues = {
  valueText: string;
  valueInt: number;
  valueNumber: number;
  valueBoolean: boolean;
};

/** Names of the value fields listed in {@link WeaviateOperatorValues}. */
type WeaviateOperatorKeys = keyof WeaviateOperatorValues;

/**
 * Union of object shapes in which exactly one value field is present.
 * For each key `L`, the shape requires `L` with its proper type and types
 * every other value field as `never`, so two value fields cannot be combined.
 */
type ExclusiveOperatorValue = {
  [L in WeaviateOperatorKeys]: {
    [key in L]: WeaviateOperatorValues[key];
  } & Omit<{ [key in WeaviateOperatorKeys]?: never }, L>;
}[WeaviateOperatorKeys];

/** Any value a `WeaviateTranslator` visit method can produce. */
export type WeaviateVisitorResult =
  | WeaviateOperationResult
  | WeaviateComparisonResult
  | WeaviateStructuredQueryResult;

/** A logical operation (e.g. `And` / `Or`) applied to nested filter results. */
export type WeaviateOperationResult = {
  operator: string;
  operands: WeaviateVisitorResult[];
};

/**
 * A single comparison on one attribute: the attribute path, the operator
 * name, and exactly one typed value field.
 */
export type WeaviateComparisonResult = {
  path: [string];
  operator: string;
} & ExclusiveOperatorValue;

/**
 * Result of translating a whole structured query.
 * When the query has a filter, the translated comparison or operation is
 * nested under `filter.where`; when it has none, the result is an empty object.
 */
export type WeaviateStructuredQueryResult = {
  filter?: {
    where: WeaviateComparisonResult | WeaviateOperationResult;
  };
};
|
||||
|
||||
/**
 * Translates a structured query (operations and comparisons from the query
 * constructor IR) into plain objects using Weaviate-style operator names
 * (`And`, `Or`, `Equal`, `LessThan`, ...) and typed value fields.
 */
export class WeaviateTranslator extends BaseTranslator {
  declare VisitOperationOutput: WeaviateOperationResult;

  declare VisitComparisonOutput: WeaviateComparisonResult;

  declare VisitStructuredQueryOutput: WeaviateStructuredQueryResult;

  /** Logical operators this translator accepts. */
  allowedOperators: Operator[] = [Operators.and, Operators.or];

  /** Comparators this translator accepts. */
  allowedComparators: Comparator[] = [
    Comparators.eq,
    Comparators.ne,
    Comparators.lt,
    Comparators.lte,
    Comparators.gt,
    Comparators.gte,
  ];

  /**
   * Maps an IR operator or comparator to the operator name used in the
   * translated filter.
   *
   * @param func - The operator or comparator to translate.
   * @returns The translated operator name, e.g. `"And"` or `"GreaterThan"`.
   * @throws Error if `func` is a known comparator/operator that is not in the
   *   corresponding allow-list, or if it is neither a comparator nor an operator.
   */
  formatFunction(func: Operator | Comparator): string {
    if (func in Comparators) {
      // An empty allow-list means "no restriction", so only check when non-empty.
      if (
        this.allowedComparators.length > 0 &&
        !this.allowedComparators.includes(func as Comparator)
      ) {
        const allowed = this.allowedComparators.join(", ");
        throw new Error(
          `Comparator ${func} not allowed. Allowed comparators: ${allowed}`
        );
      }
    } else if (func in Operators) {
      if (
        this.allowedOperators.length > 0 &&
        !this.allowedOperators.includes(func as Operator)
      ) {
        const allowed = this.allowedOperators.join(", ");
        throw new Error(
          `Operator ${func} not allowed. Allowed operators: ${allowed}`
        );
      }
    } else {
      throw new Error("Unknown comparator or operator");
    }

    const dict = {
      and: "And",
      or: "Or",
      eq: "Equal",
      ne: "NotEqual",
      lt: "LessThan",
      lte: "LessThanEqual",
      gt: "GreaterThan",
      gte: "GreaterThanEqual",
    };
    return dict[func as Comparator | AllowedOperator];
  }

  /**
   * Translates a logical operation by visiting each argument and wrapping
   * the results with the translated operator name.
   *
   * @param operation - The operation node to translate.
   * @returns An object with the translated `operator` and the visited `operands`.
   * @throws Error if the operator is rejected by {@link formatFunction}.
   */
  visitOperation(operation: Operation): this["VisitOperationOutput"] {
    // Arguments are visited before the operator is validated, so an error
    // raised while translating an argument surfaces first.
    const operands = operation.args?.map((arg) =>
      arg.accept(this as Visitor)
    ) as WeaviateVisitorResult[];
    return {
      operator: this.formatFunction(operation.operator),
      operands,
    };
  }

  /**
   * Translates a single attribute comparison, choosing the value field from
   * the runtime type of the compared value: `valueText` for strings,
   * `valueInt` for integral numbers, `valueNumber` for other numbers.
   *
   * @param comparison - The comparison node to translate.
   * @returns The attribute path, translated operator and one typed value field.
   * @throws Error if the value is neither a string nor a number, or if the
   *   comparator is rejected by {@link formatFunction}.
   */
  visitComparison(comparison: Comparison): this["VisitComparisonOutput"] {
    const { attribute, comparator, value } = comparison;

    // Reject unsupported value types before validating the comparator so the
    // type error takes precedence.
    if (typeof value !== "string" && typeof value !== "number") {
      throw new Error("Value type is not supported");
    }

    const path: [string] = [attribute];
    const operator = this.formatFunction(comparator);

    if (typeof value === "string") {
      return { path, operator, valueText: value };
    }
    if (Number.isInteger(value)) {
      return { path, operator, valueInt: value };
    }
    return { path, operator, valueNumber: value };
  }

  /**
   * Translates a complete structured query.
   *
   * @param query - The structured query to translate.
   * @returns `{ filter: { where: ... } }` holding the translated filter when
   *   the query has one, otherwise an empty object.
   */
  visitStructuredQuery(
    query: StructuredQuery
  ): this["VisitStructuredQueryOutput"] {
    // Start from an empty object so a query without a filter translates to `{}`.
    let translated = {};
    if (query.filter) {
      translated = {
        filter: { where: query.filter.accept(this as Visitor) },
      };
    }
    return translated;
  }
}
|
||||
@@ -138,6 +138,7 @@
|
||||
"src/retrievers/self_query/functional.ts",
|
||||
"src/retrievers/self_query/pinecone.ts",
|
||||
"src/retrievers/self_query/supabase.ts",
|
||||
"src/retrievers/self_query/weaviate.ts",
|
||||
"src/retrievers/vespa.ts",
|
||||
"src/cache/index.ts",
|
||||
"src/cache/momento.ts",
|
||||
|
||||
Reference in New Issue
Block a user