Polish template (#3)

Co-authored-by: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
2026-07-01 15:24:18 -04:00 · 2024-09-18 17:33:36 -07:00
parent 4451066f7d
commit 0b37680e4c
13 changed files with 1544 additions and 140 deletions
@@ -8,6 +8,10 @@ ANTHROPIC_API_KEY=....
 FIREWORKS_API_KEY=...
 OPENAI_API_KEY=...

+# Embeddings provider: set the key for the one you use (the OpenAI key is the same one listed above)
+OPENAI_API_KEY=...
+COHERE_API_KEY=...
+
 # Retrieval provider

 ## Elastic cloud:
@@ -23,5 +27,5 @@ ELASTICSEARCH_PASSWORD=changeme
 PINECONE_API_KEY=...
 PINECONE_INDEX_NAME=...

-## Mongo Atlas
+## MongoDB Atlas
 MONGODB_URI=... # Full connection string
@@ -0,0 +1,62 @@
+module.exports = { // ESLint configuration for the project's TypeScript sources
+  extends: [ // shared rule sets; later entries take precedence over earlier ones
+    "eslint:recommended",
+    "prettier",
+    "plugin:@typescript-eslint/recommended",
+  ],
+  parserOptions: {
+    ecmaVersion: 12, // ES2021 syntax
+    parser: "@typescript-eslint/parser", // NOTE(review): ESLint normally reads "parser" as a top-level key, not inside parserOptions — confirm this placement is intended
+    project: "./tsconfig.json", // tsconfig used by the type-aware rules (no-floating-promises, no-misused-promises)
+    sourceType: "module",
+  },
+  plugins: ["import", "@typescript-eslint", "no-instanceof"],
+  ignorePatterns: [ // skip this config file, scripts, dependencies, build output, and all JS/CJS/declaration files
+    ".eslintrc.cjs",
+    "scripts",
+    "node_modules",
+    "dist",
+    "dist-cjs",
+    "*.js",
+    "*.cjs",
+    "*.d.ts",
+  ],
+  rules: { // numeric severities: 0 = off, 1 = warn, 2 = error
+    "no-process-env": 0,
+    "no-instanceof/no-instanceof": 2, // any use of instanceof is reported as an error
+    "@typescript-eslint/explicit-module-boundary-types": 0,
+    "@typescript-eslint/no-empty-function": 0,
+    "@typescript-eslint/no-non-null-assertion": 0,
+    "@typescript-eslint/no-shadow": 0,
+    "@typescript-eslint/no-empty-interface": 0,
+    "@typescript-eslint/no-use-before-define": ["error", "nofunc"], // "nofunc": function declarations may be referenced before they are defined
+    "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], // unused function arguments are not reported
+    "@typescript-eslint/no-floating-promises": "error", // promises must be awaited, returned, or explicitly handled
+    "@typescript-eslint/no-misused-promises": "error",
+    camelcase: 0,
+    "class-methods-use-this": 0,
+    "import/extensions": [2, "ignorePackages"], // file extensions are required on imports, except for package imports
+    "import/no-extraneous-dependencies": [
+      "error",
+      { devDependencies: ["**/*.test.ts"] }, // only test files may import devDependencies
+    ],
+    "import/no-unresolved": 0,
+    "import/prefer-default-export": 0,
+    "keyword-spacing": "error",
+    "max-classes-per-file": 0,
+    "max-len": 0,
+    "no-await-in-loop": 0,
+    "no-bitwise": 0,
+    "no-console": 0,
+    "no-restricted-syntax": 0,
+    "no-shadow": 0,
+    "no-continue": 0,
+    "no-underscore-dangle": 0,
+    "no-use-before-define": 0, // base rule disabled; the @typescript-eslint variant above is used instead
+    "no-useless-constructor": 0,
+    "no-return-await": 0,
+    "consistent-return": 0,
+    "no-else-return": 0,
+    "new-cap": ["error", { properties: false, capIsNew: false }], // capitalized functions may be called without "new"; object properties are not checked
+  },
+};
@@ -32,7 +32,6 @@ Assuming you have already [installed LangGraph Studio](https://github.com/langch

 ```bash
   cp .env.example .env
-
 ```

 2. Select your retriever & index, and save the access instructions to your `.env` file.
@@ -83,28 +82,6 @@ ELASTICSEARCH_USER=elastic
 ELASTICSEARCH_PASSWORD=changeme
 ```

-### MongoDB Atlas
-
-MongoDB Atlas is a fully-managed cloud database that includes vector search capabilities for AI-powered applications.
-
-1. Create a free Atlas cluster:
-
- Go to the [MongoDB Atlas website](https://www.mongodb.com/cloud/atlas/register) and sign up for a free account.
- After logging in, create a free cluster by following the on-screen instructions.
-
-2. Set up your environment:
-
- In the Atlas dashboard, click on "Connect" for your cluster.
- Choose "Connect your application" and copy the provided connection string.
- Create a `.env` file in your project root if you haven't already.
- Add your MongoDB Atlas connection string to the `.env` file:
-
-```
-MONGODB_URI="mongodb+srv://username:password@your-cluster-url.mongodb.net/?retryWrites=true&w=majority&appName=your-cluster-name"
-```
-
-Replace `username`, `password`, `your-cluster-url`, and `your-cluster-name` with your actual credentials and cluster information.
-
 ### Pinecone Serverless

 Pinecone is a managed, cloud-native vector database that provides long-term memory for high-performance AI applications.
@@ -128,6 +105,49 @@ PINECONE_API_KEY=your-api-key
 PINECONE_INDEX_NAME=your-index-name
 ```

+### MongoDB Atlas
+
+MongoDB Atlas is a fully-managed cloud database that includes vector search capabilities for AI-powered applications.
+
+1. Create a free Atlas cluster:
+
+- Go to the [MongoDB Atlas website](https://www.mongodb.com/cloud/atlas/register) and sign up for a free account.
+- After logging in, create a free cluster by following the on-screen instructions.
+
+2. Set up your environment:
+
+- In the Atlas dashboard, click on "Connect" for your cluster.
+- Choose "Connect your application" and copy the provided connection string.
+- Create a `.env` file in your project root if you haven't already.
+- Add your MongoDB Atlas connection string to the `.env` file:
+
+```
+MONGODB_URI="mongodb+srv://username:password@your-cluster-url.mongodb.net/?retryWrites=true&w=majority&appName=your-cluster-name"
+```
+
+Replace `username`, `password`, `your-cluster-url`, and `your-cluster-name` with your actual credentials and cluster information.
+
+3. Set up a vector search index. After configuring your cluster, you’ll need to create an index on the collection field you want to search over.
+
+Switch to the "Atlas Search" tab and click "Create Search Index". From there, select "Atlas Vector Search - JSON Editor", choose the appropriate database and collection, and paste the following into the text box:
+
+```json
+{
+  "fields": [
+    {
+      "numDimensions": 1536,
+      "path": "embedding",
+      "similarity": "euclidean",
+      "type": "vector"
+    }
+  ]
+}
+```
+
+Note that the `numDimensions` property must match the dimensionality of the embeddings you are using. For example, Cohere embeddings have 1024 dimensions, and by default OpenAI embeddings have 1536.
+
+[See this page](https://js.langchain.com/docs/integrations/vectorstores/mongodb_atlas/#setup) for more detailed information.
+
 </details>

 <details>
@@ -338,7 +358,8 @@ Configuration auto-generated by `langgraph template lock`. DO NOT EDIT MANUALLY.
                "PINECONE_INDEX_NAME"
              ]
            }
-          ]
+          ],
+          "type": "string"
        }
      }
    },
@@ -430,7 +451,8 @@ Configuration auto-generated by `langgraph template lock`. DO NOT EDIT MANUALLY.
                "PINECONE_INDEX_NAME"
              ]
            }
-          ]
+          ],
+          "type": "string"
        },
        "responseModel": {
          "type": "string",
@@ -675,4 +697,3 @@ Configuration auto-generated by `langgraph template lock`. DO NOT EDIT MANUALLY.
  }
 }
 -->
-
@@ -12,4 +12,7 @@ export default {
    ],
  },
  extensionsToTreatAsEsm: [".ts"],
+  setupFiles: ["dotenv/config"],
+  passWithNoTests: true,
+  testTimeout: 20_000,
 };
@@ -1,17 +1,19 @@
 {
  "name": "retrieval-graph",
  "version": "0.0.1",
-  "description": "A starter template for creating a LangGraph workflow.",
+  "description": "A starter template containing an example retrieval agent.",
  "main": "src/retrieval_graph/graph.ts",
  "author": "Your Name",
  "license": "MIT",
  "private": true,
  "type": "module",
+  "packageManager": "yarn@1.22.22",
  "scripts": {
    "build": "tsc",
    "clean": "rm -rf dist",
    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.test\\.ts$ --testPathIgnorePatterns=\\.int\\.test\\.ts$",
    "test:int": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.int\\.test\\.ts$",
+    "lint": "eslint src",
    "format": "prettier --write ."
  },
  "dependencies": {
@@ -22,20 +24,32 @@
    "@langchain/core": "^0.3.2",
    "@langchain/langgraph": "^0.2.8",
    "@langchain/mongodb": "^0.1.0",
+    "@langchain/openai": "^0.3.0",
    "@langchain/pinecone": "^0.1.0",
+    "@pinecone-database/pinecone": "^3.0.3",
    "langchain": "^0.3.2",
-    "ts-node": "^10.9.2"
-  },
-  "resolutions": {
-    "@langchain/core": "^0.3.0"
+    "mongodb": "^6.9.0",
+    "ts-node": "^10.9.2",
+    "uuid": "^10.0.0",
+    "zod": "^3.23.8"
  },
  "devDependencies": {
-    "@langchain/openai": "^0.3.0",
+    "@eslint/eslintrc": "^3.1.0",
+    "@eslint/js": "^9.9.1",
+    "@jest/globals": "^29.7.0",
    "@tsconfig/recommended": "^1.0.7",
-    "@types/jest": "^29.5.13",
+    "@types/jest": "^29.5.0",
+    "@typescript-eslint/eslint-plugin": "^5.59.8",
+    "@typescript-eslint/parser": "^5.59.8",
+    "dotenv": "^16.4.5",
+    "eslint": "^8.41.0",
+    "eslint-config-prettier": "^8.8.0",
+    "eslint-plugin-import": "^2.27.5",
+    "eslint-plugin-no-instanceof": "^1.0.1",
+    "eslint-plugin-prettier": "^4.2.1",
    "jest": "^29.7.0",
    "prettier": "^3.3.3",
-    "ts-jest": "^29.2.5",
-    "typescript": "^5.6.2"
+    "ts-jest": "^29.1.0",
+    "typescript": "^5.3.3"
  }
 }
@@ -1,7 +1,6 @@
 /**
 * Define the configurable parameters for the agent.
 */
-
 import { RunnableConfig } from "@langchain/core/runnables";
 import {
  RESPONSE_SYSTEM_PROMPT_TEMPLATE,
@@ -20,12 +19,12 @@ export const IndexConfigurationAnnotation = Annotation.Root({
  /**
   * Unique identifier for the user.
   */
-  userId: Annotation<string>(),
+  userId: Annotation<string>,

  /**
   * Name of the embedding model to use. Must be a valid embedding model name.
   */
-  embeddingModel: Annotation<string>(),
+  embeddingModel: Annotation<string>,

  /**
   * The vector store provider to use for retrieval.
@@ -33,12 +32,13 @@ export const IndexConfigurationAnnotation = Annotation.Root({
   */
  retrieverProvider: Annotation<
    "elastic" | "elastic-local" | "pinecone" | "mongodb"
-  >(),
+  >,

  /**
   * Additional keyword arguments to pass to the search function of the retriever.
   */
-  searchKwargs: Annotation<Record<string, any>>(),
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  searchKwargs: Annotation<Record<string, any>>,
 });

 /**
@@ -57,7 +57,7 @@ export function ensureIndexConfiguration(
    userId: configurable.userId || "default", // Give a default user for shared docs
    embeddingModel:
      configurable.embeddingModel || "openai/text-embedding-3-small",
-    retrieverProvider: configurable.retrieverProvider || "mongodb",
+    retrieverProvider: configurable.retrieverProvider || "elastic",
    searchKwargs: configurable.searchKwargs || {},
  };
 }
@@ -1,4 +1,3 @@
-import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { RunnableConfig } from "@langchain/core/runnables";
 import { StateGraph } from "@langchain/langgraph";
 import {
@@ -17,8 +16,8 @@ const SearchQuery = z.object({

 async function generateQuery(
  state: typeof StateAnnotation.State,
-  config?: RunnableConfig
-): Promise<{ queries: string[] }> {
+  config?: RunnableConfig,
+): Promise<typeof StateAnnotation.Update> {
  const messages = state.messages;
  if (messages.length === 1) {
    // It's the first user question. We will use the input directly to search.
@@ -27,23 +26,19 @@ async function generateQuery(
  } else {
    const configuration = ensureConfiguration(config);
    // Feel free to customize the prompt, model, and other logic!
-    const prompt = ChatPromptTemplate.fromMessages([
-      ["system", configuration.querySystemPromptTemplate],
-      ["placeholder", "{messages}"],
-    ]);
+    const systemMessage = configuration.querySystemPromptTemplate
+      .replace("{queries}", (state.queries || []).join("\n- "))
+      .replace("{systemTime}", new Date().toISOString());
+
+    const messageValue = [
+      { role: "system", content: systemMessage },
+      ...state.messages,
+    ];
    const model = (
      await loadChatModel(configuration.responseModel)
    ).withStructuredOutput(SearchQuery);

-    const messageValue = await prompt.invoke(
-      {
-        ...state,
-        queries: (state.queries || []).join("\n- "),
-        systemTime: new Date().toISOString(),
-      },
-      config
-    );
-    const generated = await model.invoke(messageValue, config);
+    const generated = await model.invoke(messageValue);
    return {
      queries: [generated.query],
    };
@@ -52,54 +47,47 @@ async function generateQuery(

 async function retrieve(
  state: typeof StateAnnotation.State,
-  config: RunnableConfig
-): Promise<{ retrievedDocs: any[] }> {
+  config: RunnableConfig, // forwarded to makeRetriever to build the retriever
+): Promise<typeof StateAnnotation.Update> { // resolves to a partial state update: { retrievedDocs }
  const query = state.queries[state.queries.length - 1];
  const retriever = await makeRetriever(config);
-  const firstResopnse = await retriever.vectorStore.similaritySearch(query);
-  console.log("FIRSTRESP", firstResopnse);
  const response = await retriever.invoke(query);
  return { retrievedDocs: response };
 }

 async function respond(
  state: typeof StateAnnotation.State,
-  config: RunnableConfig
-) {
+  config: RunnableConfig, // source of the response model and system prompt template
+): Promise<typeof StateAnnotation.Update> { // resolves to a partial state update: { messages: [response] }
  /**
   * Call the LLM powering our "agent".
   */
  const configuration = ensureConfiguration(config);
-  // Feel free to customize the prompt, model, and other logic!
-  const prompt = ChatPromptTemplate.fromMessages([
-    ["system", configuration.responseSystemPromptTemplate],
-    ["placeholder", "{messages}"],
-  ]);
+
  const model = await loadChatModel(configuration.responseModel);

  const retrievedDocs = formatDocs(state.retrievedDocs);
-  const messageValue = await prompt.invoke(
-    {
-      ...state,
-      retrievedDocs,
-      systemTime: new Date().toISOString(),
-    },
-    config
-  );
-  const response = await model.invoke(messageValue, config);
+  // Feel free to customize the prompt, model, and other logic!
+  const systemMessage = configuration.responseSystemPromptTemplate
+    .replace("{systemTime}", new Date().toISOString()) // fill the timestamp first so inserted document text is never rescanned for placeholders
+    .replace("{retrievedDocs}", () => retrievedDocs); // replacer function: "$&", "$'", "$$" in document text are inserted literally, not expanded
+  const messageValue = [
+    { role: "system", content: systemMessage }, // system prompt first, followed by the conversation so far
+    ...state.messages,
+  ];
+  const response = await model.invoke(messageValue);
  // We return a list, because this will get added to the existing list
  return { messages: [response] };
 }

-// Define a new graph (It's just a pipe)
-
+// Lay out the nodes and edges to define a graph
 const builder = new StateGraph(
  {
    stateSchema: StateAnnotation,
-    // Just the user
+    // The only input field is the user
    input: InputStateAnnotation,
-  }
-  // ConfigurationAnnotation
+  },
+  ConfigurationAnnotation,
 )
  .addNode("generateQuery", generateQuery)
  .addNode("retrieve", retrieve)
@@ -7,13 +7,15 @@ import { RunnableConfig } from "@langchain/core/runnables";
 import { StateGraph } from "@langchain/langgraph";

 import { IndexStateAnnotation } from "./state.js";
-// import { IndexConfigurationAnnotation } from "./configuration.js";
 import { makeRetriever } from "./retrieval.js";
-import { ensureIndexConfiguration } from "./configuration.js";
+import {
+  ensureIndexConfiguration,
+  IndexConfigurationAnnotation,
+} from "./configuration.js";

 function ensureDocsHaveUserId(
  docs: Document[],
-  config: RunnableConfig
+  config: RunnableConfig,
 ): Document[] {
  const configuration = ensureIndexConfiguration(config);
  const userId = configuration.userId;
@@ -27,8 +29,8 @@ function ensureDocsHaveUserId(

 async function indexDocs(
  state: typeof IndexStateAnnotation.State,
-  config?: RunnableConfig
-): Promise<{ docs: string }> {
+  config?: RunnableConfig,
+): Promise<typeof IndexStateAnnotation.Update> {
  if (!config) {
    throw new Error("ConfigurationAnnotation required to run index_docs.");
  }
@@ -36,17 +38,15 @@ async function indexDocs(
  const retriever = await makeRetriever(config);
  const stampedDocs = ensureDocsHaveUserId(docs, config);

-  const results = await retriever.addDocuments(stampedDocs);
+  await retriever.addDocuments(stampedDocs);
  return { docs: "delete" };
 }

 // Define a new graph

 const builder = new StateGraph(
-  {
-    stateSchema: IndexStateAnnotation,
-  }
-  // IndexConfigurationAnnotation
+  IndexStateAnnotation,
+  IndexConfigurationAnnotation,
 )
  .addNode("indexDocs", indexDocs)
  .addEdge("__start__", "indexDocs");
@@ -2,13 +2,13 @@
 * Default prompts.
 */

-export const RESPONSE_SYSTEM_PROMPT_TEMPLATE: string = `You are a helpful AI assistant. Answer the user's questions based on the retrieved documents.
+export const RESPONSE_SYSTEM_PROMPT_TEMPLATE = `You are a helpful AI assistant. Answer the user's questions based on the retrieved documents.

 {retrievedDocs}

 System time: {systemTime}`;

-export const QUERY_SYSTEM_PROMPT_TEMPLATE: string = `Generate search queries to retrieve documents that may help answer the user's question. Previously, you made the following queries:
+export const QUERY_SYSTEM_PROMPT_TEMPLATE = `Generate search queries to retrieve documents that may help answer the user's question. Previously, you made the following queries:
    
 <previous_queries/>
 {queries}
@@ -13,7 +13,7 @@ import { OpenAIEmbeddings } from "@langchain/openai";

 async function makeElasticRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
-  embeddingModel: Embeddings
+  embeddingModel: Embeddings,
 ): Promise<VectorStoreRetriever> {
  const elasticUrl = process.env.ELASTICSEARCH_URL;
  if (!elasticUrl) {
@@ -26,7 +26,7 @@ async function makeElasticRetriever(
    const password = process.env.ELASTICSEARCH_PASSWORD;
    if (!username || !password) {
      throw new Error(
-        "ELASTICSEARCH_USER or ELASTICSEARCH_PASSWORD environment variable is not defined"
+        "ELASTICSEARCH_USER or ELASTICSEARCH_PASSWORD environment variable is not defined",
      );
    }
    auth = { username, password };
@@ -34,7 +34,7 @@ async function makeElasticRetriever(
    const apiKey = process.env.ELASTICSEARCH_API_KEY;
    if (!apiKey) {
      throw new Error(
-        "ELASTICSEARCH_API_KEY environment variable is not defined"
+        "ELASTICSEARCH_API_KEY environment variable is not defined",
      );
    }
    auth = { apiKey };
@@ -49,19 +49,18 @@ async function makeElasticRetriever(
    client,
    indexName: "langchain_index",
  });
-
  const searchKwargs = configuration.searchKwargs || {};
-  searchKwargs.filter = searchKwargs.filter || [];
-  searchKwargs.filter.push({
-    term: { "metadata.user_id": configuration.userId },
-  });
+  const filter = {
+    ...searchKwargs,
+    user_id: configuration.userId,
+  };

-  return vectorStore.asRetriever({ searchKwargs });
+  return vectorStore.asRetriever({ filter });
 }

 async function makePineconeRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
-  embeddingModel: Embeddings
+  embeddingModel: Embeddings,
 ): Promise<VectorStoreRetriever> {
  const indexName = process.env.PINECONE_INDEX_NAME;
  if (!indexName) {
@@ -74,15 +73,17 @@ async function makePineconeRetriever(
  });

  const searchKwargs = configuration.searchKwargs || {};
-  searchKwargs.filter = searchKwargs.filter || {};
-  searchKwargs.filter.user_id = configuration.userId;
+  const filter = {
+    ...searchKwargs,
+    user_id: configuration.userId,
+  };

-  return vectorStore.asRetriever({ searchKwargs });
+  return vectorStore.asRetriever({ filter });
 }

 async function makeMongoDBRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
-  embeddingModel: Embeddings
+  embeddingModel: Embeddings,
 ): Promise<VectorStoreRetriever> {
  if (!process.env.MONGODB_URI) {
    throw new Error("MONGODB_URI environment variable is not defined");
@@ -105,7 +106,7 @@ async function makeMongoDBRetriever(
  return vectorStore.asRetriever({ filter: searchKwargs });
 }

-function makeTextEncoder(modelName: string): Embeddings {
+function makeTextEmbeddings(modelName: string): Embeddings {
  /**
   * Connect to the configured text encoder.
   */
@@ -129,10 +130,10 @@ function makeTextEncoder(modelName: string): Embeddings {
 }

 export async function makeRetriever(
-  config: RunnableConfig
+  config: RunnableConfig,
 ): Promise<VectorStoreRetriever> {
  const configuration = ensureConfiguration(config);
-  const embeddingModel = makeTextEncoder(configuration.embeddingModel);
+  const embeddingModel = makeTextEmbeddings(configuration.embeddingModel);

  const userId = configuration.userId;
  if (!userId) {
@@ -149,7 +150,7 @@ export async function makeRetriever(
      return makeMongoDBRetriever(configuration, embeddingModel);
    default:
      throw new Error(
-        `Unrecognized retrieverProvider in configuration: ${configuration.retrieverProvider}`
+        `Unrecognized retrieverProvider in configuration: ${configuration.retrieverProvider}`,
      );
  }
 }
@@ -2,6 +2,7 @@ import { Document } from "@langchain/core/documents";
 import { BaseMessage } from "@langchain/core/messages";
 import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
 import { v4 as uuidv4 } from "uuid";
+
 /**
 * Reduces the document array based on the provided new documents or actions.
 *
@@ -13,6 +14,7 @@ export function reduceDocs(
  existing?: Document[],
  newDocs?:
    | Document[]
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    | { [key: string]: any }[]
    | string[]
    | string
@@ -66,6 +68,7 @@ export const IndexStateAnnotation = Annotation.Root({
   */
  docs: Annotation<
    Document[],
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    Document[] | { [key: string]: any }[] | string[] | string | "delete"
  >({
    reducer: reduceDocs,
@@ -5,11 +5,12 @@ import { BaseChatModel } from "@langchain/core/language_models/chat_models";
 import { initChatModel } from "langchain/chat_models/universal";

 export function getMessageText(msg: BaseMessage): string {
-  /**Get the text content of a message. */
+  /** Get the text content of a message. */
  const content = msg.content;
  if (typeof content === "string") {
    return content;
  } else {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const txts = (content as any[]).map((c) =>
      typeof c === "string" ? c : c.text || "",
    );
@@ -35,6 +36,7 @@ export function formatDocs(docs?: Document[]): string {
  const formatted = docs.map(formatDoc).join("\n");
  return `<documents>\n${formatted}\n</documents>`;
 }
+
 /**
 * Load a chat model from a fully specified name.
 * @param fullySpecifiedName - String in the format 'provider/model' or 'provider/account/provider/model'.