fix: update ch9 js graph logic

2026-07-01 16:06:32 -04:00 · 2025-02-20 20:02:20 +00:00
parent 38cd293e65
commit 572b01e708
16 changed files with 7281 additions and 5512 deletions
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2024 Nuno Campos
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
@@ -1,8 +1,8 @@
 # Learning LangChain Code Examples

-This repository contains code examples (in python and javascript) from each chapter of the book "Learning LangChain" published by O'Reilly Media.
+This repository contains code examples (in python and javascript) from each chapter of the book ["Learning LangChain: Building AI and LLM Applications with LangChain and LangGraph"](https://www.oreilly.com/library/view/learning-langchain/9781098167271/) published by O'Reilly Media.

-To run the examples, you can clone the repository and run the examples in your preferred language.
+To run the examples, clone the repository and run them from the folder for your preferred language.

 ## Quick Start

@@ -0,0 +1,3 @@
+
+# LangGraph API
+.langgraph_api
@@ -6,10 +6,12 @@
        "@langchain/langgraph": "^0.2.41",
        "@langchain/openai": "^0.3.17",
        "@supabase/supabase-js": "^2.44.0",
-        "langchain": "^0.3.12"
+        "langchain": "^0.3.12",
+        "pdf-parse": "^1.1.1"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
-        "typescript": "^5.0.0"
+        "typescript": "^5.0.0",
+        "@types/pdf-parse": "^1.1.4"
    }
 }
@@ -1,17 +1,33 @@
 import { Annotation } from '@langchain/langgraph';
 import { RunnableConfig } from '@langchain/core/runnables';

-// This file contains sample documents to index, based on the following LangChain and LangGraph documentation pages:
-const DEFAULT_DOCS_FILE = './docSplits.json';
+// Default path to the JSON file containing the sample documents to index.
+const DEFAULT_DOCS_PATH = 'src/docSplits.json';

 /**
 * The configuration for the indexing process.
 */
 export const IndexConfigurationAnnotation = Annotation.Root({
  /**
-   * Path to a JSON file containing default documents to index.
+   * Path to the JSON file containing the sample documents to index.
   */
-  docsFile: Annotation<string>,
+  docsPath: Annotation<string>,
+
+  /**
+   * Name of the openai embedding model to use. Must be a valid embedding model name.
+   */
+  embeddingModel: Annotation<'text-embedding-3-small'>,
+
+  /**
+   * The vector store provider to store the embeddings.
+   * Options are 'supabase', 'chroma'.
+   */
+  retrieverProvider: Annotation<'supabase' | 'chroma'>,
+
+  /**
+   * Whether to index sample documents specified in the docsPath.
+   */
+  useSampleDocs: Annotation<boolean>,
 });

 /**
@@ -27,6 +43,9 @@ export function ensureIndexConfiguration(
    typeof IndexConfigurationAnnotation.State
  >;
  return {
-    docsFile: configurable.docsFile || DEFAULT_DOCS_FILE,
+    docsPath: configurable.docsPath || DEFAULT_DOCS_PATH,
+    embeddingModel: configurable.embeddingModel || 'text-embedding-3-small',
+    retrieverProvider: configurable.retrieverProvider || 'supabase',
+    useSampleDocs: configurable.useSampleDocs || false,
  };
 }
@@ -1,19 +1,22 @@
 /**
 * This "graph" simply exposes an endpoint for a user to upload docs to be indexed.
 */
-
+import path from 'path';
+import fs from 'fs/promises';
 import { RunnableConfig } from '@langchain/core/runnables';
 import { StateGraph, END, START } from '@langchain/langgraph';
-import fs from 'fs/promises';
-
 import { IndexStateAnnotation } from './state.js';
-import { makeSupabaseRetriever } from '../shared/retrieval.js';
-import { ensureIndexConfiguration } from './configuration.js';
+import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
+import {
+  ensureIndexConfiguration,
+  IndexConfigurationAnnotation,
+} from './configuration.js';
+import { makeRetriever } from '../shared/retrieval.js';
 import { reduceDocs } from '../shared/state.js';

 async function ingestDocs(
  state: typeof IndexStateAnnotation.State,
-  config?: RunnableConfig,
+  config?: RunnableConfig
 ): Promise<typeof IndexStateAnnotation.Update> {
  if (!config) {
    throw new Error('Configuration required to run index_docs.');
@@ -22,21 +25,29 @@ async function ingestDocs(
  const configuration = ensureIndexConfiguration(config);
  let docs = state.docs;

-  if (!docs.length) {
-    const fileContent = await fs.readFile(configuration.docsFile, 'utf-8');
-    const serializedDocs = JSON.parse(fileContent);
-    docs = reduceDocs([], serializedDocs);
+  if (!docs || docs.length === 0) {
+    if (configuration.useSampleDocs) {
+      const fileContent = await fs.readFile(configuration.docsPath, 'utf-8');
+      const serializedDocs = JSON.parse(fileContent);
+      docs = reduceDocs([], serializedDocs);
+    } else {
+      throw new Error('No sample documents to index.');
+    }
+  } else {
+    docs = reduceDocs([], docs);
  }

-  const retriever = await makeSupabaseRetriever();
-  const documentIds = docs.map((doc) => doc.id);
-  await retriever.addDocuments(docs, { ids: documentIds });
+  const retriever = await makeRetriever(config);
+  await retriever.addDocuments(docs);

  return { docs: 'delete' };
 }

 // Define the graph
-const builder = new StateGraph(IndexStateAnnotation)
+const builder = new StateGraph(
+  IndexStateAnnotation,
+  IndexConfigurationAnnotation
+)
  .addNode('ingestDocs', ingestDocs)
  .addEdge(START, 'ingestDocs')
  .addEdge('ingestDocs', END);
@@ -0,0 +1,36 @@
+import { Annotation } from '@langchain/langgraph';
+import {
+  BaseConfigurationAnnotation,
+  ensureBaseConfiguration,
+} from '../shared/configuration.js';
+import { RunnableConfig } from '@langchain/core/runnables';
+
+/**
+ * Configuration annotation for the retrieval agent graph.
+ *
+ * Reuses every field of BaseConfigurationAnnotation (via its `spec`) and adds
+ * the `queryModel` field declared below.
+ */
+export const AgentConfigurationAnnotation = Annotation.Root({
+  ...BaseConfigurationAnnotation.spec,
+
+  // models
+  /**
+   * The language model used for processing and refining queries.
+   * Should be in the form: provider/model-name.
+   */
+  queryModel: Annotation<string>,
+});
+
+/**
+ * Resolve the agent configuration for a run, filling in defaults.
+ *
+ * The shared fields come from `ensureBaseConfiguration`; `queryModel` is read
+ * from `config.configurable` and falls back to 'openai/gpt-4o' when it is
+ * missing or empty.
+ *
+ * @param config - The runnable config whose `configurable` values are read.
+ * @returns The base configuration fields plus `queryModel`.
+ */
+export function ensureAgentConfiguration(
+  config: RunnableConfig
+): typeof AgentConfigurationAnnotation.State {
+  type AgentOverrides = Partial<typeof AgentConfigurationAnnotation.State>;
+  const overrides = (config?.configurable || {}) as AgentOverrides;
+  const queryModel = overrides.queryModel || 'openai/gpt-4o';
+  return { ...ensureBaseConfiguration(config), queryModel };
+}
@@ -5,18 +5,24 @@ import {
  MessagesAnnotation,
 } from '@langchain/langgraph';
 import { AgentStateAnnotation } from './state.js';
-import { makeSupabaseRetriever } from '../shared/retrieval.js';
+import { makeRetriever, makeSupabaseRetriever } from '../shared/retrieval.js';
 import { ChatOpenAI } from '@langchain/openai';
 import { formatDocs } from './utils.js';
 import { ChatPromptTemplate } from '@langchain/core/prompts';
 import { pull } from 'langchain/hub';
-import { BaseMessage, HumanMessage } from '@langchain/core/messages';
+import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
 import { z } from 'zod';
-
+import { RunnableConfig } from '@langchain/core/runnables';
+import { loadChatModel } from '../shared/utils.js';
+import {
+  AgentConfigurationAnnotation,
+  ensureAgentConfiguration,
+} from './configuration.js';
 async function checkQueryType(
  state: typeof AgentStateAnnotation.State,
+  config: RunnableConfig
 ): Promise<{
-  route: 'retrieveDocuments' | typeof END;
+  route: 'retrieve' | 'direct';
  messages?: BaseMessage[];
 }> {
  //schema for routing
@@ -25,10 +31,8 @@ async function checkQueryType(
    directAnswer: z.string().optional(),
  });

-  const model = new ChatOpenAI({
-    model: 'gpt-4',
-    temperature: 0,
-  }).withStructuredOutput(schema);
+  const configuration = ensureAgentConfiguration(config);
+  const model = await loadChatModel(configuration.queryModel);

  const routingPrompt = ChatPromptTemplate.fromMessages([
    [
@@ -42,53 +46,60 @@ async function checkQueryType(
    query: state.query,
  });

-  const response = await model.invoke(formattedPrompt);
+  const response = await model
+    .withStructuredOutput(schema)
+    .invoke(formattedPrompt.toString());
+
  const route = response.route;

-  if (route === 'retrieve') {
-    return { route: 'retrieveDocuments' };
-  } else {
-    const directAnswer = response.directAnswer ?? '';
+  return { route };
+}

-    return {
-      route: END,
-      messages: [new HumanMessage(directAnswer)],
-    };
-  }
+/**
+ * Answer the query with the chat model alone, without retrieving documents.
+ *
+ * @param state - Graph state; only `state.query` is read.
+ * @param config - Runnable config used to resolve `queryModel`.
+ * @returns A messages update holding the human query followed by the model's
+ *   response.
+ */
+async function answerQueryDirectly(
+  state: typeof AgentStateAnnotation.State,
+  config: RunnableConfig
+): Promise<typeof AgentStateAnnotation.Update> {
+  const { queryModel } = ensureAgentConfiguration(config);
+  const chatModel = await loadChatModel(queryModel);
+  const question = new HumanMessage(state.query);
+  const answer = await chatModel.invoke([question]);
+
+  return { messages: [question, answer] };
 }

 async function routeQuery(
-  state: typeof AgentStateAnnotation.State,
-): Promise<'retrieveDocuments' | typeof END> {
+  state: typeof AgentStateAnnotation.State
+): Promise<'retrieveDocuments' | 'directAnswer'> {
  const route = state.route;
  if (!route) {
    throw new Error('Route is not set');
  }
-
-  if (route === 'retrieveDocuments') {
+  if (route === 'retrieve') {
    return 'retrieveDocuments';
+  } else if (route === 'direct') {
+    return 'directAnswer';
  } else {
-    return END;
+    throw new Error('Invalid route');
  }
 }

 async function retrieveDocuments(
  state: typeof AgentStateAnnotation.State,
+  config: RunnableConfig
 ): Promise<typeof AgentStateAnnotation.Update> {
-  const retriever = await makeSupabaseRetriever();
-  const response = await retriever.invoke(state.query);
-
+  const retriever = await makeRetriever(config);
+  const response = await retriever.invoke(state.query, config);
  return { documents: response };
 }

 async function generateResponse(
  state: typeof AgentStateAnnotation.State,
+  config: RunnableConfig
 ): Promise<typeof AgentStateAnnotation.Update> {
  const context = formatDocs(state.documents);
-  const model = new ChatOpenAI({
-    model: 'gpt-4o',
-    temperature: 0,
-  });
+  const configuration = ensureAgentConfiguration(config);
+
+  const model = await loadChatModel(configuration.queryModel);
  const promptTemplate = await pull<ChatPromptTemplate>('rlm/rag-prompt');

  const formattedPrompt = await promptTemplate.invoke({
@@ -96,24 +107,36 @@ async function generateResponse(
    question: state.query,
  });

-  const messages = [
-    new HumanMessage(formattedPrompt.toString()),
-    ...state.messages,
-  ];
+  const userHumanMessage = new HumanMessage(state.query);

-  const response = await model.invoke(messages);
+  // Create a human message with the formatted prompt that includes context
+  const formattedPromptMessage = new HumanMessage(formattedPrompt.toString());

-  return { messages: response };
+  const messageHistory = [...state.messages, formattedPromptMessage];
+
+  // Let MessagesAnnotation handle the message history
+  const response = await model.invoke(messageHistory);
+
+  // Return both the current query and the AI response to be handled by MessagesAnnotation's reducer
+  return { messages: [userHumanMessage, response] };
 }

-const builder = new StateGraph(AgentStateAnnotation)
+const builder = new StateGraph(
+  AgentStateAnnotation,
+  AgentConfigurationAnnotation
+)
  .addNode('retrieveDocuments', retrieveDocuments)
  .addNode('generateResponse', generateResponse)
  .addNode('checkQueryType', checkQueryType)
+  .addNode('directAnswer', answerQueryDirectly)
  .addEdge(START, 'checkQueryType')
-  .addConditionalEdges('checkQueryType', routeQuery, ['retrieveDocuments', END])
+  .addConditionalEdges('checkQueryType', routeQuery, [
+    'retrieveDocuments',
+    'directAnswer',
+  ])
  .addEdge('retrieveDocuments', 'generateResponse')
-  .addEdge('generateResponse', END);
+  .addEdge('generateResponse', END)
+  .addEdge('directAnswer', END);

 export const graph = builder.compile().withConfig({
  runName: 'RetrievalGraph',
@@ -0,0 +1,59 @@
+/**
+ * Define the configurable parameters for the agent.
+ */
+
+import { Annotation } from '@langchain/langgraph';
+import { RunnableConfig } from '@langchain/core/runnables';
+
+/**
+ * Configuration annotation shared by indexing and retrieval operations.
+ *
+ * @property embeddingModel - The name of the openai embedding model to use.
+ * @property retrieverProvider - The vector store provider to use for retrieval.
+ * @property filter - Optional metadata filter limiting the items retrieved.
+ * @property k - The number of results to return from the retriever.
+ */
+
+export const BaseConfigurationAnnotation = Annotation.Root({
+  /**
+   * Name of the openai embedding model to use. Must be a valid embedding model name.
+   * The type currently admits only 'text-embedding-3-small'.
+   */
+  embeddingModel: Annotation<'text-embedding-3-small'>,
+
+  /**
+   * The vector store provider to use for retrieval.
+   * Options are 'supabase', 'chroma'.
+   */
+  retrieverProvider: Annotation<'supabase' | 'chroma'>,
+
+  /**
+   * Optional filter criteria to limit the items retrieved.
+   * Can be any metadata object that matches document metadata structure.
+   */
+  filter: Annotation<Record<string, any> | undefined>,
+
+  /**
+   * The number of results to return from the retriever.
+   */
+  k: Annotation<number>,
+});
+
+/**
+ * Resolve the shared indexing/retrieval configuration, filling in defaults.
+ *
+ * Values are read from `config.configurable`. Falsy values fall back to
+ * 'text-embedding-3-small' (embeddingModel), 'supabase' (retrieverProvider)
+ * and 4 (k). `filter` is passed through unchanged and may be undefined.
+ *
+ * @param config - The runnable config whose `configurable` values are read.
+ * @returns The base configuration with defaults applied.
+ */
+export function ensureBaseConfiguration(
+  config: RunnableConfig
+): typeof BaseConfigurationAnnotation.State {
+  type BaseOverrides = Partial<typeof BaseConfigurationAnnotation.State>;
+  const overrides = (config?.configurable || {}) as BaseOverrides;
+  const { embeddingModel, retrieverProvider, filter, k } = overrides;
+
+  return {
+    embeddingModel: embeddingModel || 'text-embedding-3-small',
+    retrieverProvider: retrieverProvider || 'supabase',
+    filter,
+    k: k || 4,
+  };
+}
@@ -2,24 +2,63 @@ import { VectorStoreRetriever } from '@langchain/core/vectorstores';
 import { OpenAIEmbeddings } from '@langchain/openai';
 import { SupabaseVectorStore } from '@langchain/community/vectorstores/supabase';
 import { createClient } from '@supabase/supabase-js';
+import { RunnableConfig } from '@langchain/core/runnables';
+import { Embeddings } from '@langchain/core/embeddings';
+import { ensureBaseConfiguration } from './configuration.js';
+import { Chroma } from '@langchain/community/vectorstores/chroma';

-const embeddings = new OpenAIEmbeddings({
-  model: 'text-embedding-3-small',
-});
-export async function makeSupabaseRetriever(): Promise<VectorStoreRetriever> {
+export async function makeSupabaseRetriever(
+  configuration: ReturnType<typeof ensureBaseConfiguration>,
+  embeddingModel: Embeddings
+): Promise<VectorStoreRetriever> {
  if (!process.env.SUPABASE_URL || !process.env.SUPABASE_SERVICE_ROLE_KEY) {
    throw new Error(
-      'SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables are not defined',
+      'SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables are not defined'
    );
  }
  const supabaseClient = createClient(
    process.env.SUPABASE_URL ?? '',
-    process.env.SUPABASE_SERVICE_ROLE_KEY ?? '',
+    process.env.SUPABASE_SERVICE_ROLE_KEY ?? ''
  );
-  const vectorStore = new SupabaseVectorStore(embeddings, {
+  const vectorStore = new SupabaseVectorStore(embeddingModel, {
    client: supabaseClient,
    tableName: 'documents',
    queryName: 'match_documents',
  });
-  return vectorStore.asRetriever();
+  return vectorStore.asRetriever({
+    filter: configuration.filter,
+    k: configuration.k,
+  });
+}
+
+/**
+ * Build a retriever backed by a Chroma vector store.
+ *
+ * The store is created over the 'documents' collection with the supplied
+ * embeddings; no other Chroma options are set here.
+ *
+ * @param configuration - Resolved base configuration; `filter` and `k` are
+ *   forwarded to the retriever.
+ * @param embeddingModel - Embeddings implementation used by the vector store.
+ * @returns A retriever over the Chroma vector store.
+ */
+export async function makeChromaRetriever(
+  configuration: ReturnType<typeof ensureBaseConfiguration>,
+  embeddingModel: Embeddings
+): Promise<VectorStoreRetriever> {
+  const vectorStore = new Chroma(embeddingModel, {
+    collectionName: 'documents',
+  });
+  return vectorStore.asRetriever({
+    filter: configuration.filter,
+    k: configuration.k,
+  });
+}
+
+/**
+ * Build a retriever for the vector store selected in the run configuration.
+ *
+ * The configuration is resolved with `ensureBaseConfiguration`, an
+ * OpenAIEmbeddings instance is created for the configured embedding model,
+ * and construction is delegated to the provider-specific factory.
+ *
+ * @param config - Runnable config carrying the retrieval settings.
+ * @returns A retriever for the configured provider.
+ * @throws Error when `retrieverProvider` is neither 'supabase' nor 'chroma'.
+ */
+export async function makeRetriever(
+  config: RunnableConfig
+): Promise<VectorStoreRetriever> {
+  const settings = ensureBaseConfiguration(config);
+  const embeddings = new OpenAIEmbeddings({ model: settings.embeddingModel });
+  const { retrieverProvider } = settings;
+
+  if (retrieverProvider === 'supabase') {
+    return makeSupabaseRetriever(settings, embeddings);
+  }
+  if (retrieverProvider === 'chroma') {
+    return makeChromaRetriever(settings, embeddings);
+  }
+  throw new Error(
+    `Unrecognized retrieverProvider in configuration: ${retrieverProvider}`
+  );
 }
@@ -0,0 +1,59 @@
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { initChatModel } from 'langchain/chat_models/universal';
+
+/**
+ * Provider identifiers that `loadChatModel` validates names against before
+ * initializing a chat model. Declared `as const` so the entries keep their
+ * literal types.
+ */
+const SUPPORTED_PROVIDERS = [
+  'openai',
+  'anthropic',
+  'azure_openai',
+  'cohere',
+  'google-vertexai',
+  'google-vertexai-web',
+  'google-genai',
+  'ollama',
+  'together',
+  'fireworks',
+  'mistralai',
+  'groq',
+  'bedrock',
+  'cerebras',
+  'deepseek',
+  'xai',
+] as const;
+/**
+ * Load a chat model from a fully specified name.
+ *
+ * @param fullySpecifiedName - Either 'provider/model' (split on the first "/",
+ *   so the model part may itself contain "/") or a bare model name, in which
+ *   case no provider is passed to `initChatModel`.
+ * @param temperature - Temperature passed to the model. Defaults to 0.2.
+ * @returns A Promise that resolves to a BaseChatModel instance.
+ * @throws Error if the name is empty, or if a provider prefix is given that is
+ *   not listed in SUPPORTED_PROVIDERS.
+ */
+export async function loadChatModel(
+  fullySpecifiedName: string,
+  temperature: number = 0.2
+): Promise<BaseChatModel> {
+  if (fullySpecifiedName.length === 0) {
+    throw new Error(`Unsupported model: ${fullySpecifiedName}`);
+  }
+
+  const index = fullySpecifiedName.indexOf('/');
+  if (index === -1) {
+    // No "/" means the whole string is a model name, not a provider, so it
+    // must not be checked against SUPPORTED_PROVIDERS: that check rejected
+    // every real model name (e.g. 'gpt-4o'). Provider selection is left to
+    // initChatModel.
+    return await initChatModel(fullySpecifiedName, { temperature });
+  }
+
+  const provider = fullySpecifiedName.slice(0, index);
+  const model = fullySpecifiedName.slice(index + 1);
+  const isSupported = (SUPPORTED_PROVIDERS as readonly string[]).includes(
+    provider
+  );
+  if (!isSupported) {
+    throw new Error(`Unsupported provider: ${provider}`);
+  }
+  return await initChatModel(model, { modelProvider: provider, temperature });
+}
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "NodeNext",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": false,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules"]
+}
@@ -1,21 +1,28 @@
-{ 
-    "name": "learning-langchain-repo",
-    "description": "Learning LangChain O'Reilly book code examples",
-    "type": "module",
-    "author": "Nuno Campos and Mayo Oshin",
-    "dependencies": {
-        "@langchain/community": "^0.3.26",
-        "@langchain/core": "^0.3.33",
-        "@langchain/langgraph": "^0.2.41",
-        "@langchain/openai": "^0.3.17",
-        "@supabase/supabase-js": "^2.44.0",
-        "@langchain/langgraph-cli": "^0.0.1",
-        "@langchain/langgraph-sdk": "^0.0.36",    
-        "duck-duck-scrape": "^2.2.7",
-        "expr-eval": "^2.0.2",
-        "langchain": "^0.3.12",
-        "pg": "^8.13.1",
-        "sqlite3": "^5.1.7",
-        "typeorm": "^0.3.20"
-    }
+{
+	"name": "learning-langchain-repo",
+	"description": "Learning LangChain O'Reilly book code examples",
+	"type": "module",
+	"author": "Nuno Campos and Mayo Oshin",
+	"scripts": {
+		"langgraph:dev": "npx @langchain/langgraph-cli dev -c ch9/js/langgraph.json --verbose"
+	},
+	"dependencies": {
+		"@langchain/community": "^0.3.26",
+		"@langchain/core": "^0.3.33",
+		"@langchain/langgraph": "^0.2.41",
+		"@langchain/langgraph-cli": "^0.0.1",
+		"@langchain/langgraph-sdk": "^0.0.36",
+		"@langchain/openai": "^0.3.17",
+		"@supabase/supabase-js": "^2.44.0",
+		"duck-duck-scrape": "^2.2.7",
+		"expr-eval": "^2.0.2",
+		"langchain": "^0.3.15",
+		"pdf-parse": "^1.1.1",
+		"pg": "^8.13.1",
+		"sqlite3": "^5.1.7",
+		"typeorm": "^0.3.20"
+	},
+	"devDependencies": {
+		"@types/pdf-parse": "^1.1.4"
+	}
 }