Polish style

2026-07-01 20:24:10 -04:00 · 2024-09-18 23:16:57 -07:00
parent 3477826e0a 8e4412a070
commit 46a181b01c
11 changed files with 198 additions and 211 deletions
@@ -2,6 +2,8 @@ TAVILY_API_KEY=...

 # To separate your traces from other applications
 LANGCHAIN_PROJECT=data-enrichment
+# LANGCHAIN_API_KEY=...
+# LANGCHAIN_TRACING_V2=true

 # The following depend on your selected configuration

@@ -50,4 +50,7 @@ jobs:
          path: src/

      - name: Run tests
+        env:
+          ANTHROPIC_API_KEY: afakekey
+          TAVILY_API_KEY: anotherfakekey
        run: yarn test
@@ -93,50 +93,74 @@ End setup instructions

 3. Consider a research topic and desired extraction schema.

-As an example, here is a research topic we can consider.
+As an example, here is a research topic we can consider:
+
+```
+"Autonomous agents"
+```
+
+With an `extractionSchema` of:
+
+```json
+{
+  "type": "object",
+  "properties": {
+    "facts": {
+      "type": "array",
+      "description": "An array of facts retrieved from the provided sources",
+      "items": {
+        "type": "string"
+      }
+    }
+  },
+  "required": ["facts"]
+}
+```
+
+Another example topic with a more complex schema is:

 ```
 "Top 5 chip providers for LLM Training"
 ```

-And here is a desired extraction schema.
+And here is a desired `extractionSchema`:

 ```json
-"extractionSchema": {
-    "type": "object",
-    "properties": {
-        "companies": {
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "name": {
-                        "type": "string",
-                        "description": "Company name"
-                    },
-                    "technologies": {
-                        "type": "string",
-                        "description": "Brief summary of key technologies used by the company"
-                    },
-                    "market_share": {
-                        "type": "string",
-                        "description": "Overview of market share for this company"
-                    },
-                    "future_outlook": {
-                        "type": "string",
-                        "description": "Brief summary of future prospects and developments in the field for this company"
-                    },
-                    "key_powers": {
-                        "type": "string",
-                        "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage"
-                    }
-                },
-                "required": ["name", "technologies", "market_share", "future_outlook"]
-            },
-            "description": "List of companies"
-        }
-    },
-    "required": ["companies"]
+{
+  "type": "object",
+  "properties": {
+    "companies": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "Company name"
+          },
+          "technologies": {
+            "type": "string",
+            "description": "Brief summary of key technologies used by the company"
+          },
+          "market_share": {
+            "type": "string",
+            "description": "Overview of market share for this company"
+          },
+          "future_outlook": {
+            "type": "string",
+            "description": "Brief summary of future prospects and developments in the field for this company"
+          },
+          "key_powers": {
+            "type": "string",
+            "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage"
+          }
+        },
+        "required": ["name", "technologies", "market_share", "future_outlook"]
+      },
+      "description": "List of companies"
+    }
+  },
+  "required": ["companies"]
 }
 ```

@@ -145,7 +169,7 @@ And here is a desired extraction schema.
 ## How to customize

 1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information.
-2. **Select a different model**: We default to anthropic (claude-3-5-sonnet-20240620). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`.
+2. **Select a different model**: We default to anthropic (`claude-3-5-sonnet-20240620`). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`.
 3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration.

 For quick prototyping, these configurations can be set in the studio UI.
@@ -163,7 +187,7 @@ While iterating on your graph, you can edit past state and rerun your app from p

 Follow-up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right.

-You can find the latest (under construction) docs on [LangGraph.JS](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.
+You can find the latest (under construction) [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) docs, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.

 LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates.

@@ -23,7 +23,7 @@
  "dependencies": {
    "@langchain/anthropic": "^0.3.1",
    "@langchain/community": "^0.3.1",
-    "@langchain/core": "^0.3.2",
+    "@langchain/core": "^0.3.3",
    "@langchain/langgraph": "^0.2.8",
    "langchain": "^0.3.2",
    "langsmith": "^0.1.59",
@@ -36,6 +36,7 @@
    "@jest/globals": "^29.7.0",
    "@tsconfig/recommended": "^1.0.7",
    "@types/jest": "^29.5.0",
+    "@types/node": "^20.14.8",
    "@typescript-eslint/eslint-plugin": "^5.59.8",
    "@typescript-eslint/parser": "^5.59.8",
    "dotenv": "^16.4.5",
@@ -49,15 +49,15 @@ export const ConfigurationAnnotation = Annotation.Root({
 export function ensureConfiguration(
  config?: RunnableConfig,
 ): typeof ConfigurationAnnotation.State {
-  const configurable = (config?.configurable || {}) as Partial<
+  const configurable = (config?.configurable ?? {}) as Partial<
    typeof ConfigurationAnnotation.State
  >;

  return {
-    model: configurable.model || "anthropic/claude-3-5-sonnet-20240620",
-    prompt: configurable.prompt || MAIN_PROMPT,
-    maxSearchResults: configurable.maxSearchResults || 10,
-    maxInfoToolCalls: configurable.maxInfoToolCalls || 3,
-    maxLoops: configurable.maxLoops || 6,
+    model: configurable.model ?? "anthropic/claude-3-5-sonnet-20240620",
+    prompt: configurable.prompt ?? MAIN_PROMPT,
+    maxSearchResults: configurable.maxSearchResults ?? 5,
+    maxInfoToolCalls: configurable.maxInfoToolCalls ?? 3,
+    maxLoops: configurable.maxLoops ?? 6,
  };
 }
@@ -14,12 +14,13 @@ import { RunnableConfig } from "@langchain/core/runnables";
 import { tool } from "@langchain/core/tools";
 import { StateGraph } from "@langchain/langgraph";
 import { z } from "zod";
+
 import {
  ConfigurationAnnotation,
  ensureConfiguration,
 } from "./configuration.js";
 import { AnyRecord, InputStateAnnotation, StateAnnotation } from "./state.js";
-import { toolNode, TOOLS } from "./tools.js";
+import { MODEL_TOOLS, toolNode } from "./tools.js";
 import { loadChatModel } from "./utils.js";

 /**
@@ -43,15 +44,13 @@ import { loadChatModel } from "./utils.js";
 async function callAgentModel(
  state: typeof StateAnnotation.State,
  config: RunnableConfig,
-): Promise<{
-  messages: BaseMessage[];
-  info?: AnyRecord;
-  loopStep: number;
-}> {
+): Promise<typeof StateAnnotation.Update> {
  const configuration = ensureConfiguration(config);
  // First, define the info tool. This uses the user-provided
  // json schema to define the research targets
-  const infoTool = tool(async (_args: AnyRecord) => {}, {
+  // We pass an empty function because we will not actually invoke this tool.
+  // We are just using it for formatting.
+  const infoTool = tool(async () => {}, {
    name: "Info",
    description: "Call this when you have gathered all the relevant info",
    schema: state.extractionSchema,
@@ -61,7 +60,7 @@ async function callAgentModel(
  if (!rawModel.bindTools) {
    throw new Error("Chat model does not support tool binding");
  }
-  const model = rawModel.bindTools([...TOOLS, infoTool], {
+  const model = rawModel.bindTools([...MODEL_TOOLS, infoTool], {
    tool_choice: "any",
  });

@@ -73,7 +72,7 @@ async function callAgentModel(

  // Next, we'll call the model.
  const response: AIMessage = await model.invoke(messages);
-  const response_messages = [response];
+  const responseMessages = [response];

  // If the model has collected enough information to fill out
  // the provided schema, great! It will call the "Info" tool
@@ -96,13 +95,13 @@ async function callAgentModel(
    }
  } else {
    // If LLM didn't respect the tool_choice
-    response_messages.push(
+    responseMessages.push(
      new HumanMessage("Please respond by calling one of the provided tools."),
    );
  }

  return {
-    messages: response_messages,
+    messages: responseMessages,
    info,
    // This increments the step counter.
    // We configure a max step count to avoid infinite research loops
@@ -187,7 +186,7 @@ If you don't think it is good, you should be very specific about what could be i
  );
  messages.push({ role: "user", content: p1 });

-  // Calll the model
+  // Call the model
  const response = await boundModel.invoke(messages);
  if (response.is_satisfactory && presumedInfo) {
    return {
@@ -197,7 +196,7 @@ If you don't think it is good, you should be very specific about what could be i
          tool_call_id: lastMessage.tool_calls?.[0]?.id || "",
          content: response.reason.join("\n"),
          name: "Info",
-          additional_kwargs: { artifact: response },
+          artifact: response,
          status: "success",
        }),
      ],
@@ -209,7 +208,7 @@ If you don't think it is good, you should be very specific about what could be i
          tool_call_id: lastMessage.tool_calls?.[0]?.id || "",
          content: `Unsatisfactory response:\n${response.improvement_instructions}`,
          name: "Info",
-          additional_kwargs: { artifact: response },
+          artifact: response,
          status: "error",
        }),
      ],
@@ -1,6 +1,5 @@
 import { Annotation, messagesStateReducer } from "@langchain/langgraph";
 import { type BaseMessage } from "@langchain/core/messages";
-import { z } from "zod";

 // eslint-disable-next-line
 export type AnyRecord = Record<string, any>;
@@ -11,11 +10,11 @@ export const InputStateAnnotation = Annotation.Root({
   * The info state tracks the current extracted data for the given topic,
   * conforming to the provided schema.
   */
-  info: Annotation<z.infer<z.ZodObject<z.ZodRawShape>>>,
+  info: Annotation<AnyRecord>,
  /**
   * The schema defines the information the agent is tasked with filling out.
   */
-  extractionSchema: Annotation<z.ZodObject<z.ZodRawShape>>,
+  extractionSchema: Annotation<AnyRecord>,
  // Feel free to add additional attributes to your state as needed.
  // Common examples include retrieved documents, extracted entities, API connections, etc.
 });
@@ -60,17 +59,17 @@ export const StateAnnotation = Annotation.Root({
    reducer: messagesStateReducer,
    default: () => [],
  }),
+
  topic: Annotation<string>,
  /**
   * The info state tracks the current extracted data for the given topic,
   * conforming to the provided schema.
   */
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  info: Annotation<AnyRecord>,
+
  /**
   * The schema defines the information the agent is tasked with filling out.
   */
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  extractionSchema: Annotation<AnyRecord>,

  /**
@@ -7,11 +7,12 @@
 */
 import { TavilySearchResults } from "@langchain/community/tools/tavily_search";
 import { RunnableConfig } from "@langchain/core/runnables";
+import { tool } from "@langchain/core/tools";

+import { INFO_PROMPT } from "./prompts.js";
 import { ensureConfiguration } from "./configuration.js";
-import { AnyRecord, StateAnnotation } from "./state.js";
-import { StructuredTool, tool } from "@langchain/core/tools";
-import { curry, getTextContent, loadChatModel } from "./utils.js";
+import { StateAnnotation } from "./state.js";
+import { getTextContent, loadChatModel } from "./utils.js";
 import {
  AIMessage,
  isBaseMessage,
@@ -19,10 +20,14 @@ import {
 } from "@langchain/core/messages";
 import { z } from "zod";

-async function search(
-  { query }: { query: string },
-  config: RunnableConfig,
-): Promise<Array<AnyRecord> | null> {
+/**
+ * Initialize tools within a function so that they have access to the current
+ * state and config at runtime.
+ */
+function initializeTools(
+  state?: typeof StateAnnotation.State,
+  config?: RunnableConfig,
+) {
  /**
   * Search for general results.
   *
@@ -31,135 +36,93 @@ async function search(
   * for answering questions about current events.
   */
  const configuration = ensureConfiguration(config);
-  const wrapped = new TavilySearchResults({
+  const searchTool = new TavilySearchResults({
    maxResults: configuration.maxSearchResults,
  });
-  const result = await wrapped.invoke(query, config);
-  return result as Array<AnyRecord> | null;
+
+  async function scrapeWebsite({ url }: { url: string }): Promise<string> {
+    /**
+     * Scrape and summarize content from a given URL.
+     */
+    const response = await fetch(url);
+    const content = await response.text();
+    const truncatedContent = content.slice(0, 50000);
+    const p = INFO_PROMPT.replace(
+      "{info}",
+      JSON.stringify(state?.extractionSchema, null, 2),
+    )
+      .replace("{url}", url)
+      .replace("{content}", truncatedContent);
+
+    const rawModel = await loadChatModel(configuration.model);
+    const result = await rawModel.invoke(p);
+    return getTextContent(result.content);
+  }
+
+  const scraperTool = tool(scrapeWebsite, {
+    name: "scrapeWebsite",
+    description: "Scrape content from a given website URL",
+    schema: z.object({
+      url: z.string().url().describe("The URL of the website to scrape"),
+    }),
+  });
+
+  return [searchTool, scraperTool];
 }

-const INFO_PROMPT = `You are doing web research on behalf of a user. You are trying to find out this information:
-
-<info>
-{info}
-</info>
-
-You just scraped the following website: {url}
-
-Based on the website content below, jot down some notes about the website.
-
-<Website content>
-{content}
-</Website content>`;
-
-async function scrapeWebsite(
-  {
-    url,
-    __state,
-  }: {
-    url: string;
-    __state?: typeof StateAnnotation.State;
-  },
+export const toolNode = async (
+  state: typeof StateAnnotation.State,
  config: RunnableConfig,
-): Promise<string> {
-  /**
-   * Scrape and summarize content from a given URL.
-   */
-  const response = await fetch(url);
-  const content = await response.text();
-  const truncatedContent = content.slice(0, 50000);
-  const configuration = ensureConfiguration(config);
-  const p = INFO_PROMPT.replace(
-    "{info}",
-    JSON.stringify(__state?.extractionSchema, null, 2),
-  )
-    .replace("{url}", url)
-    .replace("{content}", truncatedContent);
-
-  const rawModel = await loadChatModel(configuration.model);
-  const result = await rawModel.invoke(p, { callbacks: config?.callbacks });
-  return getTextContent(result.content);
-}
-
-export const createToolNode = (tools: StructuredTool[]) => {
-  const toolNode = async (
-    state: typeof StateAnnotation.State,
-    config: RunnableConfig,
-  ) => {
-    const message = state.messages[state.messages.length - 1];
-    const outputs = await Promise.all(
-      (message as AIMessage).tool_calls?.map(async (call) => {
-        const tool = tools.find((tool) => tool.name === call.name);
-        try {
-          if (tool === undefined) {
-            throw new Error(`Tool "${call.name}" not found.`);
-          }
-          const newCall = {
-            ...call,
-            args: {
-              __state: state,
-              ...call.args,
-            },
-          };
-          const output = await tool.invoke(
-            { ...newCall, type: "tool_call" },
-            config,
-          );
-          if (isBaseMessage(output) && output._getType() === "tool") {
-            return output;
-          } else {
-            return new ToolMessage({
-              name: tool.name,
-              content:
-                typeof output === "string" ? output : JSON.stringify(output),
-              tool_call_id: call.id ?? "",
-            });
-          }
-          // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        } catch (e: any) {
+) => {
+  const message = state.messages[state.messages.length - 1];
+  // Initialize the tools within the context of the node so that the tools
+  // have the current state of the graph and the config in scope.
+  // See: https://js.langchain.com/docs/how_to/tool_runtime
+  const tools = initializeTools(state, config);
+  const outputs = await Promise.all(
+    (message as AIMessage).tool_calls?.map(async (call) => {
+      const tool = tools.find((tool) => tool.name === call.name);
+      try {
+        if (tool === undefined) {
+          throw new Error(`Tool "${call.name}" not found.`);
+        }
+        const newCall = {
+          ...call,
+          args: {
+            __state: state,
+            ...call.args,
+          },
+        };
+        const output = await tool.invoke(
+          { ...newCall, type: "tool_call" },
+          config,
+        );
+        if (isBaseMessage(output) && output._getType() === "tool") {
+          return output;
+        } else {
          return new ToolMessage({
-            content: `Error: ${e.message}\n Please fix your mistakes.`,
-            name: call.name,
+            name: tool.name,
+            content:
+              typeof output === "string" ? output : JSON.stringify(output),
            tool_call_id: call.id ?? "",
-            status: "error",
          });
        }
-      }) ?? [],
-    );
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      } catch (e: any) {
+        return new ToolMessage({
+          content: `Error: ${e.message}\n Please fix your mistakes.`,
+          name: call.name,
+          tool_call_id: call.id ?? "",
+          status: "error",
+        });
+      }
+    }) ?? [],
+  );

-    return { messages: outputs };
-  };
-  return toolNode;
+  return { messages: outputs };
 };

-const searchTool = tool(search, {
-  name: "search",
-  description: "Search the internet for information on a given topic",
-  schema: z.object({
-    query: z.string().describe("The search query to look up"),
-  }),
-});
-
-// Exposed to the
-export const TOOLS = [
-  searchTool,
-  tool(curry(scrapeWebsite, { __state: undefined }), {
-    name: "scrapeWebsite",
-    description: "Scrape content from a given website URL",
-    schema: z.object({
-      url: z.string().url().describe("The URL of the website to scrape"),
-    }),
-  }),
-];
-
-export const toolNode = createToolNode([
-  searchTool,
-  tool(scrapeWebsite, {
-    name: "scrapeWebsite",
-    description: "Scrape content from a given website URL",
-    schema: z.object({
-      url: z.string().url().describe("The URL of the website to scrape"),
-      __state: z.any(),
-    }),
-  }),
-]);
+// No state or config required here since these are just bound to the chat model
+// and are only used to define schema.
+// The tool node above will actually call the functions.
+export const MODEL_TOOLS = initializeTools();
@@ -5,21 +5,6 @@ import {
 } from "@langchain/core/messages";
 import { initChatModel } from "langchain/chat_models/universal";

-export function curry<
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  F extends (...args: any[]) => any,
-  P extends Partial<Parameters<F>[0]> = Partial<Parameters<F>[0]>,
->(fn: F, partialArg: P) {
-  return function (
-    this: unknown,
-    arg: Omit<Parameters<F>[0], keyof P> & Partial<P>,
-    ...rest: Parameters<F> extends [unknown, ...infer R] ? R : never
-  ): ReturnType<F> {
-    const mergedArg = { ...partialArg, ...arg } as Parameters<F>[0];
-    return fn.apply(this, [mergedArg, ...rest]) as ReturnType<F>;
-  };
-}
-
 /**
 * Helper function to extract text content from a complex message.
 *
@@ -2,6 +2,10 @@ import { describe, it, expect } from "@jest/globals";
 import { graph } from "../src/enrichment_agent/graph.js";

 describe("Web Research Agent", () => {
+  beforeAll(() => {
+    process.env.TAVILY_API_KEY = "dummy";
+  });
+
  it("should initialize and compile the graph", () => {
    expect(graph).toBeDefined();
    expect(graph.name).toBe("ResearchTopic");
@@ -656,10 +656,10 @@
    zod "^3.22.3"
    zod-to-json-schema "^3.22.5"

-"@langchain/core@^0.3.2":
-  version "0.3.2"
-  resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.2.tgz#aff6d83149a40e0e735910f583aca0f1dd7d1bab"
-  integrity sha512-FeoDOStP8l1YdxgykpXnVoEnl4lxGNSOdYzUJN/EdFtkc6cIjDDS5+xewajme0+egaUsO4tGLezKaFpoWxAyQA==
+"@langchain/core@^0.3.3":
+  version "0.3.3"
+  resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.3.tgz#af12fd767ff2fcedb0a71bd79e6588d7dd52b6b6"
+  integrity sha512-WAtkmhbdl2T41qzimTzhb3pXCHQxO4onqxzPxgdf3KftQdTwLq0YYBDhozRMZLNAd/+cfH0ymZGaZSsnc9Ogsg==
  dependencies:
    ansi-styles "^5.0.0"
    camelcase "6"
@@ -876,6 +876,13 @@
  dependencies:
    undici-types "~5.26.4"

+"@types/node@^20.14.8":
+  version "20.16.5"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-20.16.5.tgz#d43c7f973b32ffdf9aa7bd4f80e1072310fd7a53"
+  integrity sha512-VwYCweNo3ERajwy0IUlqqcyZ8/A7Zwa9ZP3MnENWcB11AejO+tLy3pu850goUW2FC/IJMdZUfKpX/yxL1gymCA==
+  dependencies:
+    undici-types "~6.19.2"
+
 "@types/retry@0.12.0":
  version "0.12.0"
  resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d"