Add tests

2026-07-01 20:24:10 -04:00 · 2024-09-18 17:58:46 -07:00
parent 9e35a7beca
commit 3f407cdb50
9 changed files with 85 additions and 30 deletions
@@ -4,23 +4,23 @@
 [![Integration Tests](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml)
 [![Open in - LangGraph Studio](https://img.shields.io/badge/Open_in-LangGraph_Studio-00324d.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4NS4zMzMiIGhlaWdodD0iODUuMzMzIiB2ZXJzaW9uPSIxLjAiIHZpZXdCb3g9IjAgMCA2NCA2NCI+PHBhdGggZD0iTTEzIDcuOGMtNi4zIDMuMS03LjEgNi4zLTYuOCAyNS43LjQgMjQuNi4zIDI0LjUgMjUuOSAyNC41QzU3LjUgNTggNTggNTcuNSA1OCAzMi4zIDU4IDcuMyA1Ni43IDYgMzIgNmMtMTIuOCAwLTE2LjEuMy0xOSAxLjhtMzcuNiAxNi42YzIuOCAyLjggMy40IDQuMiAzLjQgNy42cy0uNiA0LjgtMy40IDcuNkw0Ny4yIDQzSDE2LjhsLTMuNC0zLjRjLTQuOC00LjgtNC44LTEwLjQgMC0xNS4ybDMuNC0zLjRoMzAuNHoiLz48cGF0aCBkPSJNMTguOSAyNS42Yy0xLjEgMS4zLTEgMS43LjQgMi41LjkuNiAxLjcgMS44IDEuNyAyLjcgMCAxIC43IDIuOCAxLjYgNC4xIDEuNCAxLjkgMS40IDIuNS4zIDMuMi0xIC42LS42LjkgMS40LjkgMS41IDAgMi43LS41IDIuNy0xIDAtLjYgMS4xLS44IDIuNi0uNGwyLjYuNy0xLjgtMi45Yy01LjktOS4zLTkuNC0xMi4zLTExLjUtOS44TTM5IDI2YzAgMS4xLS45IDIuNS0yIDMuMi0yLjQgMS41LTIuNiAzLjQtLjUgNC4yLjguMyAyIDEuNyAyLjUgMy4xLjYgMS41IDEuNCAyLjMgMiAyIDEuNS0uOSAxLjItMy41LS40LTMuNS0yLjEgMC0yLjgtMi44LS44LTMuMyAxLjYtLjQgMS42LS41IDAtLjYtMS4xLS4xLTEuNS0uNi0xLjItMS42LjctMS43IDMuMy0yLjEgMy41LS41LjEuNS4yIDEuNi4zIDIuMiAwIC43LjkgMS40IDEuOSAxLjYgMi4xLjQgMi4zLTIuMy4yLTMuMi0uOC0uMy0yLTEuNy0yLjUtMy4xLTEuMS0zLTMtMy4zLTMtLjUiLz48L3N2Zz4=)](https://langgraph-studio.vercel.app/templates/open?githubUrl=https://github.com/langchain-ai/data-enrichment-js)

-This is a starter project to help you get started with developing a data enrichment agent using [LangGraph.js](https://github.com/langchain-ai/langgraphjs) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio).
+Producing structured results (e.g., to populate a database or spreadsheet) from open-ended research (e.g., web research) is a common use case that LLM-powered agents are well-suited to handle. Here, we provide a general template for this kind of "data enrichment agent" using [LangGraph](https://github.com/langchain-ai/langgraph) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio). It contains an example graph exported from `src/enrichment_agent/graph.ts` that implements a research assistant capable of automatically gathering information on various topics from the web and structuring the results into a user-defined JSON format.
+
+![Overview of agent](./static/overview.png)

 ![](/static/studio.png)

-It contains an example graph exported from `src/enrichment_agent/graph.ts` that implements a research assistant capable of automatically gathering information on various topics from the web.
+## What it does

-## What it does
+The enrichment agent defined in `src/enrichment_agent/graph.ts` performs the following steps:

-The enrichment agent:
-
-1. Takes a research **topic** and requested **extractionSchema** as input
+1. Takes a research **topic** and requested **extractionSchema** as input.
 2. Searches the web for relevant information
 3. Reads and extracts key details from websites
 4. Organizes the findings into the requested structured format
 5. Validates the gathered information for completeness and accuracy

-By default, it's set up to gather information based on the user-provided schema passed through the `extractionSchema` key in the state.
+![Graph view in LangGraph Studio UI](./static/studio.png)

 ## Getting Started

@@ -91,19 +91,71 @@ OPENAI_API_KEY=your-api-key
 End setup instructions
 -->

-3. Customize whatever you'd like in the code.
-4. Open the folder LangGraph Studio!
+3. Consider a research topic and desired extraction schema.
+
+As an example, here is a research topic we can consider.
+
+```
+"Top 5 chip providers for LLM Training"
+```
+
+And here is a desired extraction schema.
+
+```json
+"extractionSchema": {
+    "type": "object",
+    "properties": {
+        "companies": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "Company name"
+                    },
+                    "technologies": {
+                        "type": "string",
+                        "description": "Brief summary of key technologies used by the company"
+                    },
+                    "market_share": {
+                        "type": "string",
+                        "description": "Overview of market share for this company"
+                    },
+                    "future_outlook": {
+                        "type": "string",
+                        "description": "Brief summary of future prospects and developments in the field for this company"
+                    },
+                    "key_powers": {
+                        "type": "string",
+                        "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage"
+                    }
+                },
+                "required": ["name", "technologies", "market_share", "future_outlook"]
+            },
+            "description": "List of companies"
+        }
+    },
+    "required": ["companies"]
+}
+```
+
+4. Open the folder in LangGraph Studio, and input `topic` and `extractionSchema`.

 ## How to customize

-1. **Customize research targets**: Provide a custom `extractionSchema` when calling the graph to gather different types of information.
+1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information.
 2. **Select a different model**: We default to anthropic (claude-3-5-sonnet-20240620). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`.
-3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration in the studio.
+3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration.
+
+For quick prototyping, these configurations can be set in the studio UI.
+
+![Config In Studio](./static/config.png)

 You can also quickly extend this template by:

 - Adding new tools and API connections in [src/enrichment_agent/tools.ts](./src/enrichment_agent/tools.ts). These are just any TypeScript functions.
- Adding additional steps in [src/enrichment_agent/graph.ts](./src/enrichment_agent/graph.ts). Concerned about hallucination? Add a fact-checking step!
+- Adding additional steps in [src/enrichment_agent/graph.ts](./src/enrichment_agent/graph.ts).

 ## Development

@@ -111,7 +163,7 @@ While iterating on your graph, you can edit past state and rerun your app from p

 Follow up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right.

-You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.
+You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.

 LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates.

@@ -47,7 +47,7 @@ export const ConfigurationAnnotation = Annotation.Root({
 * @returns An instance of typeof ConfigurationAnnotation.State with the specified configuration.
 */
 export function ensureConfiguration(
-  config?: RunnableConfig
+  config?: RunnableConfig,
 ): typeof ConfigurationAnnotation.State {
  const configurable = (config?.configurable || {}) as Partial<
    typeof ConfigurationAnnotation.State
@@ -42,7 +42,7 @@ import { loadChatModel } from "./utils.js";

 async function callAgentModel(
  state: typeof StateAnnotation.State,
-  config: RunnableConfig
+  config: RunnableConfig,
 ): Promise<{
  messages: BaseMessage[];
  info?: AnyRecord;
@@ -89,7 +89,7 @@ async function callAgentModel(
        // (where the AI has called tools but no tool message has been provided)
        // we will drop any extra tool_calls.
        response.tool_calls = response.tool_calls?.filter(
-          (tool_call) => tool_call.name === "Info"
+          (tool_call) => tool_call.name === "Info",
        );
        break;
      }
@@ -97,7 +97,7 @@ async function callAgentModel(
  } else {
    // If LLM didn't respect the tool_choice
    response_messages.push(
-      new HumanMessage("Please respond by calling one of the provided tools.")
+      new HumanMessage("Please respond by calling one of the provided tools."),
    );
  }

@@ -117,18 +117,18 @@ const InfoIsSatisfactory = z.object({
  reason: z
    .array(z.string())
    .describe(
-      "First, provide reasoning for why this is either good or bad as a final result. Must include at least 3 reasons."
+      "First, provide reasoning for why this is either good or bad as a final result. Must include at least 3 reasons.",
    ),
  is_satisfactory: z
    .boolean()
    .describe(
-      "After providing your reasoning, provide a value indicating whether the result is satisfactory. If not, you will continue researching."
+      "After providing your reasoning, provide a value indicating whether the result is satisfactory. If not, you will continue researching.",
    ),
  improvement_instructions: z
    .string()
    .optional()
    .describe(
-      "If the result is not satisfactory, provide clear and specific instructions on what needs to be improved or added to make the information satisfactory. This should include details on missing information, areas that need more depth, or specific aspects to focus on in further research."
+      "If the result is not satisfactory, provide clear and specific instructions on what needs to be improved or added to make the information satisfactory. This should include details on missing information, areas that need more depth, or specific aspects to focus on in further research.",
    ),
 });

@@ -151,14 +151,14 @@ const InfoIsSatisfactory = z.object({
 */
 async function reflect(
  state: typeof StateAnnotation.State,
-  config: RunnableConfig
+  config: RunnableConfig,
 ): Promise<{ messages: BaseMessage[] } | { info: AnyRecord }> {
  const configuration = ensureConfiguration(config);
  const presumedInfo = state.info; // The current extracted result
  const lm = state.messages[state.messages.length - 1];
  if (!(lm._getType() === "ai")) {
    throw new Error(
-      `${reflect.name} expects the last message in the state to be an AI message with tool calls. Got: ${lm._getType()}`
+      `${reflect.name} expects the last message in the state to be an AI message with tool calls. Got: ${lm._getType()}`,
    );
  }
  const lastMessage = lm as AIMessage;
@@ -183,7 +183,7 @@ If you don't think it is good, you should be very specific about what could be i
 {presumed_info}`;
  const p1 = checker_prompt.replace(
    "{presumed_info}",
-    JSON.stringify(presumedInfo ?? {}, null, 2)
+    JSON.stringify(presumedInfo ?? {}, null, 2),
  );
  messages.push({ role: "user", content: p1 });

@@ -225,7 +225,7 @@ If you don't think it is good, you should be very specific about what could be i
 *          "tools" if the agent has called any other tool or no tool at all.
 */
 function routeAfterAgent(
-  state: typeof StateAnnotation.State
+  state: typeof StateAnnotation.State,
 ): "callAgentModel" | "reflect" | "tools" | "__end__" {
  const lastMessage: AIMessage = state.messages[state.messages.length - 1];

@@ -257,7 +257,7 @@ function routeAfterAgent(
 */
 function routeAfterChecker(
  state: typeof StateAnnotation.State,
-  config?: RunnableConfig
+  config?: RunnableConfig,
 ): "__end__" | "callAgentModel" {
  const configuration = ensureConfiguration(config);
  const lastMessage = state.messages[state.messages.length - 1];
@@ -268,7 +268,7 @@ function routeAfterChecker(
    }
    if (lastMessage._getType() !== "tool") {
      throw new Error(
-        `routeAfterChecker expected a tool message. Received: ${lastMessage._getType()}.`
+        `routeAfterChecker expected a tool message. Received: ${lastMessage._getType()}.`,
      );
    }
    if ((lastMessage as ToolMessage).status === "error") {
@@ -288,7 +288,7 @@ const workflow = new StateGraph(
    stateSchema: StateAnnotation,
    input: InputStateAnnotation,
  },
-  ConfigurationAnnotation
+  ConfigurationAnnotation,
 )
  .addNode("callAgentModel", callAgentModel)
  .addNode("reflect", reflect)
@@ -81,7 +81,10 @@ async function scrapeWebsite(
 }

 export const createToolNode = (tools: StructuredTool[]) => {
-  const toolNode = async (state: typeof StateAnnotation.State, config: RunnableConfig) => {
+  const toolNode = async (
+    state: typeof StateAnnotation.State,
+    config: RunnableConfig,
+  ) => {
    const message = state.messages[state.messages.length - 1];
    const outputs = await Promise.all(
      (message as AIMessage).tool_calls?.map(async (call) => {
@@ -93,7 +93,7 @@ describe("Researcher", () => {

    const nvidiaPresent = info.providers.some(
      (provider: { name: string }) =>
-        provider.name.toLowerCase().trim() === "nvidia"
+        provider.name.toLowerCase().trim() === "nvidia",
    );
    expect(nvidiaPresent).toBe(true);

@@ -108,7 +108,7 @@ describe("Researcher", () => {
        expect(provider.technology_summary).toBeDefined();
        expect(provider.current_market_share).toBeDefined();
        expect(provider.future_outlook).toBeDefined();
-      }
+      },
    );

    expect(info.overall_market_trends).toBeDefined();