Polish style

This commit is contained in:
William FH
2024-09-18 23:16:57 -07:00
committed by William Fu-Hinthorn
11 changed files with 198 additions and 211 deletions
+2
View File
@@ -2,6 +2,8 @@ TAVILY_API_KEY=...
# To separate your traces from other application
LANGCHAIN_PROJECT=data-enrichment
# LANGCHAIN_API_KEY=...
# LANGCHAIN_TRACING_V2=true
# The following depend on your selected configuration
+3
View File
@@ -50,4 +50,7 @@ jobs:
path: src/
- name: Run tests
env:
ANTHROPIC_API_KEY: afakekey
TAVILY_API_KEY: anotherfakekey
run: yarn test
+63 -39
View File
@@ -93,50 +93,74 @@ End setup instructions
3. Consider a research topic and desired extraction schema.
As an example, here is a research topic we can consider.
As an example, here is a research topic we can consider:
```
"Autonomous agents"
```
With an `extractionSchema` of:
```json
{
"type": "object",
"properties": {
"facts": {
"type": "array",
"description": "An array of facts retrieved from the provided sources",
"items": {
"type": "string"
}
}
},
"required": ["facts"]
}
```
Another example topic with a more complex schema is:
```
"Top 5 chip providers for LLM Training"
```
And here is a desired extraction schema.
And here is a desired `extractionSchema`:
```json
"extractionSchema": {
"type": "object",
"properties": {
"companies": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Company name"
},
"technologies": {
"type": "string",
"description": "Brief summary of key technologies used by the company"
},
"market_share": {
"type": "string",
"description": "Overview of market share for this company"
},
"future_outlook": {
"type": "string",
"description": "Brief summary of future prospects and developments in the field for this company"
},
"key_powers": {
"type": "string",
"description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage"
}
},
"required": ["name", "technologies", "market_share", "future_outlook"]
},
"description": "List of companies"
}
},
"required": ["companies"]
{
"type": "object",
"properties": {
"companies": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Company name"
},
"technologies": {
"type": "string",
"description": "Brief summary of key technologies used by the company"
},
"market_share": {
"type": "string",
"description": "Overview of market share for this company"
},
"future_outlook": {
"type": "string",
"description": "Brief summary of future prospects and developments in the field for this company"
},
"key_powers": {
"type": "string",
"description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage"
}
},
"required": ["name", "technologies", "market_share", "future_outlook"]
},
"description": "List of companies"
}
},
"required": ["companies"]
}
```
@@ -145,7 +169,7 @@ And here is a desired extraction schema.
## How to customize
1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information.
2. **Select a different model**: We default to anthropic (claude-3-5-sonnet-20240620). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`.
2. **Select a different model**: We default to anthropic (`claude-3-5-sonnet-20240620`). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`.
3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration.
For quick prototyping, these configurations can be set in the studio UI.
@@ -163,7 +187,7 @@ While iterating on your graph, you can edit past state and rerun your app from p
Follow up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right.
You can find the latest (under construction) docs on [LangGraph.JS](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.
You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.
LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates.
+2 -1
View File
@@ -23,7 +23,7 @@
"dependencies": {
"@langchain/anthropic": "^0.3.1",
"@langchain/community": "^0.3.1",
"@langchain/core": "^0.3.2",
"@langchain/core": "^0.3.3",
"@langchain/langgraph": "^0.2.8",
"langchain": "^0.3.2",
"langsmith": "^0.1.59",
@@ -36,6 +36,7 @@
"@jest/globals": "^29.7.0",
"@tsconfig/recommended": "^1.0.7",
"@types/jest": "^29.5.0",
"@types/node": "^20.14.8",
"@typescript-eslint/eslint-plugin": "^5.59.8",
"@typescript-eslint/parser": "^5.59.8",
"dotenv": "^16.4.5",
+6 -6
View File
@@ -49,15 +49,15 @@ export const ConfigurationAnnotation = Annotation.Root({
export function ensureConfiguration(
config?: RunnableConfig,
): typeof ConfigurationAnnotation.State {
const configurable = (config?.configurable || {}) as Partial<
const configurable = (config?.configurable ?? {}) as Partial<
typeof ConfigurationAnnotation.State
>;
return {
model: configurable.model || "anthropic/claude-3-5-sonnet-20240620",
prompt: configurable.prompt || MAIN_PROMPT,
maxSearchResults: configurable.maxSearchResults || 10,
maxInfoToolCalls: configurable.maxInfoToolCalls || 3,
maxLoops: configurable.maxLoops || 6,
model: configurable.model ?? "anthropic/claude-3-5-sonnet-20240620",
prompt: configurable.prompt ?? MAIN_PROMPT,
maxSearchResults: configurable.maxSearchResults ?? 5,
maxInfoToolCalls: configurable.maxInfoToolCalls ?? 3,
maxLoops: configurable.maxLoops ?? 6,
};
}
+13 -14
View File
@@ -14,12 +14,13 @@ import { RunnableConfig } from "@langchain/core/runnables";
import { tool } from "@langchain/core/tools";
import { StateGraph } from "@langchain/langgraph";
import { z } from "zod";
import {
ConfigurationAnnotation,
ensureConfiguration,
} from "./configuration.js";
import { AnyRecord, InputStateAnnotation, StateAnnotation } from "./state.js";
import { toolNode, TOOLS } from "./tools.js";
import { MODEL_TOOLS, toolNode } from "./tools.js";
import { loadChatModel } from "./utils.js";
/**
@@ -43,15 +44,13 @@ import { loadChatModel } from "./utils.js";
async function callAgentModel(
state: typeof StateAnnotation.State,
config: RunnableConfig,
): Promise<{
messages: BaseMessage[];
info?: AnyRecord;
loopStep: number;
}> {
): Promise<typeof StateAnnotation.Update> {
const configuration = ensureConfiguration(config);
// First, define the info tool. This uses the user-provided
// json schema to define the research targets
const infoTool = tool(async (_args: AnyRecord) => {}, {
// We pass an empty function because we will not actually invoke this tool.
// We are just using it for formatting.
const infoTool = tool(async () => {}, {
name: "Info",
description: "Call this when you have gathered all the relevant info",
schema: state.extractionSchema,
@@ -61,7 +60,7 @@ async function callAgentModel(
if (!rawModel.bindTools) {
throw new Error("Chat model does not support tool binding");
}
const model = rawModel.bindTools([...TOOLS, infoTool], {
const model = rawModel.bindTools([...MODEL_TOOLS, infoTool], {
tool_choice: "any",
});
@@ -73,7 +72,7 @@ async function callAgentModel(
// Next, we'll call the model.
const response: AIMessage = await model.invoke(messages);
const response_messages = [response];
const responseMessages = [response];
// If the model has collected enough information to fill out
// the provided schema, great! It will call the "Info" tool
@@ -96,13 +95,13 @@ async function callAgentModel(
}
} else {
// If LLM didn't respect the tool_choice
response_messages.push(
responseMessages.push(
new HumanMessage("Please respond by calling one of the provided tools."),
);
}
return {
messages: response_messages,
messages: responseMessages,
info,
// This increments the step counter.
// We configure a max step count to avoid infinite research loops
@@ -187,7 +186,7 @@ If you don't think it is good, you should be very specific about what could be i
);
messages.push({ role: "user", content: p1 });
// Calll the model
// Call the model
const response = await boundModel.invoke(messages);
if (response.is_satisfactory && presumedInfo) {
return {
@@ -197,7 +196,7 @@ If you don't think it is good, you should be very specific about what could be i
tool_call_id: lastMessage.tool_calls?.[0]?.id || "",
content: response.reason.join("\n"),
name: "Info",
additional_kwargs: { artifact: response },
artifact: response,
status: "success",
}),
],
@@ -209,7 +208,7 @@ If you don't think it is good, you should be very specific about what could be i
tool_call_id: lastMessage.tool_calls?.[0]?.id || "",
content: `Unsatisfactory response:\n${response.improvement_instructions}`,
name: "Info",
additional_kwargs: { artifact: response },
artifact: response,
status: "error",
}),
],
+4 -5
View File
@@ -1,6 +1,5 @@
import { Annotation, messagesStateReducer } from "@langchain/langgraph";
import { type BaseMessage } from "@langchain/core/messages";
import { z } from "zod";
// eslint-disable-next-line
export type AnyRecord = Record<string, any>;
@@ -11,11 +10,11 @@ export const InputStateAnnotation = Annotation.Root({
* The info state tracks the current extracted data for the given topic,
* conforming to the provided schema.
*/
info: Annotation<z.infer<z.ZodObject<z.ZodRawShape>>>,
info: Annotation<AnyRecord>,
/**
* The schema defines the information the agent is tasked with filling out.
*/
extractionSchema: Annotation<z.ZodObject<z.ZodRawShape>>,
extractionSchema: Annotation<AnyRecord>,
// Feel free to add additional attributes to your state as needed.
// Common examples include retrieved documents, extracted entities, API connections, etc.
});
@@ -60,17 +59,17 @@ export const StateAnnotation = Annotation.Root({
reducer: messagesStateReducer,
default: () => [],
}),
topic: Annotation<string>,
/**
* The info state tracks the current extracted data for the given topic,
* conforming to the provided schema.
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
info: Annotation<AnyRecord>,
/**
* The schema defines the information the agent is tasked with filling out.
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
extractionSchema: Annotation<AnyRecord>,
/**
+90 -127
View File
@@ -7,11 +7,12 @@
*/
import { TavilySearchResults } from "@langchain/community/tools/tavily_search";
import { RunnableConfig } from "@langchain/core/runnables";
import { tool } from "@langchain/core/tools";
import { INFO_PROMPT } from "./prompts.js";
import { ensureConfiguration } from "./configuration.js";
import { AnyRecord, StateAnnotation } from "./state.js";
import { StructuredTool, tool } from "@langchain/core/tools";
import { curry, getTextContent, loadChatModel } from "./utils.js";
import { StateAnnotation } from "./state.js";
import { getTextContent, loadChatModel } from "./utils.js";
import {
AIMessage,
isBaseMessage,
@@ -19,10 +20,14 @@ import {
} from "@langchain/core/messages";
import { z } from "zod";
async function search(
{ query }: { query: string },
config: RunnableConfig,
): Promise<Array<AnyRecord> | null> {
/**
* Initialize tools within a function so that they have access to the current
* state and config at runtime.
*/
function initializeTools(
state?: typeof StateAnnotation.State,
config?: RunnableConfig,
) {
/**
* Search for general results.
*
@@ -31,135 +36,93 @@ async function search(
* for answering questions about current events.
*/
const configuration = ensureConfiguration(config);
const wrapped = new TavilySearchResults({
const searchTool = new TavilySearchResults({
maxResults: configuration.maxSearchResults,
});
const result = await wrapped.invoke(query, config);
return result as Array<AnyRecord> | null;
async function scrapeWebsite({ url }: { url: string }): Promise<string> {
/**
* Scrape and summarize content from a given URL.
*/
const response = await fetch(url);
const content = await response.text();
const truncatedContent = content.slice(0, 50000);
const p = INFO_PROMPT.replace(
"{info}",
JSON.stringify(state?.extractionSchema, null, 2),
)
.replace("{url}", url)
.replace("{content}", truncatedContent);
const rawModel = await loadChatModel(configuration.model);
const result = await rawModel.invoke(p);
return getTextContent(result.content);
}
const scraperTool = tool(scrapeWebsite, {
name: "scrapeWebsite",
description: "Scrape content from a given website URL",
schema: z.object({
url: z.string().url().describe("The URL of the website to scrape"),
}),
});
return [searchTool, scraperTool];
}
/**
 * Prompt template used when summarising a scraped web page.
 *
 * The `{info}`, `{url}` and `{content}` placeholders are substituted with
 * `String.prototype.replace` by `scrapeWebsite`: `{info}` receives the
 * JSON-serialised extraction schema, `{url}` the page URL and `{content}`
 * the (truncated) page text.
 */
const INFO_PROMPT = `You are doing web research on behalf of a user. You are trying to find out this information:
<info>
{info}
</info>
You just scraped the following website: {url}
Based on the website content below, jot down some notes about the website.
<Website content>
{content}
</Website content>`;
async function scrapeWebsite(
{
url,
__state,
}: {
url: string;
__state?: typeof StateAnnotation.State;
},
export const toolNode = async (
state: typeof StateAnnotation.State,
config: RunnableConfig,
): Promise<string> {
/**
* Scrape and summarize content from a given URL.
*/
const response = await fetch(url);
const content = await response.text();
const truncatedContent = content.slice(0, 50000);
const configuration = ensureConfiguration(config);
const p = INFO_PROMPT.replace(
"{info}",
JSON.stringify(__state?.extractionSchema, null, 2),
)
.replace("{url}", url)
.replace("{content}", truncatedContent);
const rawModel = await loadChatModel(configuration.model);
const result = await rawModel.invoke(p, { callbacks: config?.callbacks });
return getTextContent(result.content);
}
export const createToolNode = (tools: StructuredTool[]) => {
const toolNode = async (
state: typeof StateAnnotation.State,
config: RunnableConfig,
) => {
const message = state.messages[state.messages.length - 1];
const outputs = await Promise.all(
(message as AIMessage).tool_calls?.map(async (call) => {
const tool = tools.find((tool) => tool.name === call.name);
try {
if (tool === undefined) {
throw new Error(`Tool "${call.name}" not found.`);
}
const newCall = {
...call,
args: {
__state: state,
...call.args,
},
};
const output = await tool.invoke(
{ ...newCall, type: "tool_call" },
config,
);
if (isBaseMessage(output) && output._getType() === "tool") {
return output;
} else {
return new ToolMessage({
name: tool.name,
content:
typeof output === "string" ? output : JSON.stringify(output),
tool_call_id: call.id ?? "",
});
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
) => {
const message = state.messages[state.messages.length - 1];
// Initialize the tools within the context of the node so that the tools
// have the current state of the graph and the config in scope.
// See: https://js.langchain.com/docs/how_to/tool_runtime
const tools = initializeTools(state, config);
const outputs = await Promise.all(
(message as AIMessage).tool_calls?.map(async (call) => {
const tool = tools.find((tool) => tool.name === call.name);
try {
if (tool === undefined) {
throw new Error(`Tool "${call.name}" not found.`);
}
const newCall = {
...call,
args: {
__state: state,
...call.args,
},
};
const output = await tool.invoke(
{ ...newCall, type: "tool_call" },
config,
);
if (isBaseMessage(output) && output._getType() === "tool") {
return output;
} else {
return new ToolMessage({
content: `Error: ${e.message}\n Please fix your mistakes.`,
name: call.name,
name: tool.name,
content:
typeof output === "string" ? output : JSON.stringify(output),
tool_call_id: call.id ?? "",
status: "error",
});
}
}) ?? [],
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
return new ToolMessage({
content: `Error: ${e.message}\n Please fix your mistakes.`,
name: call.name,
tool_call_id: call.id ?? "",
status: "error",
});
}
}) ?? [],
);
return { messages: outputs };
};
return toolNode;
return { messages: outputs };
};
// Wraps the `search` function as a tool named "search" whose only input is
// a `query` string.
const searchTool = tool(search, {
name: "search",
description: "Search the internet for information on a given topic",
schema: z.object({
query: z.string().describe("The search query to look up"),
}),
});
// Exposed to the chat model: the graph binds these tools to the model.
// `scrapeWebsite` is wrapped with `curry` so that `__state` defaults to
// undefined, and the schema declared here only contains `url`.
export const TOOLS = [
searchTool,
tool(curry(scrapeWebsite, { __state: undefined }), {
name: "scrapeWebsite",
description: "Scrape content from a given website URL",
schema: z.object({
url: z.string().url().describe("The URL of the website to scrape"),
}),
}),
];
// Graph node that executes the tool calls found on the last message.
// Unlike the model-facing tool list, the `scrapeWebsite` schema here also
// accepts `__state` (as `z.any()`), because `createToolNode` merges the
// current graph state into each call's args under that key.
export const toolNode = createToolNode([
searchTool,
tool(scrapeWebsite, {
name: "scrapeWebsite",
description: "Scrape content from a given website URL",
schema: z.object({
url: z.string().url().describe("The URL of the website to scrape"),
__state: z.any(),
}),
}),
]);
// No state or config required here since these are just bound to the chat model
// and are only used to define schema.
// The tool node above will actually call the functions.
// NOTE(review): this runs `initializeTools()` at module load, which constructs
// the search tool with default configuration — presumably this is why the
// tests/CI provide a TAVILY_API_KEY value; confirm.
export const MODEL_TOOLS = initializeTools();
-15
View File
@@ -5,21 +5,6 @@ import {
} from "@langchain/core/messages";
import { initChatModel } from "langchain/chat_models/universal";
/**
 * Pre-fills part of the first (object) argument of `fn`.
 *
 * The returned function accepts the remaining keys of that first argument
 * (keys present in `partialArg` become optional and, when supplied, override
 * the pre-filled values), followed by any further positional arguments of
 * `fn`. The `this` value of the call is forwarded to `fn` unchanged.
 *
 * @param fn - Function whose first parameter is an options-style object.
 * @param partialArg - Default values merged into the first argument.
 * @returns A wrapper that merges `partialArg` with its own first argument
 * and delegates to `fn`.
 */
export function curry<
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  F extends (...args: any[]) => any,
  P extends Partial<Parameters<F>[0]> = Partial<Parameters<F>[0]>,
>(fn: F, partialArg: P) {
  return function (
    this: unknown,
    arg: Omit<Parameters<F>[0], keyof P> & Partial<P>,
    ...rest: Parameters<F> extends [unknown, ...infer R] ? R : never
  ): ReturnType<F> {
    // Keys supplied by the caller take precedence over the pre-filled ones.
    const firstArg = { ...partialArg, ...arg } as Parameters<F>[0];
    const callArgs = [firstArg, ...rest];
    const outcome = fn.apply(this, callArgs);
    return outcome as ReturnType<F>;
  };
}
/**
* Helper function to extract text content from a complex message.
*
+4
View File
@@ -2,6 +2,10 @@ import { describe, it, expect } from "@jest/globals";
import { graph } from "../src/enrichment_agent/graph.js";
describe("Web Research Agent", () => {
beforeAll(() => {
process.env.TAVILY_API_KEY = "dummy";
});
it("should initialize and compile the graph", () => {
expect(graph).toBeDefined();
expect(graph.name).toBe("ResearchTopic");
+11 -4
View File
@@ -656,10 +656,10 @@
zod "^3.22.3"
zod-to-json-schema "^3.22.5"
"@langchain/core@^0.3.2":
version "0.3.2"
resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.2.tgz#aff6d83149a40e0e735910f583aca0f1dd7d1bab"
integrity sha512-FeoDOStP8l1YdxgykpXnVoEnl4lxGNSOdYzUJN/EdFtkc6cIjDDS5+xewajme0+egaUsO4tGLezKaFpoWxAyQA==
"@langchain/core@^0.3.3":
version "0.3.3"
resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.3.tgz#af12fd767ff2fcedb0a71bd79e6588d7dd52b6b6"
integrity sha512-WAtkmhbdl2T41qzimTzhb3pXCHQxO4onqxzPxgdf3KftQdTwLq0YYBDhozRMZLNAd/+cfH0ymZGaZSsnc9Ogsg==
dependencies:
ansi-styles "^5.0.0"
camelcase "6"
@@ -876,6 +876,13 @@
dependencies:
undici-types "~5.26.4"
"@types/node@^20.14.8":
version "20.16.5"
resolved "https://registry.yarnpkg.com/@types/node/-/node-20.16.5.tgz#d43c7f973b32ffdf9aa7bd4f80e1072310fd7a53"
integrity sha512-VwYCweNo3ERajwy0IUlqqcyZ8/A7Zwa9ZP3MnENWcB11AejO+tLy3pu850goUW2FC/IJMdZUfKpX/yxL1gymCA==
dependencies:
undici-types "~6.19.2"
"@types/retry@0.12.0":
version "0.12.0"
resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d"