mirror of
https://github.com/langchain-ai/langchain-benchmarks.git
synced 2026-07-01 01:37:54 -04:00
@@ -1,9 +1,15 @@
|
||||
"""Package for helping to evaluate agent runs."""
|
||||
from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
|
||||
from langchain_benchmarks.tool_usage.agents import (
|
||||
CustomRunnableAgentFactory,
|
||||
StandardAgentFactory,
|
||||
apply_agent_executor_adapter,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.evaluators import get_eval_config
|
||||
|
||||
# Please keep this list sorted!
|
||||
__all__ = [
|
||||
"apply_agent_executor_adapter",
|
||||
"CustomRunnableAgentFactory",
|
||||
"get_eval_config",
|
||||
"StandardAgentFactory",
|
||||
]
|
||||
|
||||
@@ -1,25 +1,11 @@
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
from langchain_benchmarks.tool_usage.agents.anthropic_tool_user import (
|
||||
AnthropicToolUserFactory,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.factory import (
|
||||
CustomAgentFactory,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.openai_assistant import (
|
||||
OpenAIAssistantFactory,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.openai_functions import OpenAIAgentFactory
|
||||
from langchain_benchmarks.tool_usage.agents.runnable_agent import (
|
||||
CustomRunnableAgentFactory,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.tool_using_agent import StandardAgentFactory
|
||||
|
||||
__all__ = [
|
||||
"OpenAIAgentFactory",
|
||||
"OpenAIAssistantFactory",
|
||||
"apply_agent_executor_adapter",
|
||||
"CustomAgentFactory",
|
||||
"AnthropicToolUserFactory",
|
||||
"CustomRunnableAgentFactory",
|
||||
"StandardAgentFactory",
|
||||
]
|
||||
|
||||
@@ -1,271 +0,0 @@
|
||||
"""Wrapper around the anthropic tool user SDK.
|
||||
|
||||
The anthropic tool user SDK is an alpha release so this code will likely be
|
||||
changed or deleted in the future. It's here simply to make it easier to benchmark
|
||||
the performance of the existing tool user SDK, to compare it with the performance
|
||||
of other implementations.
|
||||
"""
|
||||
|
||||
from importlib.util import find_spec
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
|
||||
from langchain.tools import StructuredTool
|
||||
from langchain_core.callbacks.manager import trace_as_chain_group
|
||||
from langchain_core.runnables import Runnable, RunnableConfig, RunnableLambda
|
||||
|
||||
from langchain_benchmarks import rate_limiting
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
|
||||
|
||||
def convert_langchain_tool_to_tool_user_tool(lc_tool: StructuredTool) -> Any:
    """Wrap a LangChain structured tool as a tool for the Anthropic tool user SDK.

    Args:
        lc_tool: The LangChain tool to expose. Its ``args_schema`` supplies the
            parameter list; its ``name`` and ``description`` are passed through.

    Returns:
        An instance of a ``BaseTool`` subclass whose ``use_tool`` forwards the
        keyword arguments it receives, as one dict, to ``lc_tool``.
    """
    # Imported inside the function so that importing this module does not
    # require the tool user SDK to be installed.
    from tool_use_package.tools.base_tool import BaseTool

    class DynamicTool(BaseTool):
        def use_tool(self, **kwargs):
            return lc_tool(kwargs)

    # JSON-schema type names mapped to the python-style names handed to the SDK.
    # Names not listed here are passed through unchanged.
    python_type_names = {
        "string": "str",  # str or string?
        "integer": "int",
        "number": "float",
        "boolean": "bool",
    }

    field_specs = lc_tool.args_schema.schema()["properties"]
    # NOTE(review): a property schema without a "type" key raises KeyError here.
    parameters = [
        {
            "name": field_name,
            "type": python_type_names.get(spec["type"], spec["type"]),
            "description": spec.get("description", ""),
        }
        for field_name, spec in field_specs.items()
    ]

    return DynamicTool(lc_tool.name, lc_tool.description, parameters)
|
||||
|
||||
|
||||
def _handle_tool_inputs(
    tool_inputs: List[Dict[str, Any]],
    tools: Sequence[StructuredTool],
    config: Optional[RunnableConfig] = None,
) -> Dict[str, Any]:
    """Execute the requested tool calls and build the ``tool_outputs`` message.

    Args:
        tool_inputs: Requested calls; each entry must carry the keys
            ``tool_name`` and ``tool_arguments``.
        tools: The available tools, looked up by their ``name`` attribute.
        config: Passed as ``config`` to every ``tool.invoke`` call.

    Returns:
        A dict with ``role`` set to ``"tool_outputs"``. When every call
        succeeds, ``tool_outputs`` is a list of
        ``{"tool_name": ..., "tool_result": ...}`` entries and ``tool_error``
        is None. On the first failure (a tool that raises, or a tool name
        that is not available) processing stops, ``tool_outputs`` is None and
        ``tool_error`` holds the error text.
    """
    tool_by_name = {tool.name: tool for tool in tools}
    tool_error: Optional[str] = None
    tool_outputs: Optional[List[Dict[str, Any]]] = []

    for tool_input in tool_inputs:
        tool_name = tool_input["tool_name"]
        tool_arguments = tool_input["tool_arguments"]

        tool = tool_by_name.get(tool_name)
        if tool is None:
            # Report an unknown tool the same way as a failing tool instead of
            # letting a bare KeyError abort the whole run.
            tool_error = (
                f"Unknown tool: {tool_name!r}. "
                f"Available tools: {sorted(tool_by_name)}"
            )
            tool_outputs = None
            break

        try:
            tool_result = tool.invoke(tool_arguments, config=config)
        except Exception as e:  # Break on first error
            tool_error = str(e)
            tool_outputs = None
            break

        tool_outputs.append(
            {
                "tool_name": tool_name,
                "tool_result": tool_result,
            }
        )

    return {
        "role": "tool_outputs",
        "tool_outputs": tool_outputs,
        "tool_error": tool_error,
    }
|
||||
|
||||
|
||||
def run_anthropic_agent_simple(
    tools: Sequence[StructuredTool],
    user_message: str,
    *,
    max_iterations: int = 30,
    config: Optional[RunnableConfig] = None,
    **kwargs,
) -> List[dict]:
    """Run the Anthropic tool user in manual mode until the assistant replies.

    Args:
        tools: LangChain tools made available to the model.
        user_message: Content of the initial human message.
        max_iterations: Maximum number of ``use_tools`` calls to make.
        config: Optional runnable config; only its ``callbacks`` entry is
            read, and it is used for tracing.
        **kwargs: ``verbose`` (default False) is popped and handed to
            ``use_tools``; all remaining entries are forwarded to ``ToolUser``.

    Returns:
        The complete message history, beginning with the human message.

    Raises:
        ValueError: If ``max_iterations`` is exhausted before a message with
            role ``assistant`` is produced.
    """
    # Imported inside the function so that importing this module does not
    # require the tool user SDK to be installed.
    from tool_use_package.tool_user import ToolUser

    verbose = kwargs.pop("verbose", False)

    sdk_tools = [convert_langchain_tool_to_tool_user_tool(tool) for tool in tools]
    tool_user = ToolUser(sdk_tools, **kwargs)

    opening_message = {
        "role": "human",
        "content": user_message,
        "tool_error": None,
        "tool_outputs": [],
        "tool_inputs": [],
    }
    messages = [opening_message]
    run_callbacks = config.get("callbacks", None) if config else None

    with trace_as_chain_group(
        "Anthropic Agent Run",
        inputs={"user_message": user_message},
        callback_manager=run_callbacks,
    ) as group_manager:
        for num_iteration in range(max_iterations):
            # Each iteration is traced as a child group of the overall run.
            with trace_as_chain_group(
                f"Anthropic Agent Iteration {num_iteration}",
                inputs={"messages": messages},
                callback_manager=group_manager.parent_run_manager.get_child(),
            ) as iteration_manager:
                last_message = tool_user.use_tools(
                    messages, execution_mode="manual", verbose=verbose
                )
                role = last_message["role"]
                new_messages = [last_message]

                if role == "tool_inputs":
                    # Manual execution mode: the tools are run here and the
                    # results are appended for the next iteration.
                    tool_callbacks = iteration_manager.parent_run_manager.get_child()
                    new_messages.append(
                        _handle_tool_inputs(
                            last_message["tool_inputs"],
                            tools,
                            config={"callbacks": tool_callbacks},
                        )
                    )

                iteration_manager.on_chain_end(outputs=new_messages)
                messages.extend(new_messages)

                # An assistant message ends the loop.
                if role == "assistant":
                    break
        else:
            # The loop ran to completion without an assistant message.
            raise ValueError("Max iterations reached")
        group_manager.on_chain_end(outputs=messages)
    return messages
|
||||
|
||||
|
||||
def convert_messages_to_finalized_output(
    messages: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Convert the history of messages into the output dict used for eval.

    The returned dict has the following structure:

        {
            "output": "The content of the final assistant message",
            "actual_steps": ["add_x_y", "multiply_x_y"],
        }

    ``actual_steps`` lists, in order, the ``tool_name`` of every tool call
    found in messages whose role is ``tool_inputs``.

    Args:
        messages: The message history; every message must have a ``role`` key
            and the last one must have role ``assistant``.

    Returns:
        A dict with the keys ``output`` and ``actual_steps``.

    Raises:
        ValueError: If ``messages`` is empty, if the last message is not from
            the assistant, or if any message lacks a ``role``.
    """
    if not messages:
        raise ValueError("Expected at least one message")

    last_message = messages[-1]

    # `.get` so that a final message without a role produces the intended
    # ValueError rather than a KeyError.
    if last_message.get("role") != "assistant":
        raise ValueError(
            f"Expected the last message to be from the assistant. "
            f"Instead got {last_message}."
        )

    actual_steps = []

    for message in messages:
        if "role" not in message:
            raise ValueError(f"Expected role in message {message}")

        if message["role"] == "tool_inputs":
            # Record the name of each tool the model asked to run.
            actual_steps.extend(
                tool_input["tool_name"] for tool_input in message["tool_inputs"]
            )

    return {
        "output": last_message["content"],
        "actual_steps": actual_steps,
    }
|
||||
|
||||
|
||||
def create_agent(tools: Sequence[StructuredTool]) -> RunnableLambda:
    """Build a runnable that answers ``input["input"]`` with the tool user agent.

    Args:
        tools: The tools the agent is allowed to call.

    Returns:
        A ``RunnableLambda`` whose output is the dict produced by
        ``convert_messages_to_finalized_output``.
    """

    def run_agent(
        input: dict, config: Optional[RunnableConfig] = None, **kwargs
    ) -> dict:
        """Run the agent loop and convert its message history for eval."""
        history = run_anthropic_agent_simple(
            tools, input["input"], config=config, **kwargs
        )
        finalized = convert_messages_to_finalized_output(history)
        return finalized

    return RunnableLambda(run_agent)
|
||||
|
||||
|
||||
class AnthropicToolUserFactory:
    """Factory that builds tool user agents (Anthropic SDK) for a task."""

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
    ) -> None:
        """Create an Anthropic tool user agent factory for the given task.

        Args:
            task: The task to create an agent factory for.
            rate_limiter: The rate limiter to use

        Raises:
            ImportError: If ``tool_use_package`` cannot be found.
        """
        self.task = task
        self.rate_limiter = rate_limiter
        if not find_spec("tool_use_package"):
            raise ImportError(
                'Could not import "tool_use_package". Please '
                "follow instructions here to install "
                "https://github.com/anthropics/anthropic-tools/tree/main"
            )

    def __call__(self, **kwargs: Any) -> Runnable:
        """Create a fresh environment and a runnable agent bound to it.

        Args:
            **kwargs: If any are given, they are bound onto the agent runnable
                with ``bind``.

        Returns:
            A runnable that adds the task instructions to the question and
            then runs the adapted agent, rate limited when a limiter was
            configured.
        """
        env = self.task.create_environment()

        def _add_task_instructions(
            input: dict, config: Optional[RunnableConfig] = None, **kwargs
        ) -> dict:
            """Add task instructions to the question."""
            if not isinstance(input, dict) or "question" not in input:
                raise ValueError(
                    f"Expected input to be a dict with key `question`. "
                    f"Found {type(input)}."
                )

            # Work on a shallow copy so the caller's dict is not modified.
            input = input.copy()
            input["question"] = (
                f"{self.task.instructions}\nWrite down your answer, "
                f"but do not explain it. Input: `{input['question']}`"
            )
            return input

        agent = create_agent(env.tools)  # type: ignore

        if kwargs:
            agent = agent.bind(**kwargs)

        # Per the original note on this call: the adapter returns `state` in
        # the output if the environment has a state reader, and makes sure
        # that `output` is always in the output.
        runnable = _add_task_instructions | apply_agent_executor_adapter(
            agent, state_reader=env.read_state
        )

        if self.rate_limiter:  # Add a rate limiter
            runnable = rate_limiting.with_rate_limit(runnable, self.rate_limiter)

        return runnable
|
||||
@@ -0,0 +1,11 @@
|
||||
import abc
|
||||
|
||||
from langchain_core.runnables import Runnable
|
||||
|
||||
|
||||
class AgentFactory(abc.ABC):
    """Interface for callables that build agents.

    Concrete factories must implement ``__call__``.
    """

    @abc.abstractmethod
    def __call__(self) -> Runnable:
        """Build and return a new agent."""
|
||||
@@ -1,133 +0,0 @@
|
||||
from typing import List, Literal, Optional, Sequence, Tuple, Union
|
||||
|
||||
from langchain.agents import AgentOutputParser
|
||||
from langchain.prompts.chat import ChatPromptTemplate
|
||||
from langchain.schema.runnable import Runnable
|
||||
from langchain.tools import StructuredTool
|
||||
from langchain_core.agents import AgentAction, AgentFinish
|
||||
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
|
||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
|
||||
from langchain_core.prompts import MessagesPlaceholder
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
from langchain_benchmarks import RateLimiter
|
||||
from langchain_benchmarks.rate_limiting import with_rate_limit
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
|
||||
AstPrinter,
|
||||
FunctionResult,
|
||||
TypeScriptEncoder,
|
||||
XMLEncoder,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.prompts import (
|
||||
_AGENT_INSTRUCTIONS_BLOB_STYLE,
|
||||
)
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import (
|
||||
convert_tool_to_function_definition,
|
||||
)
|
||||
|
||||
|
||||
def format_steps_for_chat(
    intermediate_steps: List[Tuple[AgentAction, str]],
    ast_printer: AstPrinter,
) -> List[BaseMessage]:
    """Turn the agent's intermediate steps into chat messages.

    Args:
        intermediate_steps: ``(action, observation)`` pairs taken so far.
        ast_printer: Encoder used to render each tool result.

    Returns:
        The chat messages, in step order.
    """
    chat_history = []
    for agent_action, tool_output in intermediate_steps:
        if agent_action.tool != "_Exception":
            # The action's messages contain the tool invocation request from
            # the LLM; the rendered result of the invocation follows them.
            chat_history.extend(agent_action.messages)
            outcome: FunctionResult = {
                "name": agent_action.tool,
                "error": None,
                "result": tool_output,
            }
            rendered = ast_printer.visit_function_result(outcome)
            chat_history.append(HumanMessage(content=rendered))
            continue

        # Exception step: replay the logged output, then the error text.
        chat_history.append(AIMessage(content=agent_action.log))
        # Tool input is the error message for the exception
        chat_history.append(HumanMessage(content=agent_action.tool_input))

    return chat_history
|
||||
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
class AgentInput(TypedDict):
    """Dictionary of inputs consumed by the agent."""

    # The input to the agent.
    input: str
    # The intermediate steps taken by the agent.
    intermediate_steps: List[Tuple[AgentAction, str]]
    # A list of messages that can be used to form example traces.
    examples: NotRequired[List[BaseMessage]]
|
||||
|
||||
|
||||
def create_agent(
    model: Union[BaseChatModel, BaseLanguageModel],
    tools: Sequence[StructuredTool],
    parser: AgentOutputParser,
    *,
    ast_printer: Union[AstPrinter, Literal["xml", "typescript"]] = "xml",
    rate_limiter: Optional[RateLimiter] = None,
) -> Runnable[AgentInput, Union[AgentAction, AgentFinish]]:
    """Create an agent for a chat model.

    Args:
        model: The model that drives the agent.
        tools: Tools to describe to the model in the system prompt.
        parser: Parser applied to the model output.
        ast_printer: Encoder for tool descriptions and tool results; either
            an ``AstPrinter`` instance or one of the names ``"xml"`` or
            ``"typescript"``.
        rate_limiter: Optional rate limiter applied to the model.

    Returns:
        A runnable mapping an ``AgentInput`` to the parser's output.

    Raises:
        ValueError: If ``ast_printer`` is an unknown name.
        TypeError: If ``ast_printer`` is neither a string nor an
            ``AstPrinter``.
    """
    if isinstance(ast_printer, str):
        if ast_printer == "xml":
            ast_printer_ = XMLEncoder()
        elif ast_printer == "typescript":
            ast_printer_ = TypeScriptEncoder()
        else:
            raise ValueError(f"Unknown ast printer: {ast_printer}")
    elif isinstance(ast_printer, AstPrinter):
        ast_printer_ = ast_printer
    else:
        raise TypeError(
            f"Expected AstPrinter or str, got {type(ast_printer)} for `ast_printer`"
        )

    function_definitions = [convert_tool_to_function_definition(tool) for tool in tools]
    tool_description = ast_printer_.visit_function_definitions(function_definitions)

    template = ChatPromptTemplate.from_messages(
        [
            ("system", _AGENT_INSTRUCTIONS_BLOB_STYLE),
            MessagesPlaceholder("examples"),  # Can use to add example traces
            ("human", "{input}"),
            MessagesPlaceholder("history"),
        ]
    ).partial(tool_description=tool_description)

    # For the time being, hard-coding the fact that we're using a <tool> tag.
    model = model.bind(stop=["</tool>"])

    if rate_limiter:
        # Apply a rate limiter if it was provided
        model = with_rate_limit(model, rate_limiter)

    agent = (
        {
            "input": lambda x: x["input"],
            # The steps taken so far are replayed as chat history.
            "history": lambda x: format_steps_for_chat(
                x["intermediate_steps"], ast_printer_
            ),
            "examples": lambda x: x.get("examples", []),
        }
        | template
        | model
        | parser
    )
    return agent
|
||||
@@ -1,240 +0,0 @@
|
||||
"""Prototyping code for rendering function definitions, invocations, and results.
|
||||
|
||||
Types are simplified for now to `str`.
|
||||
|
||||
We should actually support something like pydantic or jsonschema for the types, so
|
||||
we can expand them recursively for nested types.
|
||||
"""
|
||||
import abc
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
|
||||
class Parameter(TypedDict):
    """A single parameter of a function definition."""

    name: str  # parameter name
    type: str  # type name, kept as a plain string
    description: str  # text describing the parameter
|
||||
|
||||
|
||||
class Arguments(TypedDict):
    """One argument supplied to a function when it is invoked."""

    name: Optional[str]  # argument name, or None
    value: Any  # the value passed for the argument
|
||||
|
||||
|
||||
class ReturnValue(TypedDict):
|
||||
"""Representation for a return value of a function call."""
|
||||
|
||||
type: str
|
||||
description: NotRequired[str]
|
||||
|
||||
|
||||
class FunctionDefinition(TypedDict):
    """Everything needed to describe a callable function."""

    name: str  # function name
    description: str  # Function description
    parameters: List[Parameter]  # the function's parameters
    return_value: ReturnValue  # what the function returns
|
||||
|
||||
|
||||
class FunctionInvocation(TypedDict):
    """A request to call a function with specific arguments."""

    id: NotRequired[str]  # optional identifier of the invocation
    name: str  # name of the function being invoked
    arguments: List[Arguments]  # the arguments supplied to the call
|
||||
|
||||
|
||||
class FunctionResult(TypedDict):
|
||||
"""Representation for a function result."""
|
||||
|
||||
id: NotRequired[str]
|
||||
name: str
|
||||
result: Optional[str]
|
||||
error: Optional[str]
|
||||
|
||||
|
||||
class Visitor(abc.ABC):
    """Abstract renderer with one method per kind of node."""

    @abc.abstractmethod
    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Return the rendering of one function definition."""

    @abc.abstractmethod
    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Return the rendering of a list of function definitions."""

    @abc.abstractmethod
    def visit_function_invocation(self, function_invocation: FunctionInvocation) -> str:
        """Return the rendering of a function invocation."""

    @abc.abstractmethod
    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Return the rendering of a function result."""
|
||||
|
||||
|
||||
class AstPrinter(Visitor):
    """Visitor that prints the AST; base class of the concrete encoders."""
|
||||
|
||||
|
||||
class XMLEncoder(AstPrinter):
    """Render function definitions, invocations and results as XML-like text."""

    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Render one function definition as a ``<function>`` element."""
        return_value = function_definition["return_value"]

        rendered = [
            "<function>",
            f"<function_name>{function_definition['name']}</function_name>",
            "<description>",
            f"{function_definition['description']}",
            "</description>",
            "<parameters>",
        ]
        for param in function_definition["parameters"]:
            rendered += [
                "<parameter>",
                f"<name>{param['name']}</name>",
                f"<type>{param['type']}</type>",
                f"<description>{param['description']}</description>",
                "</parameter>",
            ]
        rendered += [
            "</parameters>",
            "<return_value>",
            f"<type>{return_value['type']}</type>",
        ]
        # The return value description is optional; emit it only when set.
        if return_value.get("description"):
            rendered.append(
                f"<description>{return_value['description']}" f"</description>"
            )
        rendered += ["</return_value>", "</function>"]
        return "\n".join(rendered)

    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Render several definitions wrapped in a ``<functions>`` element."""
        body = "\n".join(
            self.visit_function_definition(definition)
            for definition in function_definitions
        )
        return "<functions>\n" + body + "\n</functions>"

    def visit_function_invocation(self, invocation: FunctionInvocation) -> str:
        """Render a function invocation as a ``<function_invocation>`` element."""
        rendered = ["<function_invocation>"]

        # The id line is emitted only for invocations that carry one.
        if invocation.get("id"):
            rendered.append(f"<id>{invocation['id']}</id>")

        rendered.append(f"<function_name>{invocation['name']}</function_name>")
        rendered.append("<arguments>")
        for argument in invocation["arguments"]:
            rendered += [
                "<argument>",
                f"<name>{argument['name']}</name>",
                f"<value>{argument['value']}</value>",
                "</argument>",
            ]
        rendered += ["</arguments>", "</function_invocation>"]
        return "\n".join(rendered)

    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Render a function result as a ``<function_result>`` element."""
        rendered = ["<function_result>"]

        if function_result.get("id"):
            rendered.append(f"<id>{function_result['id']}</id>")

        rendered.append(f"<function_name>{function_result['name']}</function_name>")

        # A truthy error is rendered in place of the result.
        if function_result["error"]:
            outcome = f"<error>{function_result['error']}</error>"
        else:
            outcome = f"<result>{function_result['result']}</result>"
        rendered.append(outcome)

        rendered.append("</function_result>")
        return "\n".join(rendered)
|
||||
|
||||
|
||||
class TypeScriptEncoder(AstPrinter):
    """Render function definitions, invocations and results in TypeScript style."""

    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Render a function as comment lines followed by its signature.

        The ``// @returns`` line is emitted only when the return value carries
        a ``description`` key.
        """
        parameters = function_definition["parameters"]
        return_value = function_definition["return_value"]

        parameters_as_strings = [
            f"{parameter['name']}: {parameter['type']}" for parameter in parameters
        ]

        # Let's use JSdoc style comments
        # First the function description
        lines = [f"// {function_definition['description']}"]
        # Then the parameter descriptions
        lines.extend(
            f"// @param {parameter['name']} {parameter['description']}"
            for parameter in parameters
        )
        # Then the return value description. The key is optional, so indexing
        # it unconditionally would raise KeyError when it is absent.
        if "description" in return_value:
            lines.append(f"// @returns {return_value['description']}")
        # Then the function definition
        lines.append(
            f"function {function_definition['name']}("
            f"{', '.join(parameters_as_strings)}): "
            f"{return_value['type']};"
        )

        # finally join
        return "\n".join(lines)

    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Render several function definitions separated by blank lines."""
        strs = [
            self.visit_function_definition(function_definition)
            for function_definition in function_definitions
        ]
        return "\n\n".join(strs)

    def visit_function_invocation(self, invocation: FunctionInvocation) -> str:
        """Render a function invocation as ``name(arg: value, ...);``."""
        arguments_as_strings = [
            f"{argument['name']}: {argument['value']}"
            for argument in invocation["arguments"]
        ]
        return f"{invocation['name']}(" f"{', '.join(arguments_as_strings)});"

    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Render a function result.

        A truthy ``error`` is rendered in place of the result; a trailing
        comment line with the id is added when one is present.
        """
        lines = []
        if function_result["error"]:
            lines.append(f"ERROR: {function_result['error']}")
        else:
            lines.append(f"> {function_result['result']}")
        if function_result.get("id"):
            lines.append(f"// ID: {function_result['id']}")
        return "\n".join(lines)
|
||||
@@ -1,93 +0,0 @@
|
||||
"""Factory for creating agents for the tool usage task."""
|
||||
from typing import Optional
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain_core.runnables import Runnable, RunnableConfig
|
||||
|
||||
from langchain_benchmarks import RateLimiter, model_registry
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.agent import create_agent
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.parser import (
|
||||
GenericAgentParser,
|
||||
)
|
||||
|
||||
|
||||
class CustomAgentFactory:
    """A factory for creating tool using agents.

    A factory for agents that do not leverage any special JSON mode for
    function usage; instead all function invocation behavior is implemented solely
    through prompt engineering and parsing.
    """

    def __init__(
        self,
        task: ToolUsageTask,
        model: str,
        *,
        rate_limiter: Optional[RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an agent factory for the given tool usage task.

        Args:
            task: The task to create an agent factory for
            model: model name (check model_registry)
            rate_limiter: The rate limiter to use if provided
            num_retries: The number of times to retry the agent if it fails

        Raises:
            ValueError: If ``model`` is not in the model registry.
        """
        if model not in model_registry:
            raise ValueError(f"Unknown model: {model}")
        self.task = task
        self.model = model
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def __call__(self) -> Runnable:
        """Create a fresh environment and an agent executor bound to it.

        Returns:
            A runnable that adds the task instructions to the question and
            then runs the adapted agent executor.

        Raises:
            ValueError: If the configured model name cannot be resolved.
        """
        if isinstance(self.model, str):
            registered_model = model_registry.get_model(self.model)
            if registered_model is None:
                raise ValueError(f"Unknown model: {self.model}")
            model = registered_model.get_model(model_params={"temperature": 0})
        else:
            model = self.model

        def _add_task_instructions(
            input: dict, config: Optional[RunnableConfig] = None, **kwargs
        ) -> dict:
            """Add task instructions to the question."""
            # Check for the key as well, so that a dict without `question`
            # gets the descriptive ValueError instead of a bare KeyError.
            if not isinstance(input, dict) or "question" not in input:
                raise ValueError(
                    f"Expected input to be a dict with key `question`. "
                    f"Found {type(input)}."
                )
            # Work on a shallow copy so the caller's dict is not modified.
            input = input.copy()
            input["question"] = (
                f"{self.task.instructions}\nWrite down your answer, "
                f"but do not explain it. Input: `{input['question']}`"
            )
            return input

        env = self.task.create_environment()

        agent = create_agent(
            model,
            env.tools,
            GenericAgentParser(wrapping_xml_tag="tool", require_closing_xml_tag=False),
            rate_limiter=self.rate_limiter,
        )
        if self.num_retries > 0:
            # One initial attempt plus `num_retries` retries.
            agent = agent.with_retry(
                stop_after_attempt=self.num_retries + 1,
            )
        executor = AgentExecutor(
            agent=agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )

        return _add_task_instructions | apply_agent_executor_adapter(
            executor, state_reader=env.read_state
        )
|
||||
@@ -1,122 +0,0 @@
|
||||
import ast
|
||||
import re
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
from langchain.agents import AgentOutputParser
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain_core.agents import AgentAction, AgentActionMessageLog, AgentFinish
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
|
||||
class _ToolInvocationRequest(BaseModel):
    """Light-weight pydantic model for validating the raw tool invocation request.

    Its purpose is to make sure that whatever was parsed from the raw LLM
    output has a `tool_name` field and, potentially, an `arguments` field.
    """

    tool_name: str
    # Parameterless tools are OK: `arguments` defaults to an empty dict.
    arguments: Optional[Dict] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class GenericAgentParser(AgentOutputParser):
    """A generalized parser that makes it easier to parameterize different parsing."""

    wrapping_xml_tag: str
    """The tag that wraps the function invocation request.

    For example, if "tool", then the function invocation request should be wrapped
    in <tool>...</tool>.
    """
    require_closing_xml_tag: bool = False
    """Whether we should require a closing tag for the wrapping_xml_tag.

    If False, the closing tag is appended to the text before matching whenever
    the opening tag is present. If True, nothing is appended, so text that has
    an opening tag but no closing tag is returned as the final answer.
    """

    def parse(self, text: str) -> Union[AgentFinish, AgentAction]:
        """Parse the output of the agent.

        Args:
            text: The raw model output.

        Returns:
            The action produced by ``parse_invocation`` when a wrapped
            invocation is found; otherwise an ``AgentFinish`` whose output is
            the text.
        """
        open_tag = f"<{self.wrapping_xml_tag}>"
        close_tag = f"</{self.wrapping_xml_tag}>"
        if open_tag in text:
            # This is a hack to make sure that </tool> is always present
            # in the output if <tool>. </tool> may be a stop sequence for the
            # language model, so depending on implementation
            # the stop sequence may be cut off.
            # There might be a better way to do this, but this works and
            # is simple.
            if not self.require_closing_xml_tag:
                text += close_tag

            # Escape the tags so that a tag name containing regex
            # metacharacters is matched literally.
            pattern = (
                rf"{re.escape(open_tag)}(?P<invocation>.*?){re.escape(close_tag)}"
            )
            match = re.search(pattern, text, re.DOTALL)
            if match:
                content = match.group("invocation").strip()
                return parse_invocation(content, self.wrapping_xml_tag)

        return AgentFinish(
            log=text,
            return_values={
                "output": text,
            },
        )
|
||||
|
||||
|
||||
def parse_invocation(text: str, tag: str) -> AgentAction:
    """Parse the content of the function invocation.

    Args:
        text: The text to parse.
        tag: The tag that wraps the function invocation request.

    Returns:
        An AgentAction that corresponds to the function invocation.

    Raises:
        OutputParserException: If the parsing fails.

        This exception is meant to be caught by the agent executor and
        handled appropriately to provide feedback to the LLM.
    """
    ai_content = f"<{tag}>{text}</{tag}>\n"
    # Feedback shown to the LLM for both failure modes below.
    err_msg = (
        f"ERROR: Please use the format "
        f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}</{tag}>\n'
    )

    try:
        # literal_eval accepts python literals only.
        result = ast.literal_eval(text)
    except Exception as e:
        # `Exception`, not `BaseException`, so that KeyboardInterrupt and
        # SystemExit still propagate.
        raise OutputParserException(
            error=e,
            llm_output=ai_content,
            observation=err_msg,
            send_to_llm=True,
        ) from e

    try:
        request = _ToolInvocationRequest.validate(result)
    except Exception as e:  # Using broad exception since it's not just ValidationError
        # Can also raise DictError if result is not a dict.
        raise OutputParserException(
            error=e,
            llm_output=ai_content,
            send_to_llm=True,
            observation=err_msg,
        ) from e

    return AgentActionMessageLog(
        message_log=[AIMessage(content=ai_content)],
        tool=request.tool_name,
        tool_input=request.arguments,
        log=f"\nInvoking {request.tool_name}: {request.arguments}\n\t",
    )
|
||||
@@ -1,42 +0,0 @@
|
||||
AGENT_INSTRUCTIONS_XML_FORMAT = """\
|
||||
In this environment you have access to a set of tools you can use to answer the user's question.
|
||||
|
||||
You may call them like this:
|
||||
<function_calls>
|
||||
<invoke>
|
||||
<tool_name>$TOOL_NAME</tool_name>
|
||||
<parameters>
|
||||
<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
|
||||
...
|
||||
</parameters>
|
||||
</invoke>
|
||||
</function_calls>
|
||||
|
||||
Here are the tools available:
|
||||
|
||||
{tool_description}
|
||||
""" # noqa: E501
|
||||
|
||||
_AGENT_INSTRUCTIONS_BLOB_STYLE = """\
|
||||
In this environment you have access to a set of tools you can use to answer the user's question.
|
||||
|
||||
Here are the tools available:
|
||||
|
||||
{tool_description}
|
||||
|
||||
You may call one tool at a time using a format that includes <tool> and </tool> tag.
|
||||
|
||||
Inside the tag the content is a python dictionary that uses python literals (e.g., numbers, strings, lists, dictionaries, etc.) to specify the tool invocation.
|
||||
|
||||
It must match the schema of the function as described in the tool description.
|
||||
"arguments" is a dictionary of the arguments to the function.
|
||||
|
||||
<tool>
|
||||
{{
|
||||
"tool_name": $TOOL_NAME,
|
||||
"arguments": $ARGUMENTS
|
||||
}}
|
||||
</tool>
|
||||
|
||||
If you do not know the answer use more tools. You can only take a single action at a time.\
|
||||
""" # noqa: E501
|
||||
@@ -1,57 +0,0 @@
|
||||
"""Utilities to extract information from langchain tools for use in prompts."""
|
||||
import inspect
|
||||
from textwrap import dedent
|
||||
from typing import List
|
||||
|
||||
from langchain.tools.base import StructuredTool
|
||||
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
|
||||
FunctionDefinition,
|
||||
Parameter,
|
||||
)
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
def get_parameters_from_tool(tool: StructuredTool) -> List[Parameter]:
    """Describe every argument of a langchain tool as a parameter dictionary.

    The tool's ``args_schema`` is rendered with ``schema()`` and each entry
    found under ``properties`` becomes one parameter.

    Args:
        tool: The tool to inspect. Its ``args_schema`` must provide a
            ``schema()`` method returning a JSON-schema style dictionary.

    Returns:
        One dictionary per schema property, in schema order, with the keys
        ``name``, ``type`` and ``description``. The primitive schema type
        names ``string``, ``integer``, ``number`` and ``boolean`` are mapped
        to ``str``, ``int``, ``float`` and ``bool``; any other type name is
        passed through unchanged. A property without a ``type`` entry is
        reported as ``"Any"`` and a missing description as ``""``.
    """
    schema = tool.args_schema.schema()

    # Is this needed or is string OK?
    type_adapter = {
        "string": "str",  # str or string?
        "integer": "int",
        "number": "float",
        "boolean": "bool",
    }

    parameters = []
    # A schema without any "properties" entry simply yields no parameters.
    for name, spec in schema.get("properties", {}).items():
        # Properties described through `$ref`, `anyOf`, `allOf`, ... have no
        # top-level "type" key; report them as "Any" instead of failing.
        schema_type = spec.get("type", "Any")
        parameters.append(
            {
                "name": name,
                "type": type_adapter.get(schema_type, schema_type),
                "description": spec.get("description", ""),
            }
        )

    return parameters
|
||||
|
||||
|
||||
#
|
||||
def convert_tool_to_function_definition(tool: StructuredTool) -> FunctionDefinition:
    """Convert a langchain tool into a function definition dictionary.

    Args:
        tool: The tool to convert. Its ``name`` and ``func`` attributes are
            read, and its arguments are described via
            ``get_parameters_from_tool``.

    Returns:
        A dictionary with the keys ``name``, ``description``, ``parameters``
        and ``return_value``. The description is the docstring of the wrapped
        function (an empty string when it has none) and the return value type
        is always reported as ``"Any"``.
    """
    # Here we re-inspect the underlying function to get the doc-string
    # since StructuredTool modifies it, but we want the raw one for maximum
    # flexibility.
    # `inspect.getdoc` returns None for an undocumented function, which
    # `dedent` cannot process, so fall back to an empty description.
    description = inspect.getdoc(tool.func) or ""

    return {
        "name": tool.name,
        "description": dedent(description),
        "parameters": get_parameters_from_tool(tool),
        "return_value": {
            "type": "Any",
        },
    }
|
||||
@@ -1,77 +0,0 @@
|
||||
"""Code for creating an assistant factory for evaluating tool usage tasks.
|
||||
|
||||
See: https://platform.openai.com/docs/assistants/how-it-works/creating-assistants
|
||||
"""
|
||||
from typing import Optional
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable
|
||||
from langchain.schema.runnable import Runnable
|
||||
|
||||
from langchain_benchmarks import rate_limiting
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
|
||||
|
||||
class OpenAIAssistantFactory:
    """Factory producing runnable OpenAI assistant agents for a tool usage task.

    The assistant is created once in ``__init__``. Every call of the factory
    creates a fresh task environment and wraps the shared assistant in a new
    ``AgentExecutor`` bound to that environment's tools.
    """

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        model: str,
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an OpenAI agent factory for the given task.

        Args:
            task: The task to create an agent factory for.
            model: The model to use -- this must be an OpenAI model.
            rate_limiter: The rate limiter to use
            num_retries: The number of times to retry the assistant if it fails

        Raises:
            ValueError: If ``model`` is not a string.
        """
        if not isinstance(model, str):
            raise ValueError(f"Expected str for model, got {type(model)}")
        self.task = task
        tools = task.create_environment().tools
        # Stateless, so we only need to create it once
        self.agent = OpenAIAssistantRunnable.create_assistant(
            name=f"{task.name} assistant",
            instructions=self.task.instructions,
            tools=tools,
            model=model,
            as_agent=True,
        )
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def __call__(self) -> Runnable:
        """Build an agent executor bound to a fresh task environment.

        Returns:
            The executor wrapped by ``apply_agent_executor_adapter``.
        """
        env = self.task.create_environment()

        agent = self.agent
        if self.rate_limiter is not None:
            # Rate limited model
            agent = rate_limiting.with_rate_limit(agent, self.rate_limiter)

        def _map_key(x: dict):
            # Assistant expects the 'content' key explicitly
            return {
                "content": x["input"],
                **{k: v for k, v in x.items() if k != "input"},
            }

        # Chain onto the local `agent` (not `self.agent`) so that the rate
        # limiting wrapper applied above is not thrown away.
        agent = _map_key | agent
        if self.num_retries > 0:
            # One initial attempt plus `num_retries` retries.
            agent = agent.with_retry(
                stop_after_attempt=self.num_retries + 1,
            )
        runnable = AgentExecutor(
            agent=agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )

        # Returns `state` in the output if the environment has a state reader
        # makes sure that `output` is always in the output
        return apply_agent_executor_adapter(runnable, state_reader=env.read_state)
|
||||
@@ -1,166 +0,0 @@
|
||||
"""Code for creating an agent factory for evaluating tool usage tasks."""
|
||||
from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.format_scratchpad.openai_tools import (
|
||||
format_to_openai_tool_messages,
|
||||
)
|
||||
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain.schema.runnable import Runnable
|
||||
from langchain.tools.render import format_tool_to_openai_tool
|
||||
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
|
||||
from langchain_core.language_models.base import LanguageModelInput
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
|
||||
from langchain_benchmarks import model_registry, rate_limiting
|
||||
from langchain_benchmarks.model_registration import RegisteredModel
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
def _bind_tools(
    llm: BaseChatModel,
    tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    tool_choice: Optional[str] = None,
    json_mode: bool = False,
    **kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
    """Bind tools (and other objects) to this chat model.

    Args:
        llm: The chat model to bind the tools to.
        tools: A list of tool definitions to bind to this chat model.
            Can be a dictionary, pydantic model, or callable. Pydantic
            models and callables will be automatically converted to
            their schema dictionary representation.
        tool_choice: Which tool to require the model to call.
            Must be the name of one of the provided tools or
            "auto" to automatically determine which tool to call
            (if any).
        json_mode: Whether to set JSON mode for the tool call.
            This guarantees the model will respond in valid JSON
            (unless truncated).
        kwargs: Any additional parameters to pass to ``llm.bind``.

    Returns:
        The result of ``llm.bind`` with the formatted tools and the
        remaining keyword arguments applied.

    Raises:
        ValueError: If ``tool_choice`` is given without any tools, or if it
            is neither the name of a provided tool nor "auto".
    """
    formatted_tools: List[Dict[str, Union[str, dict]]] = [
        format_tool_to_openai_tool(tool) for tool in tools
    ]
    if tool_choice is not None:
        if not formatted_tools:
            raise ValueError(
                "When specifying `tool_choice`, you must provide at least one tool."
            )
        tool_names = [tool["function"]["name"] for tool in formatted_tools]
        if tool_choice in tool_names:
            # Force the model to call this specific tool.
            kwargs = {
                **kwargs,
                "tool_choice": {"type": "function", "function": {"name": tool_choice}},
            }
        elif tool_choice == "auto":
            # Documented above as a valid choice: forward it unchanged
            # instead of rejecting it as an unknown tool name.
            kwargs = {**kwargs, "tool_choice": "auto"}
        else:
            raise ValueError(
                f"Tool choice {tool_choice} was specified, but the only "
                f"provided tools were {tool_names}."
            )
    if json_mode:
        kwargs = {**kwargs, "response_format": {"type": "json_object"}}
    return llm.bind(
        tools=formatted_tools,
        **kwargs,
    )
|
||||
|
||||
|
||||
class OpenAIAgentFactory:
    """Factory that builds OpenAI tool-calling agents for a tool usage task.

    Calling the factory creates a fresh task environment and returns an agent
    executor wired to that environment's tools.
    """

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        model: Union[
            str, RegisteredModel, BaseLanguageModel, BaseChatModel
        ] = "gpt-3.5-turbo-16k",
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an OpenAI agent factory for the given task.

        Args:
            task: The task to create an agent factory for.
            model: The model to use -- this must be an OpenAI model. Either
                the name of a model in the model registry, a registered
                model, or an already instantiated model.
            rate_limiter: The rate limiter to use
            num_retries: The number of times to retry the agent if it fails
        """
        self.task = task
        self.model = model
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def _create_model(self) -> Union[BaseChatModel, BaseLanguageModel]:
        """Resolve ``self.model`` into a model instance.

        Returns:
            ``self.model`` itself when it already is a model instance,
            otherwise the model obtained from the (registered) model.

        Raises:
            ValueError: If ``self.model`` is a name missing from the registry.
            TypeError: If ``self.model`` has an unsupported type.
        """
        # Parameters applied whenever the model is instantiated here;
        # presumably chosen to keep runs as reproducible as possible.
        model_params = {"temperature": 0, "model_kwargs": {"seed": 0}}

        if isinstance(self.model, RegisteredModel):
            return self.model.get_model(model_params=model_params)
        elif isinstance(self.model, (BaseChatModel, BaseLanguageModel)):
            # Already instantiated: used as is, without the parameters above.
            return self.model
        elif isinstance(self.model, str):
            if self.model in model_registry:
                registered_model = model_registry.get_model(self.model)
                return registered_model.get_model(model_params=model_params)
            else:
                raise ValueError(f"Unknown model: {self.model}")
        else:
            raise TypeError(
                "Expected str, RegisteredModel, BaseLanguageModel or "
                f"BaseChatModel, got {type(self.model)}"
            )

    def create(self) -> Runnable:
        """Agent Executor"""
        # For backwards compatibility
        return self()

    def __call__(self) -> Runnable:
        """Build an agent executor bound to a fresh task environment.

        Returns:
            The executor wrapped by ``apply_agent_executor_adapter``.
        """
        model = self._create_model()
        env = self.task.create_environment()

        model = _bind_tools(model, env.tools)

        if self.rate_limiter is not None:
            # Rate limited model
            model = rate_limiting.with_rate_limit(model, self.rate_limiter)

        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    self.task.instructions,
                ),
                ("user", "{input}"),
                MessagesPlaceholder(variable_name="agent_scratchpad"),
            ]
        )

        runnable_agent = (
            {
                "input": lambda x: x["input"],
                "agent_scratchpad": lambda x: format_to_openai_tool_messages(
                    x["intermediate_steps"]
                ),
            }
            | prompt
            | model
            | OpenAIToolsAgentOutputParser()
        )
        if self.num_retries > 0:
            # One initial attempt plus `num_retries` retries.
            runnable_agent = runnable_agent.with_retry(
                stop_after_attempt=self.num_retries + 1,
            )
        runnable = AgentExecutor(
            agent=runnable_agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )

        # Returns `state` in the output if the environment has a state reader
        # makes sure that `output` is always in the output
        return apply_agent_executor_adapter(runnable, state_reader=env.read_state)
|
||||
@@ -10,9 +10,10 @@ from langchain_core.runnables import Runnable
|
||||
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
from langchain_benchmarks.tool_usage.agents.base import AgentFactory
|
||||
|
||||
|
||||
class CustomRunnableAgentFactory:
|
||||
class CustomRunnableAgentFactory(AgentFactory):
|
||||
"""A factory for creating tool using agents.
|
||||
|
||||
A factory for agents that do not leverage any special JSON mode for
|
||||
@@ -46,4 +47,6 @@ class CustomRunnableAgentFactory:
|
||||
return_intermediate_steps=True,
|
||||
)
|
||||
|
||||
return apply_agent_executor_adapter(executor, state_reader=env.read_state)
|
||||
return apply_agent_executor_adapter(
|
||||
executor, state_reader=env.read_state
|
||||
).with_config({"run_name": "Agent", "metadata": {"task": self.task.name}})
|
||||
|
||||
@@ -4,7 +4,7 @@ This is useful for agents that follow the standard LangChain tool format.
|
||||
"""
|
||||
from typing import Optional
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents import AgentExecutor, create_tool_calling_agent
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import Runnable
|
||||
@@ -12,9 +12,10 @@ from langchain_core.runnables import Runnable
|
||||
from langchain_benchmarks.rate_limiting import RateLimiter, with_rate_limit
|
||||
from langchain_benchmarks.schema import ToolUsageTask
|
||||
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
|
||||
from langchain_benchmarks.tool_usage.agents.base import AgentFactory
|
||||
|
||||
|
||||
class StandardAgentFactory:
|
||||
class StandardAgentFactory(AgentFactory):
|
||||
"""A standard agent factory.
|
||||
|
||||
Use this factory with chat models that support the standard LangChain tool
|
||||
@@ -55,8 +56,6 @@ class StandardAgentFactory:
|
||||
|
||||
def __call__(self) -> Runnable:
|
||||
"""Call the factory to create Runnable agent."""
|
||||
# Temporarily import here until new langchain is released with create_tools_agent
|
||||
from langchain.agents import create_tool_calling_agent
|
||||
|
||||
env = self.task.create_environment()
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
import pytest
|
||||
from langchain_core.agents import AgentActionMessageLog, AgentFinish
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.parser import (
|
||||
GenericAgentParser,
|
||||
)
|
||||
|
||||
|
||||
def test_parser() -> None:
    """Check GenericAgentParser on final answers, malformed and valid calls."""
    agent_parser = GenericAgentParser(
        require_closing_tag=False, wrapping_xml_tag="tool"
    )

    # If <tool> tag not found then it's an agent finish
    finish = agent_parser.invoke("goodbye")
    assert isinstance(finish, AgentFinish)

    # Both payloads must be rejected; the first one is missing the tool name
    # and the arguments.
    for malformed in ("<tool>'hello'</tool>", "<tool>hello"):
        with pytest.raises(OutputParserException):
            agent_parser.invoke(malformed)

    # Full invocation
    invocation_text = (
        '<tool>{\n "tool_name": "type_letter",\n '
        '"arguments": {\n '
        '"letter": "h"\n }\n}</tool>\n'
    )
    expected_action = AgentActionMessageLog(
        tool="type_letter",
        tool_input={"letter": "h"},
        log="\nInvoking type_letter: {'letter': 'h'}\n\t",
        message_log=[AIMessage(content=invocation_text)],
    )
    assert agent_parser.invoke(invocation_text) == expected_action

    # Without "arguments" the tool input defaults to an empty dict
    # (assumes that it's a structured tool by default!)
    without_arguments = agent_parser.invoke('<tool>{"tool_name": "hello"}</tool>')
    assert without_arguments.tool == "hello"
    assert without_arguments.tool_input == {}

    # Arguments need to be a dict
    with pytest.raises(OutputParserException):
        agent_parser.invoke('<tool>{"tool_name": "hello", "arguments": [1, 2]}</tool>')

    # Dictionary arguments are handed through as the tool input
    with_arguments = agent_parser.invoke(
        '<tool>{"tool_name": "hello", "arguments": {"a": "b"}}</tool>'
    )
    assert with_arguments.tool == "hello"
    assert with_arguments.tool_input == {"a": "b"}
|
||||
@@ -1,25 +0,0 @@
|
||||
"""Test typescript encoding."""
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
|
||||
FunctionDefinition,
|
||||
TypeScriptEncoder,
|
||||
)
|
||||
|
||||
|
||||
def test_function_definition() -> None:
    """Check the TypeScript rendering of a one-parameter function definition."""
    parameter = {
        "name": "test_parameter",
        "type": "str",
        "description": "A test parameter",
    }
    return_value = {"type": "str", "description": "A test return value"}
    definition = FunctionDefinition(
        name="test_function",
        description="A test function",
        parameters=[parameter],
        return_value=return_value,
    )

    rendered = TypeScriptEncoder().visit_function_definition(definition)

    # Comment header lines followed by the signature, no trailing newline
    expected_lines = [
        "// A test function",
        "// @param test_parameter A test parameter",
        "// @returns A test return value",
        "function test_function(test_parameter: str): str;",
    ]
    assert rendered == "\n".join(expected_lines)
|
||||
@@ -1,90 +0,0 @@
|
||||
"""Test XML encoding and decoding of function definitions, invocation, and results."""
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
|
||||
FunctionDefinition,
|
||||
FunctionInvocation,
|
||||
FunctionResult,
|
||||
XMLEncoder,
|
||||
)
|
||||
|
||||
|
||||
def test_function_definition_encoding() -> None:
    """Check the XML rendering of a one-parameter function definition."""
    parameter = {
        "name": "test_parameter",
        "type": "str",
        "description": "A test parameter",
    }
    return_value = {"type": "str", "description": "A test return value"}
    definition = FunctionDefinition(
        name="test_function",
        description="A test function",
        parameters=[parameter],
        return_value=return_value,
    )

    rendered = XMLEncoder().visit_function_definition(definition)

    # One element per line, no trailing newline after the closing tag
    expected_lines = [
        "<function>",
        "<function_name>test_function</function_name>",
        "<description>",
        "A test function",
        "</description>",
        "<parameters>",
        "<parameter>",
        "<name>test_parameter</name>",
        "<type>str</type>",
        "<description>A test parameter</description>",
        "</parameter>",
        "</parameters>",
        "<return_value>",
        "<type>str</type>",
        "<description>A test return value</description>",
        "</return_value>",
        "</function>",
    ]
    assert rendered == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_function_result_encoding() -> None:
    """Check the XML rendering of successful and failed function results."""
    xml_encoder = XMLEncoder()

    # A result with `error=None` renders a <result> element and no <error>
    success = FunctionResult(
        name="test_function",
        result="test_result",
        error=None,
    )
    expected_success = "\n".join(
        [
            "<function_result>",
            "<function_name>test_function</function_name>",
            "<result>test_result</result>",
            "</function_result>",
        ]
    )
    assert xml_encoder.visit_function_result(success) == expected_success

    # A result carrying only an error renders an <error> element instead
    failure = FunctionResult(
        name="test_function",
        error="error",
    )
    expected_failure = "\n".join(
        [
            "<function_result>",
            "<function_name>test_function</function_name>",
            "<error>error</error>",
            "</function_result>",
        ]
    )
    assert xml_encoder.visit_function_result(failure) == expected_failure
|
||||
|
||||
|
||||
def test_function_invocation() -> None:
    """Check the XML rendering of a function invocation with one argument."""
    argument = {"name": "test_argument", "value": "test_value"}
    invocation = FunctionInvocation(
        name="test_function",
        arguments=[argument],
    )

    rendered = XMLEncoder().visit_function_invocation(invocation)

    # One element per line, no trailing newline after the closing tag
    expected_lines = [
        "<function_invocation>",
        "<function_name>test_function</function_name>",
        "<arguments>",
        "<argument>",
        "<name>test_argument</name>",
        "<value>test_value</value>",
        "</argument>",
        "</arguments>",
        "</function_invocation>",
    ]
    assert rendered == "\n".join(expected_lines)
|
||||
@@ -1,59 +0,0 @@
|
||||
import pytest
|
||||
from langchain.tools import tool
|
||||
|
||||
from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import (
|
||||
convert_tool_to_function_definition,
|
||||
)
|
||||
|
||||
|
||||
# Tool fixture without parameters. Its docstring is test data:
# `test_parameterless_function` expects the description "Get hello.".
@tool
def get_hello() -> str:
    """Get hello."""
    return "hello"
|
||||
|
||||
|
||||
# Tool fixture with a single string parameter. Its docstring is test data:
# `test_function_with_parameters` compares against `repeat.func.__doc__`.
@tool
def repeat(x: str) -> str:
    """Repeat x.

    Args:
        x: The string to repeat.

    Returns:
        The repeated string.
    """
    return x
|
||||
|
||||
|
||||
def test_parameterless_function() -> None:
    """Check the function definition produced for a tool without arguments."""
    expected_definition = {
        "name": "get_hello",
        "description": "Get hello.",
        # No arguments, hence no parameters
        "parameters": [],
        "return_value": {
            "type": "Any",
        },
    }
    actual_definition = convert_tool_to_function_definition(get_hello)
    assert actual_definition == expected_definition
|
||||
|
||||
|
||||
@pytest.mark.skip("Need to fix handling of leading whitespace")
def test_function_with_parameters() -> None:
    """Check the function definition produced for a one-argument tool."""
    import textwrap

    # The description is expected to be the dedented raw docstring
    expected_description = textwrap.dedent(repeat.func.__doc__)
    expected_parameter = {
        "name": "x",
        "type": "str",
        "description": "",  # Need to fix this
    }
    expected_definition = {
        "name": "repeat",
        "description": expected_description,
        "parameters": [expected_parameter],
        "return_value": {
            "type": "Any",
        },
    }
    assert convert_tool_to_function_definition(repeat) == expected_definition
|
||||
@@ -6,5 +6,11 @@ def test_public_api() -> None:
|
||||
# This test will also fail if __all__ is not sorted.
|
||||
# Please keep it sorted!
|
||||
assert __all__ == sorted(
|
||||
["apply_agent_executor_adapter", "get_eval_config"], key=str.lower
|
||||
[
|
||||
"apply_agent_executor_adapter",
|
||||
"get_eval_config",
|
||||
"CustomRunnableAgentFactory",
|
||||
"StandardAgentFactory",
|
||||
],
|
||||
key=str.lower,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user