Remove old code (#176)

Remove old code
This commit is contained in:
Eugene Yurtsev
2024-04-18 11:16:42 -04:00
committed by GitHub
parent c80e959b05
commit bec40d90ef
22 changed files with 33 additions and 1451 deletions
+7 -1
View File
@@ -1,9 +1,15 @@
"""Package for helping to evaluate agent runs."""
from langchain_benchmarks.tool_usage.agents import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents import (
CustomRunnableAgentFactory,
StandardAgentFactory,
apply_agent_executor_adapter,
)
from langchain_benchmarks.tool_usage.evaluators import get_eval_config
# Please keep this list sorted!
__all__ = [
"apply_agent_executor_adapter",
"CustomRunnableAgentFactory",
"get_eval_config",
"StandardAgentFactory",
]
@@ -1,25 +1,11 @@
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.anthropic_tool_user import (
AnthropicToolUserFactory,
)
from langchain_benchmarks.tool_usage.agents.experimental.factory import (
CustomAgentFactory,
)
from langchain_benchmarks.tool_usage.agents.openai_assistant import (
OpenAIAssistantFactory,
)
from langchain_benchmarks.tool_usage.agents.openai_functions import OpenAIAgentFactory
from langchain_benchmarks.tool_usage.agents.runnable_agent import (
CustomRunnableAgentFactory,
)
from langchain_benchmarks.tool_usage.agents.tool_using_agent import StandardAgentFactory
__all__ = [
"OpenAIAgentFactory",
"OpenAIAssistantFactory",
"apply_agent_executor_adapter",
"CustomAgentFactory",
"AnthropicToolUserFactory",
"CustomRunnableAgentFactory",
"StandardAgentFactory",
]
@@ -1,271 +0,0 @@
"""Wrapper around the anthropic tool user SDK.
The anthropic tool user SDK is an alpha release so this code will likely be
changed or deleted in the future. It's here simply to make it easier to benchmark
the performance of the existing tool user SDK, to compare it with the performance
of other implementations.
"""
from importlib.util import find_spec
from typing import Any, Dict, List, Optional, Sequence
from langchain.tools import StructuredTool
from langchain_core.callbacks.manager import trace_as_chain_group
from langchain_core.runnables import Runnable, RunnableConfig, RunnableLambda
from langchain_benchmarks import rate_limiting
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
def convert_langchain_tool_to_tool_user_tool(lc_tool: StructuredTool) -> Any:
    """Wrap a LangChain structured tool as a ``tool_use_package`` tool.

    The ``tool_use_package`` import is deferred to call time so that this
    module can be imported without the package being installed.

    Args:
        lc_tool: The LangChain tool to expose. Its ``args_schema`` is used to
            derive the parameter descriptions.

    Returns:
        An instance of a ``BaseTool`` subclass that forwards calls to ``lc_tool``.
    """
    from tool_use_package.tools.base_tool import BaseTool

    # JSON-schema type names mapped to python-style names; unmapped names
    # are passed through unchanged.
    # Is this needed or is string OK?
    json_to_python_type = {
        "string": "str",  # str or string?
        "integer": "int",
        "number": "float",
        "boolean": "bool",
    }

    class DynamicTool(BaseTool):
        def use_tool(self, **kwargs):
            # The keyword arguments are handed to the tool as a single dict.
            return lc_tool(kwargs)

    arg_properties = lc_tool.args_schema.schema()["properties"]
    parameters = [
        {
            "name": arg_name,
            "type": json_to_python_type.get(spec["type"], spec["type"]),
            "description": spec.get("description", ""),
        }
        for arg_name, spec in arg_properties.items()
    ]
    return DynamicTool(lc_tool.name, lc_tool.description, parameters)
def _handle_tool_inputs(
    tool_inputs: List[Dict[str, Any]],
    tools: Sequence[StructuredTool],
    config: Optional[RunnableConfig] = None,
) -> Dict[str, Any]:
    """Invoke the requested tools and package the results as one message.

    Tools are invoked in request order. Processing stops at the first
    failure; in that case every result gathered so far is discarded and only
    the error text is reported.

    Args:
        tool_inputs: Tool requests; each must have the keys ``tool_name`` and
            ``tool_arguments``.
        tools: The tools that may be invoked, looked up by their ``name``.
        config: Optional runnable config forwarded to each ``tool.invoke``.

    Returns:
        A dict with ``role`` set to ``"tool_outputs"``, ``tool_outputs`` (a
        list of ``{"tool_name", "tool_result"}`` dicts, or ``None`` on error)
        and ``tool_error`` (the error text, or ``None`` on success).
    """
    tool_by_name = {tool.name: tool for tool in tools}
    tool_error: Optional[str] = None
    tool_outputs: Optional[List[Dict[str, Any]]] = []
    for tool_input in tool_inputs:
        tool_name = tool_input["tool_name"]
        tool_arguments = tool_input["tool_arguments"]
        try:
            # A request for a tool that does not exist is reported through
            # `tool_error` like any other tool failure, rather than escaping
            # as a KeyError and aborting the whole run.
            if tool_name not in tool_by_name:
                raise ValueError(
                    f"Unknown tool `{tool_name}`. "
                    f"Available tools: {sorted(tool_by_name)}"
                )
            tool_result = tool_by_name[tool_name].invoke(
                tool_arguments, config=config
            )
        except Exception as e:  # Break on first error
            tool_error = str(e)
            tool_outputs = None
            break
        tool_outputs.append(
            {
                "tool_name": tool_name,
                "tool_result": tool_result,
            }
        )
    return {
        "role": "tool_outputs",
        "tool_outputs": tool_outputs,
        "tool_error": tool_error,
    }
def run_anthropic_agent_simple(
    tools: Sequence[StructuredTool],
    user_message: str,
    *,
    max_iterations: int = 30,
    config: Optional[RunnableConfig] = None,
    **kwargs,
) -> List[dict]:
    """Drive the tool-user SDK in manual mode until the assistant answers.

    Each iteration asks the SDK for its next message. When that message is a
    tool request, the tools are executed locally and the results are appended
    to the conversation. The loop stops once a message with role
    ``assistant`` is produced.

    Args:
        tools: LangChain tools made available to the agent.
        user_message: The initial human message.
        max_iterations: Maximum number of SDK calls before giving up.
        config: Optional runnable config; its ``callbacks`` entry (if any) is
            used as the parent for tracing.
        **kwargs: ``verbose`` is popped and passed to ``use_tools``; the rest
            is forwarded to the ``ToolUser`` constructor.

    Returns:
        The full message history, starting with the human message.

    Raises:
        ValueError: If no assistant message is produced within
            ``max_iterations`` iterations.
    """
    from tool_use_package.tool_user import ToolUser

    verbose = kwargs.pop("verbose", False)
    sdk_tools = [convert_langchain_tool_to_tool_user_tool(tool) for tool in tools]
    tool_user = ToolUser(sdk_tools, **kwargs)

    first_message = {
        "role": "human",
        "content": user_message,
        "tool_error": None,
        "tool_outputs": [],
        "tool_inputs": [],
    }
    messages = [first_message]
    parent_callbacks = config.get("callbacks", None) if config else None

    with trace_as_chain_group(
        "Anthropic Agent Run",
        inputs={"user_message": user_message},
        callback_manager=parent_callbacks,
    ) as run_manager:
        for step in range(max_iterations):
            with trace_as_chain_group(
                f"Anthropic Agent Iteration {step}",
                inputs={"messages": messages},
                callback_manager=run_manager.parent_run_manager.get_child(),
            ) as step_manager:
                reply = tool_user.use_tools(
                    messages, execution_mode="manual", verbose=verbose
                )
                step_messages = [reply]
                reply_role = reply["role"]

                if reply_role == "tool_inputs":
                    # Execute the requested tools under this iteration's trace.
                    tool_config = {
                        "callbacks": step_manager.parent_run_manager.get_child(),
                    }
                    step_messages.append(
                        _handle_tool_inputs(
                            reply["tool_inputs"], tools, config=tool_config
                        )
                    )

                step_manager.on_chain_end(outputs=step_messages)
                messages.extend(step_messages)

                # An assistant message ends the run.
                if reply_role == "assistant":
                    break
        else:
            # The loop ran out of iterations without an assistant message.
            raise ValueError("Max iterations reached")
        run_manager.on_chain_end(outputs=messages)
    return messages
def convert_messages_to_finalized_output(
    messages: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Summarize a message history into the output dict used for evaluation.

    Args:
        messages: The conversation history. The final entry must have the
            role ``assistant``; entries with role ``tool_inputs`` carry a
            ``tool_inputs`` list whose items each have a ``tool_name``.

    Returns:
        A dict of the form::

            {
                "output": <content of the final assistant message>,
                "actual_steps": [<tool name>, ...],  # in request order
            }

    Raises:
        ValueError: If ``messages`` is empty, the final message is not from
            the assistant, or a message has no ``role`` key.
    """
    if not messages:
        raise ValueError("Expected at least one message")

    final_message = messages[-1]
    if final_message["role"] != "assistant":
        raise ValueError(
            f"Expected the last message to be from the assistant. "
            f"Instead got {final_message}."
        )

    tool_names = []
    for entry in messages:
        if "role" not in entry:
            raise ValueError(f"Expected role in message {entry}")
        if entry["role"] != "tool_inputs":
            continue
        # Record the name of every tool requested in this message.
        tool_names.extend(request["tool_name"] for request in entry["tool_inputs"])

    return {
        "output": final_message["content"],
        "actual_steps": tool_names,
    }
def create_agent(tools: Sequence[StructuredTool]) -> RunnableLambda:
    """Build a runnable that runs the tool-user agent over the given tools.

    Args:
        tools: The tools the agent may call.

    Returns:
        A ``RunnableLambda`` that reads the user message from the ``input``
        key of its input dict and returns the finalized output dict.
    """

    def run_agent(
        input: dict, config: Optional[RunnableConfig] = None, **kwargs
    ) -> dict:
        """Run one agent conversation and summarize its message history."""
        history = run_anthropic_agent_simple(
            tools, input["input"], config=config, **kwargs
        )
        finalized = convert_messages_to_finalized_output(history)
        return finalized

    return RunnableLambda(run_agent)
class AnthropicToolUserFactory:
    """Factory for runnables that run the Anthropic tool-user agent on a task."""

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
    ) -> None:
        """Create an agent factory for the given task.

        Args:
            task: The task to create an agent factory for.
            rate_limiter: The rate limiter to use

        Raises:
            ImportError: If ``tool_use_package`` cannot be found.
        """
        if find_spec("tool_use_package") is None:
            raise ImportError(
                'Could not import "tool_use_package". Please '
                "follow instructions here to install "
                "https://github.com/anthropics/anthropic-tools/tree/main"
            )
        self.task = task
        self.rate_limiter = rate_limiter

    def __call__(self, **kwargs: Any) -> Runnable:
        """Create a runnable bound to a fresh task environment.

        Args:
            **kwargs: If given, bound onto the agent runnable via ``bind``.

        Returns:
            A runnable that prepends the task instructions to the question,
            runs the agent through the agent-executor adapter and, when a
            rate limiter was configured, applies it.
        """
        env = self.task.create_environment()

        def _add_task_instructions(
            input: dict, config: Optional[RunnableConfig] = None, **kwargs
        ) -> dict:
            """Add task instructions to the question."""
            if isinstance(input, dict) and "question" in input:
                with_instructions = input.copy()
                with_instructions["question"] = (
                    f"{self.task.instructions}\nWrite down your answer, "
                    f"but do not explain it. Input: `{input['question']}`"
                )
                return with_instructions
            raise ValueError(
                f"Expected input to be a dict with key `question`. "
                f"Found {type(input)}."
            )

        agent = create_agent(env.tools)  # type: ignore
        if kwargs:
            agent = agent.bind(**kwargs)

        # Returns `state` in the output if the environment has a state reader
        # makes sure that `output` is always in the output
        adapted_agent = apply_agent_executor_adapter(
            agent, state_reader=env.read_state
        )
        runnable = _add_task_instructions | adapted_agent

        if self.rate_limiter:  # Add a rate limiter
            runnable = rate_limiting.with_rate_limit(runnable, self.rate_limiter)
        return runnable
@@ -0,0 +1,11 @@
import abc
from langchain_core.runnables import Runnable
class AgentFactory(abc.ABC):
    """Abstract base class for agent factories.

    A factory is a callable object: calling it with no arguments returns a
    ``Runnable``. Subclasses must implement ``__call__``.
    """

    @abc.abstractmethod
    def __call__(self) -> Runnable:
        """Create a new agent.

        Returns:
            The agent, as a ``Runnable``.
        """
@@ -1,133 +0,0 @@
from typing import List, Literal, Optional, Sequence, Tuple, Union
from langchain.agents import AgentOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema.runnable import Runnable
from langchain.tools import StructuredTool
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.prompts import MessagesPlaceholder
from typing_extensions import NotRequired, TypedDict
from langchain_benchmarks import RateLimiter
from langchain_benchmarks.rate_limiting import with_rate_limit
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
AstPrinter,
FunctionResult,
TypeScriptEncoder,
XMLEncoder,
)
from langchain_benchmarks.tool_usage.agents.experimental.prompts import (
_AGENT_INSTRUCTIONS_BLOB_STYLE,
)
from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import (
convert_tool_to_function_definition,
)
def format_steps_for_chat(
    intermediate_steps: List[Tuple[AgentAction, str]],
    ast_printer: AstPrinter,
) -> List[BaseMessage]:
    """Turn the agent's intermediate steps into chat messages.

    Args:
        intermediate_steps: ``(action, observation)`` pairs taken so far.
        ast_printer: Printer used to render each tool result as text.

    Returns:
        The chat messages for all steps, in step order.
    """
    chat_history = []
    for action, observation in intermediate_steps:
        if action.tool != "_Exception":
            # Action messages contain the tool invocation request from the
            # LLM; follow them with the rendered result of the invocation.
            chat_history.extend(action.messages)
            function_result: FunctionResult = {
                "name": action.tool,
                "error": None,
                "result": observation,
            }
            rendered_result = ast_printer.visit_function_result(function_result)
            chat_history.append(HumanMessage(content=rendered_result))
            continue

        # Exception step: replay the raw LLM text, then the error message
        # (which is carried in the action's tool input).
        chat_history.append(AIMessage(content=action.log))
        chat_history.append(HumanMessage(content=action.tool_input))
    return chat_history
# PUBLIC API
class AgentInput(TypedDict):
    """The input to the agent.

    ``input`` and ``intermediate_steps`` are required keys; ``examples`` may
    be omitted.
    """

    # The user's request, as plain text.
    input: str
    """The input to the agent."""
    # (action, observation) pairs produced by earlier steps.
    intermediate_steps: List[Tuple[AgentAction, str]]
    """The intermediate steps taken by the agent."""
    # Optional key (NotRequired).
    examples: NotRequired[List[BaseMessage]]
    """A list of messages that can be used to form example traces."""
def create_agent(
    model: Union[BaseChatModel, BaseLanguageModel],
    tools: Sequence[StructuredTool],
    parser: AgentOutputParser,
    *,
    ast_printer: Union[AstPrinter, Literal["xml", "typescript"]] = "xml",
    rate_limiter: Optional[RateLimiter] = None,
) -> Runnable[AgentInput, Union[AgentAction, AgentFinish]]:
    """Create an agent for a chat model.

    Args:
        model: The model that drives the agent.
        tools: The tools to describe to the model in the system prompt.
        parser: Parser that turns the model output into an agent action or
            an agent finish.
        ast_printer: How tool definitions and tool results are rendered:
            either an ``AstPrinter`` instance or one of the names ``"xml"``
            and ``"typescript"``.
        rate_limiter: Optional rate limiter applied to the model.

    Returns:
        A runnable mapping an ``AgentInput`` to the parser's output.

    Raises:
        ValueError: If ``ast_printer`` is an unknown name.
        TypeError: If ``ast_printer`` is neither a string nor an ``AstPrinter``.
    """
    if isinstance(ast_printer, str):
        if ast_printer == "xml":
            ast_printer_ = XMLEncoder()
        elif ast_printer == "typescript":
            ast_printer_ = TypeScriptEncoder()
        else:
            raise ValueError(f"Unknown ast printer: {ast_printer}")
    elif isinstance(ast_printer, AstPrinter):
        ast_printer_ = ast_printer
    else:
        raise TypeError(
            f"Expected AstPrinter or str, got {type(ast_printer)} for `ast_printer`"
        )

    function_definitions = [convert_tool_to_function_definition(tool) for tool in tools]
    tool_description = ast_printer_.visit_function_definitions(function_definitions)

    template = ChatPromptTemplate.from_messages(
        [
            ("system", _AGENT_INSTRUCTIONS_BLOB_STYLE),
            MessagesPlaceholder("examples"),  # Can use to add example traces
            ("human", "{input}"),
            MessagesPlaceholder("history"),
        ]
    ).partial(tool_description=tool_description)

    # For the time being, hard-coding the fact that we're using a <tool> tag.
    model = model.bind(stop=["</tool>"])

    if rate_limiter:
        # Apply a rate limiter if it was provided
        model = with_rate_limit(model, rate_limiter)

    agent = (
        {
            "input": lambda x: x["input"],
            # Earlier steps are replayed to the model as chat history.
            "history": lambda x: format_steps_for_chat(
                x["intermediate_steps"], ast_printer_
            ),
            # `examples` is optional in the input; default to no examples.
            "examples": lambda x: x.get("examples", []),
        }
        | template
        | model
        | parser
    )
    return agent
@@ -1,240 +0,0 @@
"""Prototyping code for rendering function definitions, invocations, and results.
Types are simplified for now to `str`.
We should actually support something like pydantic or jsonschema for the types, so
we can expand them recursively for nested types.
"""
import abc
from typing import Any, List, Optional
from typing_extensions import NotRequired, TypedDict
class Parameter(TypedDict):
    """Representation for a parameter of a function."""

    # Name of the parameter.
    name: str
    # Type of the parameter, kept as a plain string.
    type: str
    # Human-readable description of the parameter.
    description: str
class Arguments(TypedDict):
    """Arguments are passed to a function during function invocation."""

    # Name of the argument; the annotation allows None
    # (presumably for unnamed/positional arguments -- TODO confirm).
    name: Optional[str]
    # The value passed for the argument; any type is accepted.
    value: Any
class ReturnValue(TypedDict):
    """Representation for a return value of a function call."""

    # Type of the returned value, kept as a plain string.
    type: str
    # Optional key: a description of the returned value. May be absent.
    description: NotRequired[str]
class FunctionDefinition(TypedDict):
    """Representation for a function."""

    # Name of the function.
    name: str
    description: str  # Function description
    # The function's parameters, in declaration order.
    parameters: List[Parameter]
    # Description of what the function returns.
    return_value: ReturnValue
class FunctionInvocation(TypedDict):
    """Representation for a function invocation."""

    # Optional key: an identifier for this invocation. May be absent.
    id: NotRequired[str]
    # Name of the function being invoked.
    name: str
    # The arguments passed to the function.
    arguments: List[Arguments]
class FunctionResult(TypedDict):
    """Representation for a function result."""

    # Optional key: an identifier for the result. May be absent.
    id: NotRequired[str]
    # Name of the function that produced the result.
    name: str
    # The result as text; the annotation allows None.
    result: Optional[str]
    # The error as text; the annotation allows None.
    error: Optional[str]
class Visitor(abc.ABC):
    """Abstract interface for rendering function-related structures as text.

    Every method takes one structure (or a list of them) and returns its
    textual rendering.
    """

    @abc.abstractmethod
    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Render a function."""

    @abc.abstractmethod
    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Render a list of functions."""

    @abc.abstractmethod
    def visit_function_invocation(self, function_invocation: FunctionInvocation) -> str:
        """Render a function invocation."""

    @abc.abstractmethod
    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Render a function result."""
class AstPrinter(Visitor):
    """Print the AST.

    Adds nothing to ``Visitor``; it serves as the common base class of the
    concrete encoders.
    """
class XMLEncoder(AstPrinter):
    """Render function definitions, invocations and results as XML-style text.

    Values are interpolated as-is; no escaping is performed.
    """

    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Render one function as a ``<function>`` element."""
        parameter_lines = [
            line
            for parameter in function_definition["parameters"]
            for line in (
                "<parameter>",
                f"<name>{parameter['name']}</name>",
                f"<type>{parameter['type']}</type>",
                f"<description>{parameter['description']}</description>",
                "</parameter>",
            )
        ]

        rendered = ["<function>"]
        rendered.append(
            f"<function_name>{function_definition['name']}</function_name>"
        )
        rendered += [
            "<description>",
            f"{function_definition['description']}",
            "</description>",
        ]
        rendered += ["<parameters>", *parameter_lines, "</parameters>"]
        rendered.append("<return_value>")
        rendered.append(f"<type>{function_definition['return_value']['type']}</type>")
        # The return value description is optional; emit it only when set.
        if function_definition["return_value"].get("description"):
            rendered.append(
                f"<description>{function_definition['return_value']['description']}"
                f"</description>"
            )
        rendered.append("</return_value>")
        rendered.append("</function>")
        return "\n".join(rendered)

    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Render several functions wrapped in a ``<functions>`` element."""
        body = "\n".join(
            self.visit_function_definition(definition)
            for definition in function_definitions
        )
        return f"<functions>\n{body}\n</functions>"

    def visit_function_invocation(self, invocation: FunctionInvocation) -> str:
        """Render a function invocation as a ``<function_invocation>`` element."""
        # Every rendered argument already ends with a newline.
        rendered_arguments = "".join(
            "<argument>\n"
            f"<name>{argument['name']}</name>\n"
            f"<value>{argument['value']}</value>\n"
            "</argument>\n"
            for argument in invocation["arguments"]
        )

        head = ["<function_invocation>"]
        if invocation.get("id"):
            head.append(f"<id>{invocation['id']}</id>")
        head.append(f"<function_name>{invocation['name']}</function_name>")

        tail = "<arguments>\n" + rendered_arguments + "</arguments>\n</function_invocation>"
        return "\n".join(head) + "\n" + tail

    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Render a function result as a ``<function_result>`` element.

        An ``<error>`` element is emitted when the error is truthy; otherwise
        a ``<result>`` element is emitted.
        """
        rendered = ["<function_result>"]
        if function_result.get("id"):
            rendered.append(f"<id>{function_result['id']}</id>")
        rendered.append(f"<function_name>{function_result['name']}</function_name>")

        if function_result["error"]:
            outcome = f"<error>{function_result['error']}</error>"
        else:
            outcome = f"<result>{function_result['result']}</result>"
        rendered.append(outcome)

        rendered.append("</function_result>")
        return "\n".join(rendered)
class TypeScriptEncoder(AstPrinter):
    """Render function definitions, invocations and results in TypeScript style."""

    def visit_function_definition(self, function_definition: FunctionDefinition) -> str:
        """Render a function as a commented TypeScript declaration.

        The output is a block of ``//`` comments (function description, one
        ``@param`` line per parameter and, when available, a ``@returns``
        line) followed by the function signature.
        """
        parameters = function_definition["parameters"]
        return_value = function_definition["return_value"]
        parameters_as_strings = [
            f"{parameter['name']}: {parameter['type']}" for parameter in parameters
        ]

        # Let's use JSdoc style comments
        # First the function description
        lines = [f"// {function_definition['description']}"]
        # Then the parameter descriptions
        lines.extend(
            f"// @param {parameter['name']} {parameter['description']}"
            for parameter in parameters
        )
        # Then the return value description. The key is optional in
        # `ReturnValue`, so the line is skipped when it is absent instead of
        # failing with a KeyError.
        if "description" in return_value:
            lines.append(f"// @returns {return_value['description']}")
        # Then the function definition
        lines.append(
            f"function {function_definition['name']}("
            f"{', '.join(parameters_as_strings)}): "
            f"{return_value['type']};"
        )
        return "\n".join(lines)

    def visit_function_definitions(
        self, function_definitions: List[FunctionDefinition]
    ) -> str:
        """Render several functions, separated by blank lines."""
        return "\n\n".join(
            self.visit_function_definition(function_definition)
            for function_definition in function_definitions
        )

    def visit_function_invocation(self, invocation: FunctionInvocation) -> str:
        """Render a function invocation as ``name(arg: value, ...);``."""
        arguments_as_strings = [
            f"{argument['name']}: {argument['value']}"
            for argument in invocation["arguments"]
        ]
        return f"{invocation['name']}({', '.join(arguments_as_strings)});"

    def visit_function_result(self, function_result: FunctionResult) -> str:
        """Render a function result.

        The first line is ``ERROR: <error>`` when the error is truthy and
        ``> <result>`` otherwise; a ``// ID: <id>`` line follows when an id
        is present.
        """
        lines = []
        if function_result["error"]:
            lines.append(f"ERROR: {function_result['error']}")
        else:
            lines.append(f"> {function_result['result']}")
        if function_result.get("id"):
            lines.append(f"// ID: {function_result['id']}")
        return "\n".join(lines)
@@ -1,93 +0,0 @@
"""Factory for creating agents for the tool usage task."""
from typing import Optional
from langchain.agents import AgentExecutor
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_benchmarks import RateLimiter, model_registry
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.experimental.agent import create_agent
from langchain_benchmarks.tool_usage.agents.experimental.parser import (
GenericAgentParser,
)
class CustomAgentFactory:
    """A factory for creating tool using agents.

    The agents produced here do not rely on any special JSON mode for
    function usage; function invocation is implemented purely through
    prompt engineering and parsing.
    """

    def __init__(
        self,
        task: ToolUsageTask,
        model: str,
        *,
        rate_limiter: Optional[RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an agent factory for the given tool usage task.

        Args:
            task: The task to create an agent factory for
            model: model name (check model_registry)
            rate_limiter: The rate limiter to use if provided
            num_retries: The number of times to retry the agent if it fails

        Raises:
            ValueError: If ``model`` is not in the model registry.
        """
        if model not in model_registry:
            raise ValueError(f"Unknown model: {model}")
        self.task = task
        self.model = model
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def __call__(self) -> Runnable:
        """Create a runnable that runs the agent in a fresh task environment."""
        model = self.model
        if isinstance(model, str):
            # Resolve the registered name to a model instance.
            registered_model = model_registry.get_model(model)
            if registered_model is None:
                raise ValueError(f"Unknown model: {self.model}")
            model = registered_model.get_model(model_params={"temperature": 0})

        def _add_task_instructions(
            input: dict, config: Optional[RunnableConfig] = None, **kwargs
        ) -> dict:
            """Add task instructions to the question."""
            if isinstance(input, dict):
                with_instructions = input.copy()
                with_instructions["question"] = (
                    f"{self.task.instructions}\nWrite down your answer, "
                    f"but do not explain it. Input: `{with_instructions['question']}`"
                )
                return with_instructions
            raise ValueError(
                f"Expected input to be a dict with key `question`. "
                f"Found {type(input)}."
            )

        env = self.task.create_environment()
        output_parser = GenericAgentParser(
            wrapping_xml_tag="tool", require_closing_xml_tag=False
        )
        agent = create_agent(
            model,
            env.tools,
            output_parser,
            rate_limiter=self.rate_limiter,
        )
        if self.num_retries > 0:
            # One initial attempt plus `num_retries` retries.
            agent = agent.with_retry(
                stop_after_attempt=self.num_retries + 1,
            )

        executor = AgentExecutor(
            agent=agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        adapted_executor = apply_agent_executor_adapter(
            executor, state_reader=env.read_state
        )
        return _add_task_instructions | adapted_executor
@@ -1,122 +0,0 @@
import ast
import re
from typing import Dict, Optional, Union
from langchain.agents import AgentOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain_core.agents import AgentAction, AgentActionMessageLog, AgentFinish
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage
class _ToolInvocationRequest(BaseModel):
    """Light-weight pydantic model for validating the raw tool invocation request.

    The purpose of this model is to make sure that whatever was parsed from
    the raw LLM output has a `tool_name` field and, optionally, an
    `arguments` field.
    """

    # Name of the tool to invoke (required).
    tool_name: str
    # Optional so that parameterless tools, which do not take arguments, are
    # accepted; defaults to an empty dict when omitted.
    arguments: Optional[Dict] = Field(default_factory=dict)
class GenericAgentParser(AgentOutputParser):
    """A generalized parser that makes it easier to parameterize different parsing."""

    # The tag that wraps the function invocation request.
    # For example, if "tool", then the function invocation request should be
    # wrapped in <tool>...</tool>.
    wrapping_xml_tag: str

    # Whether a closing tag is required for the wrapping_xml_tag. When False,
    # the closing tag is appended to the text before it is searched.
    require_closing_xml_tag: bool = False

    def parse(self, text: str) -> Union[AgentFinish, AgentAction]:
        """Parse the output of the agent.

        Returns the parsed tool invocation when the text contains a wrapped
        invocation request; otherwise the whole text is returned as the
        final answer.
        """
        tag = self.wrapping_xml_tag
        open_tag, close_tag = f"<{tag}>", f"</{tag}>"

        if open_tag in text:
            if not self.require_closing_xml_tag:
                # The closing tag may be a stop sequence for the language
                # model and therefore be cut off from the output, so it is
                # always appended here. This is a simple hack that works.
                text = text + close_tag

            found = re.search(
                rf"{open_tag}(?P<invocation>.*?){close_tag}", text, re.DOTALL
            )
            if found:
                invocation_text = found.group("invocation").strip()
                return parse_invocation(invocation_text, tag)

        return AgentFinish(
            log=text,
            return_values={
                "output": text,
            },
        )
def parse_invocation(text: str, tag: str) -> AgentAction:
    """Parse the content of the function invocation.

    The text is evaluated as a python literal and then validated against
    ``_ToolInvocationRequest``.

    Args:
        text: The text to parse.
        tag: The tag that wraps the function invocation request.

    Returns:
        An AgentAction that corresponds to the function invocation.

    Raises:
        OutputParserException: If the parsing fails.
            This exception is meant to be caught by the agent executor and
            handled appropriately to provide feedback to the LLM.
    """
    ai_content = f"<{tag}>{text}</{tag}>\n"
    # Feedback sent to the LLM whenever the request cannot be understood.
    err_msg = (
        f"ERROR: Please use the format "
        f'<{tag}>{{"tool_name": $TOOL_NAME, "arguments": $ARGUMENTS}}</{tag}>\n'
    )

    try:
        result = ast.literal_eval(text)
    except Exception as e:
        # Only `Exception` is caught: `BaseException` would also swallow
        # KeyboardInterrupt / SystemExit and report them as a parsing error.
        raise OutputParserException(
            error=e,
            llm_output=ai_content,
            observation=err_msg,
            send_to_llm=True,
        ) from e

    try:
        request = _ToolInvocationRequest.validate(result)
    except Exception as e:  # Using broad exception since it's not just ValidationError
        # Can also raise DictError if result is not a dict.
        raise OutputParserException(
            error=e,
            llm_output=ai_content,
            send_to_llm=True,
            observation=err_msg,
        ) from e

    return AgentActionMessageLog(
        message_log=[AIMessage(content=ai_content)],
        tool=request.tool_name,
        tool_input=request.arguments,
        log=f"\nInvoking {request.tool_name}: {request.arguments}\n\t",
    )
@@ -1,42 +0,0 @@
# Instruction template asking for tool calls in a nested XML format
# (<function_calls>/<invoke>/<tool_name>/<parameters>). `{tool_description}`
# is a placeholder for the description of the available tools.
AGENT_INSTRUCTIONS_XML_FORMAT = """\
In this environment you have access to a set of tools you can use to answer the user's question.
You may call them like this:
<function_calls>
<invoke>
<tool_name>$TOOL_NAME</tool_name>
<parameters>
<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
...
</parameters>
</invoke>
</function_calls>
Here are the tools available:
{tool_description}
""" # noqa: E501
# Instruction template asking for one tool call at a time, written as a
# python-literal dictionary wrapped in <tool>...</tool>. `{tool_description}`
# is a placeholder for the description of the available tools; the doubled
# braces are presumably escapes for literal braces in the template -- confirm
# against the templating used by the caller.
_AGENT_INSTRUCTIONS_BLOB_STYLE = """\
In this environment you have access to a set of tools you can use to answer the user's question.
Here are the tools available:
{tool_description}
You may call one tool at a time using a format that includes <tool> and </tool> tag.
Inside the tag the content is a python dictionary that uses python literals (e.g., numbers, strings, lists, dictionaries, etc.) to specify the tool invocation.
It must match the schema of the function as described in the tool description.
"arguments" is a dictionary of the arguments to the function.
<tool>
{{
"tool_name": $TOOL_NAME,
"arguments": $ARGUMENTS
}}
</tool>
If you do not know the answer use more tools. You can only take a single action at a time.\
""" # noqa: E501
@@ -1,57 +0,0 @@
"""Utilities to extract information from langchain tools for use in prompts."""
import inspect
from textwrap import dedent
from typing import List
from langchain.tools.base import StructuredTool
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
FunctionDefinition,
Parameter,
)
# PUBLIC API
def get_parameters_from_tool(tool: StructuredTool) -> List[Parameter]:
    """Describe the arguments of a langchain tool as a list of parameters.

    Args:
        tool: The tool to inspect; its ``args_schema.schema()`` supplies the
            argument properties.

    Returns:
        One ``Parameter`` per schema property, in schema order. JSON-schema
        type names are mapped to python-style names where a mapping exists
        and passed through otherwise. A property without a top-level
        ``type`` is reported with the type ``"Any"``; a missing description
        becomes the empty string.
    """
    schema = tool.args_schema.schema()
    # Is this needed or is string OK?
    type_adapter = {
        "string": "str",  # str or string?
        "integer": "int",
        "number": "float",
        "boolean": "bool",
    }

    parameters = []
    for key, value in schema.get("properties", {}).items():
        # Not every property carries a top-level "type" (e.g. one described
        # only through a reference or a combination of sub-schemas), so fall
        # back to "Any" instead of failing with a KeyError.
        json_type = value.get("type", "Any")
        parameters.append(
            {
                "name": key,
                "type": type_adapter.get(json_type, json_type),
                "description": value.get("description", ""),
            }
        )
    return parameters
#
def convert_tool_to_function_definition(tool: StructuredTool) -> FunctionDefinition:
    """Convert a langchain tool into a ``FunctionDefinition``.

    Args:
        tool: The tool to convert.

    Returns:
        A function definition carrying the tool's name, description and
        parameters. The return value type is always reported as ``"Any"``.
    """
    # Here we re-inspect the underlying function to get the doc-string
    # since StructuredTool modifies it, but we want the raw one for maximum
    # flexibility.
    description = inspect.getdoc(tool.func) if tool.func is not None else None
    if description is None:
        # No underlying function or no doc-string on it: fall back to the
        # tool's own description rather than failing inside `dedent`.
        description = tool.description or ""
    parameters = get_parameters_from_tool(tool)
    return {
        "name": tool.name,
        "description": dedent(description),
        "parameters": parameters,
        "return_value": {
            "type": "Any",
        },
    }
@@ -1,77 +0,0 @@
"""Code for creating an assistant factory for evaluating tool usage tasks.
See: https://platform.openai.com/docs/assistants/how-it-works/creating-assistants
"""
from typing import Optional
from langchain.agents import AgentExecutor
from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable
from langchain.schema.runnable import Runnable
from langchain_benchmarks import rate_limiting
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
class OpenAIAssistantFactory:
    """Factory for agent executors backed by an OpenAI assistant."""

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        model: str,
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an OpenAI assistant factory for the given task.

        The assistant itself is created once, here, and shared by every
        runnable the factory produces.

        Args:
            task: The task to create an agent factory for.
            model: The model to use -- this must be an open AI model.
            rate_limiter: The rate limiter to use
            num_retries: The number of times to retry the assistant if it fails

        Raises:
            ValueError: If ``model`` is not a string.
        """
        if not isinstance(model, str):
            raise ValueError(f"Expected str for model, got {type(model)}")
        self.task = task
        tools = task.create_environment().tools
        # Stateless, so we only need to create it once
        self.agent = OpenAIAssistantRunnable.create_assistant(
            name=f"{task.name} assistant",
            instructions=self.task.instructions,
            tools=tools,
            model=model,
            as_agent=True,
        )
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def __call__(self) -> Runnable:
        """Create an agent executor running in a fresh task environment."""
        env = self.task.create_environment()

        agent = self.agent
        if self.rate_limiter is not None:
            # Rate limited model
            agent = rate_limiting.with_rate_limit(agent, self.rate_limiter)

        def _map_key(x: dict):
            # Assistant expects the 'content' key explicitly
            return {
                "content": x["input"],
                **{k: v for k, v in x.items() if k != "input"},
            }

        # Compose with the local `agent` (not `self.agent`) so that the rate
        # limiter applied above is kept in the chain.
        agent = _map_key | agent

        if self.num_retries > 0:
            # One initial attempt plus `num_retries` retries.
            agent = agent.with_retry(
                stop_after_attempt=self.num_retries + 1,
            )
        runnable = AgentExecutor(
            agent=agent,
            tools=env.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        # Returns `state` in the output if the environment has a state reader
        # makes sure that `output` is always in the output
        return apply_agent_executor_adapter(runnable, state_reader=env.read_state)
@@ -1,166 +0,0 @@
"""Code for creating an agent factory for evaluating tool usage tasks."""
from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad.openai_tools import (
format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import Runnable
from langchain.tools.render import format_tool_to_openai_tool
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
from langchain_core.language_models.base import LanguageModelInput
from langchain_core.messages import BaseMessage
from langchain_core.pydantic_v1 import BaseModel
from langchain_benchmarks import model_registry, rate_limiting
from langchain_benchmarks.model_registration import RegisteredModel
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
# PUBLIC API
def _bind_tools(
    llm: BaseChatModel,
    tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    tool_choice: Optional[str] = None,
    json_mode: bool = False,
    **kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
    """Bind tools (and other options) to a chat model.

    Args:
        llm: The chat model to bind the tools to.
        tools: The tool definitions to bind. Each one is converted with
            ``format_tool_to_openai_tool``.
        tool_choice: Name of the tool the model is required to call. It must
            be the name of one of the provided tools.
        json_mode: Whether to request a JSON object as the response format.
        kwargs: Additional parameters passed on to ``llm.bind``.

    Returns:
        The model with the tools and options bound.

    Raises:
        ValueError: If ``tool_choice`` is given but no tools are provided, or
            if it does not name one of the provided tools.
    """
    formatted_tools: List[Dict[str, Union[str, dict]]] = [
        format_tool_to_openai_tool(tool) for tool in tools
    ]
    bind_kwargs = dict(kwargs)

    if tool_choice is not None:
        if len(formatted_tools) == 0:
            raise ValueError(
                "When specifying `tool_choice`, you must provide at least one tool."
            )
        tool_names = [tool["function"]["name"] for tool in formatted_tools]
        if tool_choice not in tool_names:
            raise ValueError(
                f"Tool choice {tool_choice} was specified, but the only "
                f"provided tools were {tool_names}."
            )
        bind_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": tool_choice},
        }

    if json_mode:
        bind_kwargs["response_format"] = {"type": "json_object"}

    return llm.bind(tools=formatted_tools, **bind_kwargs)
class OpenAIAgentFactory:
    """Factory producing OpenAI tool-calling agent executors for a task."""

    def __init__(
        self,
        task: ToolUsageTask,
        *,
        model: Union[
            str, RegisteredModel, BaseLanguageModel, BaseChatModel
        ] = "gpt-3.5-turbo-16k",
        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
        num_retries: int = 0,
    ) -> None:
        """Create an OpenAI agent factory for the given task.

        Args:
            task: The task to create an agent factory for.
            model: The model to use -- this must be an open AI model. May be
                a registry name, a registered model, or a model instance.
            rate_limiter: The rate limiter to use, if any.
            num_retries: How many times the agent runnable is retried; a
                value of 0 (the default) disables retrying.
        """
        self.task = task
        self.model = model
        self.rate_limiter = rate_limiter
        self.num_retries = num_retries

    def _create_model(self) -> Union[BaseChatModel, BaseLanguageModel]:
        """Turn ``self.model`` into a usable model instance.

        Raises:
            ValueError: If a model name is not present in the model registry.
            TypeError: If ``self.model`` is of an unsupported type.
        """
        configured = self.model
        # Parameters applied whenever the model is built from the registry.
        registry_params = {"temperature": 0, "model_kwargs": {"seed": 0}}

        if isinstance(configured, RegisteredModel):
            return configured.get_model(model_params=registry_params)
        if isinstance(configured, (BaseChatModel, BaseLanguageModel)):
            # Ready-made instances are used exactly as supplied.
            return configured
        if not isinstance(configured, str):
            raise TypeError(f"Expected str or RegisteredModel, got {type(self.model)}")
        if configured not in model_registry:
            raise ValueError(f"Unknown model: {self.model}")

        registered_model = model_registry.get_model(configured)
        return registered_model.get_model(model_params=registry_params)

    def create(self) -> Runnable:
        """Build the agent executor; kept as an alias of calling the factory."""
        # For backwards compatibility
        return self()

    def __call__(self) -> Runnable:
        """Assemble a fresh agent executor bound to a new task environment."""
        llm = self._create_model()
        environment = self.task.create_environment()
        llm = _bind_tools(llm, environment.tools)

        limiter = self.rate_limiter
        if limiter is not None:
            # Wrap the tool-bound model so calls go through the rate limiter.
            llm = rate_limiting.with_rate_limit(llm, limiter)

        prompt_messages = [
            ("system", self.task.instructions),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
        prompt = ChatPromptTemplate.from_messages(prompt_messages)

        # Maps the executor's input dict onto the prompt variables.
        prompt_inputs = {
            "input": lambda payload: payload["input"],
            "agent_scratchpad": lambda payload: format_to_openai_tool_messages(
                payload["intermediate_steps"]
            ),
        }
        agent = prompt_inputs | prompt | llm | OpenAIToolsAgentOutputParser()

        if self.num_retries > 0:
            # One initial attempt plus `num_retries` further attempts.
            agent = agent.with_retry(stop_after_attempt=self.num_retries + 1)

        executor = AgentExecutor(
            agent=agent,
            tools=environment.tools,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )

        # Returns `state` in the output if the environment has a state reader
        # makes sure that `output` is always in the output
        return apply_agent_executor_adapter(
            executor, state_reader=environment.read_state
        )
@@ -10,9 +10,10 @@ from langchain_core.runnables import Runnable
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.base import AgentFactory
class CustomRunnableAgentFactory:
class CustomRunnableAgentFactory(AgentFactory):
"""A factory for creating tool using agents.
A factory for agents that do not leverage any special JSON mode for
@@ -46,4 +47,6 @@ class CustomRunnableAgentFactory:
return_intermediate_steps=True,
)
return apply_agent_executor_adapter(executor, state_reader=env.read_state)
return apply_agent_executor_adapter(
executor, state_reader=env.read_state
).with_config({"run_name": "Agent", "metadata": {"task": self.task.name}})
@@ -4,7 +4,7 @@ This is useful for agents that follow the standard LangChain tool format.
"""
from typing import Optional
from langchain.agents import AgentExecutor
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.language_models import BaseChatModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
@@ -12,9 +12,10 @@ from langchain_core.runnables import Runnable
from langchain_benchmarks.rate_limiting import RateLimiter, with_rate_limit
from langchain_benchmarks.schema import ToolUsageTask
from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
from langchain_benchmarks.tool_usage.agents.base import AgentFactory
class StandardAgentFactory:
class StandardAgentFactory(AgentFactory):
"""A standard agent factory.
Use this factory with chat models that support the standard LangChain tool
@@ -55,8 +56,6 @@ class StandardAgentFactory:
def __call__(self) -> Runnable:
"""Call the factory to create Runnable agent."""
# Temporarily import here until new langchain is released with create_tools_agent
from langchain.agents import create_tool_calling_agent
env = self.task.create_environment()
View File
@@ -1,54 +0,0 @@
import pytest
from langchain_core.agents import AgentActionMessageLog, AgentFinish
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage
from langchain_benchmarks.tool_usage.agents.experimental.parser import (
GenericAgentParser,
)
def test_parser() -> None:
    """Check GenericAgentParser on finish text, bad payloads and tool calls."""
    agent_parser = GenericAgentParser(
        require_closing_tag=False, wrapping_xml_tag="tool"
    )

    # If <tool> tag not found then it's an agent finish
    finish = agent_parser.invoke("goodbye")
    assert isinstance(finish, AgentFinish)

    # First payload: invocation content is missing tool name and arguments.
    # Second payload: opening tag followed by plain text only.
    for malformed in ("<tool>'hello'</tool>", "<tool>hello"):
        with pytest.raises(OutputParserException):
            agent_parser.invoke(malformed)

    # Full invocation
    text = (
        '<tool>{\n "tool_name": "type_letter",\n '
        '"arguments": {\n '
        '"letter": "h"\n }\n}</tool>\n'
    )
    expected_action = AgentActionMessageLog(
        tool="type_letter",
        tool_input={"letter": "h"},
        log="\nInvoking type_letter: {'letter': 'h'}\n\t",
        message_log=[AIMessage(content=text)],
    )
    assert agent_parser.invoke(text) == expected_action

    # A payload without "arguments" parses to an empty tool input.
    # Assumes that it's a structured tool by default!
    without_arguments = agent_parser.invoke('<tool>{"tool_name": "hello"}</tool>')
    assert without_arguments.tool == "hello"
    assert without_arguments.tool_input == {}

    # Arguments need to be a dict
    with pytest.raises(OutputParserException):
        agent_parser.invoke(
            '<tool>{"tool_name": "hello", "arguments": [1, 2]}</tool>'
        )

    # Dict arguments are handed through as the tool input.
    # Assumes that it's a structured tool by default!
    with_arguments = agent_parser.invoke(
        '<tool>{"tool_name": "hello", "arguments": {"a": "b"}}</tool>'
    )
    assert with_arguments.tool == "hello"
    assert with_arguments.tool_input == {"a": "b"}
@@ -1,25 +0,0 @@
"""Test typescript encoding."""
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
FunctionDefinition,
TypeScriptEncoder,
)
def test_function_definition() -> None:
    """Check the TypeScript declaration produced for a one-parameter function."""
    parameter = {
        "name": "test_parameter",
        "type": "str",
        "description": "A test parameter",
    }
    definition = FunctionDefinition(
        name="test_function",
        description="A test function",
        parameters=[parameter],
        return_value={"type": "str", "description": "A test return value"},
    )

    rendered = TypeScriptEncoder().visit_function_definition(definition)

    # Comment header (description, @param, @returns) followed by the signature.
    expected = (
        "// A test function\n"
        "// @param test_parameter A test parameter\n"
        "// @returns A test return value\n"
        "function test_function(test_parameter: str): str;"
    )
    assert rendered == expected
@@ -1,90 +0,0 @@
"""Test XML encoding and decoding of function definitions, invocation, and results."""
from langchain_benchmarks.tool_usage.agents.experimental.encoder import (
FunctionDefinition,
FunctionInvocation,
FunctionResult,
XMLEncoder,
)
def test_function_definition_encoding() -> None:
    """Check the XML document produced for a one-parameter function definition."""
    parameter = {
        "name": "test_parameter",
        "type": "str",
        "description": "A test parameter",
    }
    definition = FunctionDefinition(
        name="test_function",
        description="A test function",
        parameters=[parameter],
        return_value={"type": "str", "description": "A test return value"},
    )

    rendered = XMLEncoder().visit_function_definition(definition)

    # Name, description, parameter list, then return value; no trailing newline.
    expected = (
        "<function>\n"
        "<function_name>test_function</function_name>\n"
        "<description>\n"
        "A test function\n"
        "</description>\n"
        "<parameters>\n"
        "<parameter>\n"
        "<name>test_parameter</name>\n"
        "<type>str</type>\n"
        "<description>A test parameter</description>\n"
        "</parameter>\n"
        "</parameters>\n"
        "<return_value>\n"
        "<type>str</type>\n"
        "<description>A test return value</description>\n"
        "</return_value>\n"
        "</function>"
    )
    assert rendered == expected
def test_function_result_encoding() -> None:
    """Check XML encoding of a successful and of a failed function result."""
    xml_encoder = XMLEncoder()

    # Successful call: a <result> element is expected and no <error> element.
    success = FunctionResult(
        name="test_function",
        result="test_result",
        error=None,
    )
    expected_success = (
        "<function_result>\n"
        "<function_name>test_function</function_name>\n"
        "<result>test_result</result>\n"
        "</function_result>"
    )
    assert xml_encoder.visit_function_result(success) == expected_success

    # Failed call: only an <error> element is expected.
    failure = FunctionResult(
        name="test_function",
        error="error",
    )
    expected_failure = (
        "<function_result>\n"
        "<function_name>test_function</function_name>\n"
        "<error>error</error>\n"
        "</function_result>"
    )
    assert xml_encoder.visit_function_result(failure) == expected_failure
def test_function_invocation() -> None:
    """Check the XML document produced for a single-argument invocation."""
    argument = {"name": "test_argument", "value": "test_value"}
    invocation = FunctionInvocation(
        name="test_function",
        arguments=[argument],
    )

    rendered = XMLEncoder().visit_function_invocation(invocation)

    # Function name first, then one <argument> entry per argument.
    expected = (
        "<function_invocation>\n"
        "<function_name>test_function</function_name>\n"
        "<arguments>\n"
        "<argument>\n"
        "<name>test_argument</name>\n"
        "<value>test_value</value>\n"
        "</argument>\n"
        "</arguments>\n"
        "</function_invocation>"
    )
    assert rendered == expected
@@ -1,59 +0,0 @@
import pytest
from langchain.tools import tool
from langchain_benchmarks.tool_usage.agents.experimental.tool_utils import (
convert_tool_to_function_definition,
)
# Zero-argument tool fixture used by test_parameterless_function.
# The docstring is part of the fixture: that test expects it verbatim as the
# converted function definition's "description", so it must not be reworded.
@tool
def get_hello() -> str:
    """Get hello."""
    return "hello"
# Single-parameter tool fixture used by test_function_with_parameters.
# That test (currently skipped) compares the converted description against a
# dedented copy of this docstring, so its wording is part of the fixture.
@tool
def repeat(x: str) -> str:
    """Repeat x.

    Args:
        x: The string to repeat.

    Returns:
        The repeated string.
    """
    # Identity: the input comes back unchanged.
    return x
def test_parameterless_function() -> None:
    """Check the function definition derived from a tool with no parameters."""
    # The description is the tool's docstring; the parameter list is empty
    # and the return type is reported as "Any".
    expected = {
        "name": "get_hello",
        "description": "Get hello.",
        "parameters": [],
        "return_value": {
            "type": "Any",
        },
    }
    converted = convert_tool_to_function_definition(get_hello)
    assert converted == expected
@pytest.mark.skip("Need to fix handling of leading whitespace")
def test_function_with_parameters() -> None:
    """Check the function definition derived from a one-parameter tool."""
    import textwrap

    # The description is expected to equal the dedented tool docstring.
    expected_description = textwrap.dedent(repeat.func.__doc__)
    x_parameter = {
        "name": "x",
        "type": "str",
        "description": "",  # Need to fix this
    }
    expected = {
        "name": "repeat",
        "description": expected_description,
        "parameters": [x_parameter],
        "return_value": {
            "type": "Any",
        },
    }
    assert convert_tool_to_function_definition(repeat) == expected
@@ -6,5 +6,11 @@ def test_public_api() -> None:
# This test will also fail if __all__ is not sorted.
# Please keep it sorted!
assert __all__ == sorted(
["apply_agent_executor_adapter", "get_eval_config"], key=str.lower
[
"apply_agent_executor_adapter",
"get_eval_config",
"CustomRunnableAgentFactory",
"StandardAgentFactory",
],
key=str.lower,
)