Add LangGraph, clean up old / redundant chains

This commit is contained in:
Lance Martin
2024-02-20 15:12:01 -08:00
parent 4daf17315a
commit 1862687d68
11 changed files with 462 additions and 724 deletions
+40 -15
View File
@@ -1,11 +1,12 @@
import weaviate
from langchain_openai import ChatOpenAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Weaviate
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain.embeddings.voyageai import VoyageEmbeddings
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.utils.function_calling import convert_to_openai_tool
# Keys
import os
@@ -39,23 +40,47 @@ def get_retriever():
# Retriever
retriever = get_retriever()
# Prompt
template = """Answer the question based only on the following context:
{context}
## Data model
class code(BaseModel):
"""Code output"""
prefix: str = Field(description="Description of the problem and approach")
imports: str = Field(description="Code block import statements")
code: str = Field(description="Code block not including import statements")
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
## LLM
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
# LLM
model = ChatOpenAI(model="gpt-4-1106-preview")
# Tool
code_tool_oai = convert_to_openai_tool(code)
# LLM with tool and enforce invocation
llm_with_tool = model.bind(
tools=[convert_to_openai_tool(code_tool_oai)],
tool_choice={"type": "function", "function": {"name": "code"}},
)
# Parser
parser_tool = PydanticToolsParser(tools=[code])
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LangChain. \n
Here is relevant context:
\n ------- \n
{context}
\n ------- \n
Ensure any code you provide can be executed with all required imports and variables defined. \n
Structure your answer with a description of the code solution. \n
Then list the imports. And finally list the functioning code block. \n
Here is the user question: \n --- --- --- \n {question}""",
input_variables=["question","context"])
# Chain
chain = (
{"context": retriever, "question": RunnablePassthrough()}
| prompt
| model
| StrOutputParser()
| llm_with_tool
| parser_tool
)
# Add typing for input
@@ -1,83 +0,0 @@
import weaviate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Weaviate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.embeddings.voyageai import VoyageEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
# Keys
import os
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
# Fine-tuned embd and vectorstore
def get_embeddings_model():
    """Pick the embeddings backend from the environment.

    Voyage embeddings are used only when both VOYAGE_API_KEY and
    VOYAGE_AI_MODEL are set; otherwise OpenAI embeddings are returned.
    """
    voyage_configured = os.environ.get("VOYAGE_API_KEY") and os.environ.get(
        "VOYAGE_AI_MODEL"
    )
    if not voyage_configured:
        return OpenAIEmbeddings(chunk_size=200)
    return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
def get_retriever():
    """Connect to Weaviate and return a retriever over the docs index.

    The retriever is created with ``k=6`` as its search kwargs.
    """
    api_key_auth = weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY)
    raw_client = weaviate.Client(url=WEAVIATE_URL, auth_client_secret=api_key_auth)
    # Wrap the raw client in the LangChain vector store interface.
    vectorstore = Weaviate(
        client=raw_client,
        index_name=WEAVIATE_DOCS_INDEX_NAME,
        text_key="text",
        embedding=get_embeddings_model(),
        by_text=False,
        attributes=["source", "title"],
    )
    return vectorstore.as_retriever(search_kwargs={"k": 6})
# Retriever
retriever = get_retriever()
# Output
# Structured answer schema passed to PydanticOutputParser below.
# NOTE: documented with comments rather than a class docstring, because a
# docstring would become part of the pydantic schema.
class FunctionOutput(BaseModel):
    # Text that accompanies the code block.
    prefix: str = Field(description="The prefix of the output")
    # The code itself.
    code_block: str = Field(description="The code block of the output")
# Create an instance of the PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=FunctionOutput)
# Get the format instructions from the output parser
format_instructions = parser.get_format_instructions()
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LangChain. \n
Here is relevant context:
\n ------- \n
{context}
\n ------- \n
Now, answer the user question based on the above provided documentation: {question}
Output format instructions: \n {format_instructions}
""",
input_variables=["question"],
partial_variables={"format_instructions": format_instructions},
)
# LLM
model = ChatOpenAI(model="gpt-4-1106-preview")
# Chain
chain = (
{"context": retriever, "question": RunnablePassthrough()}
| prompt
| model
| StrOutputParser()
)
# Add typing for input
# Input type for the chain: a bare string wrapped as a pydantic custom root
# type, applied to the chain via with_types().
class Question(BaseModel):
    __root__: str
chain = chain.with_types(input_type=Question)
@@ -1,67 +0,0 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel
# Load LCEL docs
url = "https://python.langchain.com/docs/expression_language/"
loader = RecursiveUrlLoader(
url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()
# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_pydantic = loader.load()
# LCEL w/ Self Query (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_sq = loader.load()
# Add
docs.extend([*docs_pydantic, *docs_sq])
# Sort the list based on the URLs in 'metadata' -> 'source'
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
# Concatenate the 'page_content' of each sorted dictionary
concatenated_content = "\n\n\n --- \n\n\n".join(
[doc.page_content for doc in d_reversed]
)
# Prompt template
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. Here is a full set of documentation:
{context}
Now, answer the user question based on the above provided documentation: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
chain = (
{
"context": lambda x: concatenated_content,
"question": RunnablePassthrough(),
}
| prompt
| model
| StrOutputParser()
)
# Add typing for input
class Question(BaseModel):
__root__: str
chain = chain.with_types(input_type=Question)
@@ -1,86 +0,0 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
# Load LCEL docs
url = "https://python.langchain.com/docs/expression_language/"
loader = RecursiveUrlLoader(
url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()
# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_pydantic = loader.load()
# LCEL w/ Self Query (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_sq = loader.load()
# Add
docs.extend([*docs_pydantic, *docs_sq])
# Sort the list based on the URLs in 'metadata' -> 'source'
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
# Concatenate the 'page_content' of each sorted dictionary
concatenated_content = "\n\n\n --- \n\n\n".join(
[doc.page_content for doc in d_reversed]
)
# Output
class FunctionOutput(BaseModel):
prefix: str = Field(description="The prefix of the output")
code_block: str = Field(description="The code block of the output")
# Create an instance of the PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=FunctionOutput)
# Get the format instructions from the output parser
format_instructions = parser.get_format_instructions()
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n
Here is a full set of LCEL documentation:
\n ------- \n
{context}
\n ------- \n
Now, answer the user question based on the above provided documentation: {question}
Output format instructions: \n {format_instructions}
""",
input_variables=["question"],
partial_variables={"format_instructions": format_instructions},
)
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
chain = (
{
"context": lambda x: concatenated_content,
"question": RunnablePassthrough(),
}
| prompt
| model
| StrOutputParser()
)
# Add typing for input
class Question(BaseModel):
__root__: str
chain = chain.with_types(input_type=Question)
@@ -1,87 +0,0 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
# Load LCEL docs
url = "https://python.langchain.com/docs/expression_language/"
loader = RecursiveUrlLoader(
url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()
# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_pydantic = loader.load()
# LCEL w/ Self Query (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_sq = loader.load()
# Add
docs.extend([*docs_pydantic, *docs_sq])
# Sort the list based on the URLs in 'metadata' -> 'source'
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
# Concatenate the 'page_content' of each sorted dictionary
concatenated_content = "\n\n\n --- \n\n\n".join(
[doc.page_content for doc in d_reversed]
)
# Output
class FunctionOutput(BaseModel):
prefix: str = Field(description="The prefix of the output")
code_block: str = Field(description="The code block of the output")
# Create an instance of the PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=FunctionOutput)
# Get the format instructions from the output parser
format_instructions = parser.get_format_instructions()
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n
Here is a full set of LCEL documentation:
\n ------- \n
{context}
\n ------- \n
Now, answer the user question based on the above provided documentation and ensure any code you provide can be executed with all required imports and variables defined: {question}
Output format instructions: \n {format_instructions}
""",
input_variables=["question"],
partial_variables={"format_instructions": format_instructions},
)
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
chain = (
{
"context": lambda x: concatenated_content,
"question": RunnablePassthrough(),
}
| prompt
| model
| StrOutputParser()
)
# Add typing for input
class Question(BaseModel):
__root__: str
chain = chain.with_types(input_type=Question)
+39 -12
View File
@@ -1,10 +1,11 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.utils.function_calling import convert_to_openai_tool
# Load LCEL docs
url = "https://python.langchain.com/docs/expression_language/"
@@ -39,15 +40,41 @@ concatenated_content = "\n\n\n --- \n\n\n".join(
[doc.page_content for doc in d_reversed]
)
# Prompt template
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. Here is a full set of documentation:
{context}
## Data model
class code(BaseModel):
"""Code output"""
prefix: str = Field(description="Description of the problem and approach")
imports: str = Field(description="Code block import statements")
code: str = Field(description="Code block not including import statements")
Now, answer the user question based on the above provided documentation: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
## LLM
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
# Tool
code_tool_oai = convert_to_openai_tool(code)
# LLM with tool and enforce invocation
llm_with_tool = model.bind(
tools=[convert_to_openai_tool(code_tool_oai)],
tool_choice={"type": "function", "function": {"name": "code"}},
)
# Parser
parser_tool = PydanticToolsParser(tools=[code])
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n
Here is a full set of LCEL documentation:
\n ------- \n
{context}
\n ------- \n
Answer the user question based on the above provided documentation. \n
Ensure any code you provide can be executed with all required imports and variables defined. \n
Structure your answer with a description of the code solution. \n
Then list the imports. And finally list the functioning code block. \n
Here is the user question: \n --- --- --- \n {question}""",
input_variables=["question","context"])
chain = (
{
@@ -55,8 +82,8 @@ chain = (
"question": RunnablePassthrough(),
}
| prompt
| model
| StrOutputParser()
| llm_with_tool
| parser_tool
)
# Add typing for input
@@ -1,87 +0,0 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
# Load LCEL docs
url = "https://python.langchain.com/docs/expression_language/"
loader = RecursiveUrlLoader(
url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()
# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_pydantic = loader.load()
# LCEL w/ Self Query (outside the primary LCEL docs)
url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
loader = RecursiveUrlLoader(
url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
)
docs_sq = loader.load()
# Add
docs.extend([*docs_pydantic, *docs_sq])
# Sort the list based on the URLs in 'metadata' -> 'source'
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
# Concatenate the 'page_content' of each sorted dictionary
concatenated_content = "\n\n\n --- \n\n\n".join(
[doc.page_content for doc in d_reversed]
)
# Output
class FunctionOutput(BaseModel):
prefix: str = Field(description="The prefix of the output")
code_block: str = Field(description="The code block of the output")
# Create an instance of the PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=FunctionOutput)
# Get the format instructions from the output parser
format_instructions = parser.get_format_instructions()
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n
Here is a full set of LCEL documentation:
\n ------- \n
{context}
\n ------- \n
Now, answer the user question based on the above provided documentation: {question}
Output format instructions: \n {format_instructions}
""",
input_variables=["question"],
partial_variables={"format_instructions": format_instructions},
)
model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
chain = (
{
"context": lambda x: concatenated_content,
"question": RunnablePassthrough(),
}
| prompt
| model
| StrOutputParser()
)
# Add typing for input
class Question(BaseModel):
__root__: str
chain = chain.with_types(input_type=Question)
+336
View File
@@ -0,0 +1,336 @@
from operator import itemgetter
from typing import Any, Dict, TypedDict

from bs4 import BeautifulSoup as Soup
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        keys: A dictionary where each key is a string. The nodes in this
            file read and write "question", "generation" and "error" in it.
    """

    # `Any` (the typing construct), not `any` (the builtin function).
    keys: Dict[str, Any]
# Process-wide cache of the concatenated documentation used as prompt context.
# The crawl is expensive and its result does not depend on the question, so it
# is performed once and reused by every (re-)generation attempt.
_LCEL_CONTEXT = None


def _load_lcel_context():
    """Crawl the documentation pages once and return them as one string."""
    global _LCEL_CONTEXT
    if _LCEL_CONTEXT is not None:
        return _LCEL_CONTEXT

    def _extract_text(html):
        return Soup(html, "html.parser").text

    # (start URL, crawl depth): the LCEL docs, plus two pages that live
    # outside the primary LCEL docs (PydanticOutputParser and Self Query).
    sources = (
        ("https://python.langchain.com/docs/expression_language/", 20),
        (
            "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start",
            1,
        ),
        (
            "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/",
            1,
        ),
    )
    docs = []
    for url, max_depth in sources:
        loader = RecursiveUrlLoader(
            url=url, max_depth=max_depth, extractor=_extract_text
        )
        docs.extend(loader.load())

    # Sort the list based on the URLs in 'metadata' -> 'source', then reverse
    d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
    d_reversed = list(reversed(d_sorted))

    # Concatenate the 'page_content' of each sorted document
    _LCEL_CONTEXT = "\n\n\n --- \n\n\n".join(
        [doc.page_content for doc in d_reversed]
    )
    return _LCEL_CONTEXT


def generate(state):
    """
    Generate a code solution based on LCEL docs and the input question
    with optional feedback from code execution tests

    Args:
        state (dict): The current graph state. ``state["keys"]`` must hold
            "question"; when it also holds "error", the previous
            "generation" and that error are fed back to the model.

    Returns:
        state (dict): ``{"keys": {"generation": ..., "question": ...}}`` where
        "generation" is the parsed tool output of the model. Any previous
        "error" entry is not carried over.
    """

    ## State
    state_dict = state["keys"]
    question = state_dict["question"]

    ## Context
    concatenated_content = _load_lcel_context()

    ## Data model
    class code(BaseModel):
        """Code output"""

        prefix: str = Field(description="Description of the problem and approach")
        imports: str = Field(description="Code block import statements")
        code: str = Field(description="Code block not including import statements")

    ## LLM
    model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

    # Tool
    code_tool_oai = convert_to_openai_tool(code)

    # LLM with tool and enforce invocation. The schema is already in OpenAI
    # tool format, so it is passed as-is rather than converted a second time.
    llm_with_tool = model.bind(
        tools=[code_tool_oai],
        tool_choice={"type": "function", "function": {"name": "code"}},
    )

    # Parser
    parser_tool = PydanticToolsParser(tools=[code])

    ## Prompt
    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n
Here is a full set of LCEL documentation:
\n ------- \n
{context}
\n ------- \n
Answer the user question based on the above provided documentation. \n
Ensure any code you provide can be executed with all required imports and variables defined. \n
Structure your answer with a description of the code solution. \n
Then list the imports. And finally list the functioning code block. \n
Here is the user question: \n --- --- --- \n {question}"""

    ## Generation
    # Values handed to the chain; extended with feedback on a retry.
    inputs = {"question": question}

    if "error" in state_dict:
        print("---RE-GENERATE SOLUTION w/ ERROR FEEDBACK---")

        # Update prompt with the previous attempt and the error it produced
        addendum = """  \n --- --- --- \n You previously tried to solve this problem. \n Here is your solution:
\n --- --- --- \n {generation}  \n --- --- --- \n  Here is the resulting error from code
execution:  \n --- --- --- \n {error}  \n --- --- --- \n Please re-try to answer this.
Structure your answer with a description of the code solution. \n Then list the imports.
And finally list the functioning code block. Structure your answer with a description of
the code solution. \n Then list the imports. And finally list the functioning code block.
\n Here is the user question: \n --- --- --- \n {question}"""
        template = template + addendum
        inputs["generation"] = str(state_dict["generation"][0])
        inputs["error"] = state_dict["error"]
    else:
        print("---GENERATE SOLUTION---")

    # Prompt
    prompt = PromptTemplate(
        template=template,
        input_variables=["context", *inputs],
    )

    # Chain: the context is constant, every other variable is read from the
    # invocation input by name.
    chain = (
        {
            "context": lambda x: concatenated_content,
            **{name: itemgetter(name) for name in inputs},
        }
        | prompt
        | llm_with_tool
        | parser_tool
    )

    code_solution = chain.invoke(inputs)

    return {"keys": {"generation": code_solution, "question": question}}
def check_code_imports(state):
    """
    Check imports

    Args:
        state (dict): The current graph state. ``state["keys"]`` must hold
            "question" and "generation" (a sequence whose first item exposes
            an ``imports`` string).

    Returns:
        state (dict): New key added to state, error. It is the string "None"
        when the imports executed cleanly, otherwise a description of the
        failure, appended to any earlier real error.
    """

    ## State
    print("---CHECKING CODE IMPORTS---")
    state_dict = state["keys"]
    question = state_dict["question"]
    code_solution = state_dict["generation"]
    imports = code_solution[0].imports

    try:
        # Attempt to execute the imports.
        # NOTE(security): this executes model-generated code in-process; it is
        # run in a fresh namespace so it cannot read or clobber this
        # function's variables, but it is NOT sandboxed.
        exec(imports, {"__name__": "__main__"})
    except Exception as e:
        print("---CODE IMPORT CHECK: FAILED---")
        # Catch any error during execution (e.g., ImportError, SyntaxError)
        error = f"Execution error: {e}"
        # Keep the history of real failures, but never the "None" sentinel
        # that marks an earlier successful check.
        previous_error = state_dict.get("error")
        if previous_error and previous_error != "None":
            error = previous_error + "\n --- Most recent run error --- \n" + error
    else:
        print("---CODE IMPORT CHECK: SUCCESS---")
        # No errors occurred
        error = "None"

    return {"keys": {"generation": code_solution, "question": question, "error": error}}
def check_code_execution(state):
    """
    Check code block execution

    Args:
        state (dict): The current graph state. ``state["keys"]`` must hold
            "question" and "generation" (a sequence whose first item exposes
            ``imports`` and ``code`` strings).

    Returns:
        state (dict): New key added to state, error. It is the string "None"
        when imports plus code executed cleanly, otherwise a description of
        the failure, appended to any earlier real error.
    """

    ## State
    print("---CHECKING CODE EXECUTION---")
    state_dict = state["keys"]
    question = state_dict["question"]
    code_solution = state_dict["generation"]
    imports = code_solution[0].imports
    code_body = code_solution[0].code
    code_block = imports + "\n" + code_body

    try:
        # Attempt to execute the code block.
        # A single fresh dict is used as the namespace: with the default
        # (separate globals and locals of this function) any function defined
        # by the generated code cannot see the generated imports or top-level
        # names, which reports a bogus NameError for correct code.
        # NOTE(security): this executes model-generated code in-process and is
        # NOT sandboxed.
        exec(code_block, {"__name__": "__main__"})
    except Exception as e:
        print("---CODE BLOCK CHECK: FAILED---")
        # Catch any error during execution (e.g., ImportError, SyntaxError)
        error = f"Execution error: {e}"
        # The import check leaves the "None" sentinel behind on success; only
        # real earlier failures belong in the error history.
        previous_error = state_dict.get("error")
        if previous_error and previous_error != "None":
            error = previous_error + "\n --- Most recent run error --- \n" + error
    else:
        print("---CODE BLOCK CHECK: SUCCESS---")
        # No errors occurred
        error = "None"

    return {"keys": {"generation": code_solution, "question": question, "error": error}}
### Edges
def decide_to_check_code_exec(state):
    """
    Determines whether to test code execution, or re-try answer generation.

    Args:
        state (dict): The current state of the agent. Only
            ``state["keys"]["error"]`` is consulted.

    Returns:
        str: Next node to call: "check_code_execution" when the error entry
        is the "None" sentinel, otherwise "generate".
    """

    print("---DECIDE TO TEST CODE EXECUTION---")
    error = state["keys"]["error"]

    if error == "None":
        # The import check passed, so move on to running the full code block
        print("---DECISION: TEST CODE EXECUTION---")
        return "check_code_execution"

    # The import check failed, so ask the model for a new solution
    print("---DECISION: RE-TRY SOLUTION---")
    return "generate"
def decide_to_finish(state):
    """
    Determines whether to finish.

    Args:
        state (dict): The current state of the agent. Only
            ``state["keys"]["error"]`` is consulted.

    Returns:
        str: Next node to call: "end" when the error entry is the "None"
        sentinel, otherwise "generate".
    """

    print("---DECIDE TO FINISH---")
    error = state["keys"]["error"]

    if error == "None":
        # The code block executed cleanly, so the graph can stop
        print("---DECISION: FINISH---")
        return "end"

    # Execution failed, so ask the model for a new solution
    print("---DECISION: RE-TRY SOLUTION---")
    return "generate"
# Assemble the state machine: generate -> import check -> execution check,
# with either check able to send control back to generate.
workflow = StateGraph(GraphState)

# Nodes
workflow.add_node("generate", generate)  # produce a solution
workflow.add_node("check_code_imports", check_code_imports)  # run the imports
workflow.add_node("check_code_execution", check_code_execution)  # run the code

# Edges
workflow.set_entry_point("generate")
workflow.add_edge("generate", "check_code_imports")

# After the import check, the router's return value selects the next node.
workflow.add_conditional_edges(
    "check_code_imports",
    decide_to_check_code_exec,
    dict(check_code_execution="check_code_execution", generate="generate"),
)

# After the execution check, either stop or go back for another attempt.
workflow.add_conditional_edges(
    "check_code_execution",
    decide_to_finish,
    dict(end=END, generate="generate"),
)

# Compile into a runnable app
app = workflow.compile()
+43 -17
View File
@@ -23,7 +23,9 @@ from langchain.schema.runnable import (
)
from langchain.vectorstores import Weaviate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.utils.function_calling import convert_to_openai_tool
# Prompts
from .prompts import REPHRASE_TEMPLATE, RESPONSE_TEMPLATE
@@ -196,24 +198,48 @@ sub_question_answer_chain = (
| RunnablePassthrough.assign(answer=answer_chain).map()
)
# Prompt template for final answer
template = """You are an expert coder. You got a high level question:
## Data model
class code(BaseModel):
"""Code output"""
prefix: str = Field(description="Description of the problem and approach")
imports: str = Field(description="Code block import statements")
code: str = Field(description="Code block not including import statements")
<question>
{question}
</question>
## LLM
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
Based on this question, you broke it down into sub questions and answered those. These are the results of that:
# Tool
code_tool_oai = convert_to_openai_tool(code)
<subquestions>
{subq}
</subquestions>
Now, combine all the subquestion answers to generate a final code snippet writing the code that was asked for.
"""
prompt = ChatPromptTemplate.from_template(template)
# LLM with tool and enforce invocation
llm_with_tool = model.bind(
tools=[convert_to_openai_tool(code_tool_oai)],
tool_choice={"type": "function", "function": {"name": "code"}},
)
llm = ChatOpenAI(temperature=0, model="gpt-4")
# Parser
parser_tool = PydanticToolsParser(tools=[code])
# Create a prompt template with format instructions and the query
prompt = PromptTemplate(
template = """You are an expert coder. You got a high level question:
<question>
{question}
</question>
Based on this question, you broke it down into sub questions and answered those. These are the results of that:
<subquestions>
{subq}
</subquestions>
Ensure any code you provide can be executed with all required imports and variables defined. \n
Structure your answer with a description of the code solution. \n
Then list the imports. And finally list the functioning code block. \n
""",
input_variables=["question","subq"],
)
# Answer chain
chain = (
@@ -229,8 +255,8 @@ chain = (
)
)
| prompt
| llm
| StrOutputParser()
| llm_with_tool
| parser_tool
)
# Add typing for input
@@ -1,257 +0,0 @@
import os
from operator import itemgetter
from typing import Dict, List, Optional, Sequence
import weaviate
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.voyageai import VoyageEmbeddings
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, HumanMessage
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import (
Runnable,
RunnableBranch,
RunnableLambda,
RunnableMap,
)
from langchain.vectorstores import Weaviate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
# Prompts
from .prompts import REPHRASE_TEMPLATE, RESPONSE_TEMPLATE
# Keys
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
# Define the data structure for chat requests
# Request payload for the chat chain.
# NOTE: documented with comments rather than a class docstring, because a
# docstring would become part of the pydantic schema.
class ChatRequest(BaseModel):
    question: str  # The question asked in the chat
    chat_history: Optional[List[Dict[str, str]]]  # Optional chat history
# Function to get the embeddings model based on environment variables
def get_embeddings_model() -> Embeddings:
    """Pick the embeddings backend from the environment.

    Voyage embeddings are used only when both VOYAGE_API_KEY and
    VOYAGE_AI_MODEL are set; otherwise OpenAI embeddings are returned.
    """
    voyage_configured = os.environ.get("VOYAGE_API_KEY") and os.environ.get(
        "VOYAGE_AI_MODEL"
    )
    if not voyage_configured:
        # Fall back to OpenAI when the Voyage settings are incomplete.
        return OpenAIEmbeddings(chunk_size=200)
    return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
# Function to initialize and return the retriever
def get_retriever() -> BaseRetriever:
    """Build a retriever backed by the Weaviate docs index.

    Connects to ``WEAVIATE_URL`` with API-key authentication, wraps the
    ``WEAVIATE_DOCS_INDEX_NAME`` index in a LangChain ``Weaviate`` vector
    store using the embeddings from ``get_embeddings_model()``, and exposes
    it as a retriever.

    Returns:
        A retriever created with ``search_kwargs={"k": 6}``.
    """
    # Raw Weaviate connection, authenticated with the API key.
    client = weaviate.Client(
        url=WEAVIATE_URL,
        auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
    )
    # LangChain vector-store wrapper around that connection; the "source" and
    # "title" attributes are requested alongside the "text" field.
    vectorstore = Weaviate(
        client=client,
        index_name=WEAVIATE_DOCS_INDEX_NAME,
        text_key="text",
        embedding=get_embeddings_model(),
        by_text=False,
        attributes=["source", "title"],
    )
    return vectorstore.as_retriever(search_kwargs={"k": 6})
# Function to create the retrieval chain (the question is condensed first when chat history is present)
def create_retriever_chain(
    llm: BaseLanguageModel, retriever: BaseRetriever
) -> Runnable:
    """Build the retrieval step, routing on whether chat history is present.

    Args:
        llm: Model used to rephrase the question via ``REPHRASE_TEMPLATE``.
        retriever: Retriever that receives the final query string.

    Returns:
        A ``RunnableBranch``. When the input's ``"chat_history"`` is truthy,
        the question is first condensed by the LLM and the result is sent to
        the retriever; otherwise the raw ``"question"`` value is sent as is.
    """
    # Rephrase prompt -> LLM -> plain string.
    rephrase_prompt = PromptTemplate.from_template(REPHRASE_TEMPLATE)
    rephrase_chain = (rephrase_prompt | llm | StrOutputParser()).with_config(
        run_name="CondenseQuestion",
    )

    # Branch condition: is there any chat history at all?
    has_history = RunnableLambda(
        lambda inputs: bool(inputs.get("chat_history"))
    ).with_config(run_name="HasChatHistoryCheck")

    # Path taken when history exists: condense first, then retrieve.
    retrieve_with_history = (rephrase_chain | retriever).with_config(
        run_name="RetrievalChainWithHistory"
    )

    # Default path: retrieve directly on the question text.
    pick_question = RunnableLambda(itemgetter("question")).with_config(
        run_name="Itemgetter:question"
    )
    retrieve_without_history = (pick_question | retriever).with_config(
        run_name="RetrievalChainWithNoHistory"
    )

    router = RunnableBranch(
        (has_history, retrieve_with_history),
        retrieve_without_history,
    )
    return router.with_config(run_name="RouteDependingOnChatHistory")
# Function to format the retrieved documents
def format_docs(docs: Sequence[Document]) -> str:
    """Render documents as newline-separated ``<doc id='N'>...</doc>`` tags.

    Args:
        docs: Documents whose ``page_content`` is embedded in each tag.

    Returns:
        One tag per document, numbered from 0 in input order and joined with
        newlines; an empty string when ``docs`` is empty.
    """
    return "\n".join(
        f"<doc id='{position}'>{document.page_content}</doc>"
        for position, document in enumerate(docs)
    )
# Function to serialize the chat history from a chat request
def serialize_history(request: ChatRequest):
    """Convert a request's chat history into LangChain message objects.

    Args:
        request: The chat request, either as a mapping with an optional
            ``"chat_history"`` key or as an object exposing a
            ``chat_history`` attribute (such as a ``ChatRequest`` instance).

    Returns:
        A list of ``HumanMessage`` / ``AIMessage`` objects in history order.
        An entry contributes a human message when its ``"human"`` value is
        not ``None`` and an AI message when its ``"ai"`` value is not
        ``None``. A missing or empty history yields ``[]``.
    """
    try:
        raw_history = request["chat_history"]
    except KeyError:
        # The field is optional, so an omitted key is treated like null.
        raw_history = None
    except TypeError:
        # Not subscriptable (e.g. a model instance): read the attribute.
        raw_history = getattr(request, "chat_history", None)

    converted_chat_history = []
    for message in raw_history or []:
        human_text = message.get("human")
        if human_text is not None:
            converted_chat_history.append(HumanMessage(content=human_text))
        ai_text = message.get("ai")
        if ai_text is not None:
            converted_chat_history.append(AIMessage(content=ai_text))
    return converted_chat_history
# Function to create the answer chain
def create_question_anwser_chain(
    llm: BaseLanguageModel,
    retriever: BaseRetriever,
) -> Runnable:
    """Build the retrieve-then-answer chain for a single question.

    Args:
        llm: Model used both for question rephrasing inside the retrieval
            step and for generating the final response.
        retriever: Retriever used to find supporting documents.

    Returns:
        A runnable that takes a mapping with a ``"question"`` key, retrieves
        and formats documents into ``"context"``, fills the
        ``RESPONSE_TEMPLATE`` system prompt, and returns the model's reply
        as a string.
    """
    # Retrieval: find documents, then flatten them into one context string.
    find_docs = create_retriever_chain(llm, retriever).with_config(
        run_name="FindDocs"
    )
    gather_context = RunnableMap(
        {
            "context": find_docs | format_docs,
            "question": itemgetter("question"),
        }
    ).with_config(run_name="RetrieveDocs")

    # Generation: system prompt carrying the context, question as human turn.
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", RESPONSE_TEMPLATE),
            ("human", "{question}"),
        ]
    )
    generate_response = (answer_prompt | llm | StrOutputParser()).with_config(
        run_name="GenerateResponse",
    )

    # Only "question" is forwarded from the caller's input, so any
    # "chat_history" supplied is dropped here and the retrieval step always
    # takes its no-history path from this entry point.
    question_only = {
        "question": RunnableLambda(itemgetter("question")).with_config(
            run_name="Itemgetter:question"
        ),
    }
    return question_only | gather_context | generate_response
# Retriever
# Shared retriever, built once when the module is loaded.
retriever = get_retriever()

# Prompt that asks the model to break a question into sub-questions,
# pulled from the LangChain hub.
sub_question_prompt = hub.pull("hwchase17/code-langchain-sub-question")

# Sub-question generation: add retrieved documents under "context", fill the
# hub prompt, call GPT-4 Turbo, and parse the reply as JSON.
# NOTE(review): downstream code iterates over the parsed value, so the prompt
# presumably yields a JSON list of sub-question strings -- confirm.
sub_question_chain = (
    RunnablePassthrough.assign(
        context=(lambda inputs: inputs["question"]) | retriever,
    )
    | sub_question_prompt
    | ChatOpenAI(model="gpt-4-1106-preview")
    | SimpleJsonOutputParser()
)
# LLM
# Model used to answer each individual sub-question. This name is rebound to
# a different model further down; answer_chain keeps the instance built here.
llm = ChatOpenAI(
    temperature=0,
    streaming=True,
    model="gpt-3.5-turbo-16k",
)

# Retrieve-and-answer chain applied to every sub-question.
answer_chain = create_question_anwser_chain(llm, retriever)

# Generate the sub-questions, wrap each one as {"question": ...}, then attach
# an "answer" to every wrapped item via .map().
sub_question_answer_chain = (
    sub_question_chain
    | (lambda sub_questions: [{"question": text} for text in sub_questions])
    | RunnablePassthrough.assign(answer=answer_chain).map()
)
# Output
class FunctionOutput(BaseModel):
    # Structured shape requested from the model for the final answer.
    # Documented with comments rather than a docstring so the schema used to
    # build the parser's format instructions stays exactly as it was.
    prefix: str = Field(
        description="The prefix of the output",
    )
    code_block: str = Field(
        description="The code block of the output",
    )
# Create an instance of the PydanticOutputParser
# Parser describing the FunctionOutput structure; here it is only used to
# produce the format instructions embedded in the prompt.
parser = PydanticOutputParser(pydantic_object=FunctionOutput)
format_instructions = parser.get_format_instructions()

# Final synthesis prompt. The template needs two runtime inputs, the original
# question and the rendered sub-question answers ("subq"), so both are
# declared; "format_instructions" is pre-filled as a partial variable.
prompt = PromptTemplate(
    template="""You are an expert coder. You got a high level question:
<question>
{question}
</question>
Based on this question, you broke it down into sub questions and answered those. These are the results of that:
<subquestions>
{subq}
</subquestions>
Now, combine all the subquestion answers to generate a final code snippet writing the code that was asked for.
Output format instructions: \n {format_instructions}
""",
    input_variables=["question", "subq"],
    partial_variables={"format_instructions": format_instructions},
)
# Model for the final synthesis step (rebinds the module-level ``llm``).
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Answer chain: run the sub-question pipeline, render its question/answer
# pairs into one text block under "subq", fill the synthesis prompt, call the
# model, and return its reply as a plain string.
# NOTE(review): the reply is returned via StrOutputParser; ``parser`` is not
# applied to it, so callers get raw text rather than a FunctionOutput.
chain = (
    RunnablePassthrough.assign(
        subq=sub_question_answer_chain
        | (
            lambda answered: "\n\n".join(
                f"Question: {q['question']}\n\nAnswer: {q['answer']}"
                for q in answered
            )
        ),
    )
    | prompt
    | llm
    | StrOutputParser()
)
# Add typing for input
class Question(BaseModel):
    # Root-typed model: the declared chain input is a bare question string.
    __root__: str


# The chain body starts with RunnablePassthrough.assign and reads
# x["question"], so it needs a mapping. A bare string (the declared input
# type) is wrapped as {"question": <string>}; mappings pass through
# untouched, so existing dict-based callers behave as before.
chain = (
    RunnableLambda(
        lambda raw_input: (
            raw_input if isinstance(raw_input, dict) else {"question": raw_input}
        )
    ).with_config(run_name="NormalizeQuestionInput")
    | chain
).with_types(input_type=Question)
+4 -13
View File
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "e2de8bbc-b907-440e-9860-2722a32cfa13",
"metadata": {},
"outputs": [],
@@ -32,16 +32,14 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "910170aa-e10f-4907-a99c-866fc1b008d0",
"metadata": {},
"outputs": [],
"source": [
"from app.test_chains.base_rag_chain import chain as base_rag\n",
"from app.test_chains.multi_query_chain import chain as multi_query\n",
"from app.test_chains.context_stuffing_chain import chain as context_stuffing\n",
"from app.test_chains.context_stuffing_0125_chain import chain as context_stuffing_0125\n",
"from app.test_chains.context_stuffing_0125_chain_prompt import chain as context_stuffing_0125_prompt"
"from app.test_chains.context_stuffing_chain import chain as context_stuffing"
]
},
{
@@ -89,8 +87,6 @@
" \"base-case-rag\":base_rag,\n",
" \"multi-query\":multi_query,\n",
" \"context-stuffing\": context_stuffing,\n",
" \"context-stuffing-0125\": context_stuffing_0125,\n",
" \"context-stuffing-0125-prompt\": context_stuffing_0125_prompt,\n",
"}\n",
"\n",
"# Run evaluation\n",
@@ -172,11 +168,6 @@
"# Results \n",
"df_cot_eval = pd.read_csv(\"CoT_eval_result.csv\")\n",
"\n",
"# Minor change to naming\n",
"df_cot_eval['chain'].replace({'context-stuffing-0125-prompt': 'cs-0125-pe', \n",
" 'context-stuffing-0125': 'cs-0125', \n",
" 'context-stuffing-1106': 'cs-1106'}, inplace=True)\n",
"\n",
"# Stats\n",
"std_errors = df_cot_eval.groupby([\"chain\"]).apply(group_standard_error)\n",
"grouped_frac_correct = df_cot_eval.groupby('chain')['correct'].sum() / df_cot_eval.groupby('chain')['correct'].count() \n",
@@ -529,7 +520,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.9.16"
}
},
"nbformat": 4,