Add LangGraph, cleanup old / redundant chains

2026-07-01 12:46:37 -04:00 · 2024-02-20 15:12:01 -08:00
parent 4daf17315a
commit 1862687d68
11 changed files with 462 additions and 724 deletions
@@ -1,11 +1,12 @@
 import weaviate
-from langchain_openai import ChatOpenAI
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain.vectorstores import Weaviate
-from langchain.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
+from langchain.prompts import PromptTemplate
 from langchain.embeddings.voyageai import VoyageEmbeddings
-from langchain_core.runnables import RunnableLambda, RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain.output_parsers.openai_tools import PydanticToolsParser
+from langchain_core.utils.function_calling import convert_to_openai_tool

 # Keys
 import os
@@ -39,23 +40,47 @@ def get_retriever():
 # Retriever
 retriever = get_retriever()

-# Prompt
-template = """Answer the question based only on the following context:
-{context}
+## Data model
+class code(BaseModel):
+    """Code output"""
+    prefix: str = Field(description="Description of the problem and approach")
+    imports: str = Field(description="Code block import statements")
+    code: str = Field(description="Code block not including import statements")

-Question: {question}
-"""
-prompt = ChatPromptTemplate.from_template(template)
+## LLM
+model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

-# LLM
-model = ChatOpenAI(model="gpt-4-1106-preview")
+# Tool
+code_tool_oai = convert_to_openai_tool(code)
+
+# LLM with tool and enforce invocation
+# `code_tool_oai` is already in OpenAI tool format, so it is passed as-is
+# instead of being run through convert_to_openai_tool a second time.
+llm_with_tool = model.bind(
+    tools=[code_tool_oai],
+    tool_choice={"type": "function", "function": {"name": "code"}},
+)
+
+# Parser
+parser_tool = PydanticToolsParser(tools=[code])
+
+# Create a prompt template with format instructions and the query
+prompt = PromptTemplate(
+    template = """You are a coding assistant with expertise in LangChain. \n 
+    Here is relevant context: 
+    \n ------- \n
+    {context} 
+    \n ------- \n
+    Ensure any code you provide can be executed with all required imports and variables defined. \n
+    Structure your answer with a description of the code solution. \n
+    Then list the imports. And finally list the functioning code block. \n
+    Here is the user question: \n --- --- --- \n {question}""",
+    input_variables=["question","context"])

 # Chain
 chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
-    | model
-    | StrOutputParser()
+    | llm_with_tool
+    | parser_tool
 )

 # Add typing for input
@@ -1,83 +0,0 @@
-import weaviate
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain.vectorstores import Weaviate
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain.embeddings.voyageai import VoyageEmbeddings
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.output_parsers import PydanticOutputParser
-
-# Keys
-import os
-
-WEAVIATE_URL = os.environ["WEAVIATE_URL"]
-WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
-WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
-
-# Fine-tuned embd and vectorstore
-def get_embeddings_model():
-    if os.environ.get("VOYAGE_API_KEY") and os.environ.get("VOYAGE_AI_MODEL"):
-        return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
-    return OpenAIEmbeddings(chunk_size=200)
-
-
-def get_retriever():
-    weaviate_client = weaviate.Client(
-        url=WEAVIATE_URL,
-        auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
-    )
-    weaviate_client = Weaviate(
-        client=weaviate_client,
-        index_name=WEAVIATE_DOCS_INDEX_NAME,
-        text_key="text",
-        embedding=get_embeddings_model(),
-        by_text=False,
-        attributes=["source", "title"],
-    )
-    return weaviate_client.as_retriever(search_kwargs=dict(k=6))
-
-# Retriever
-retriever = get_retriever()
-
-# Output
-class FunctionOutput(BaseModel):
-    prefix: str = Field(description="The prefix of the output")
-    code_block: str = Field(description="The code block of the output")
-
-# Create an instance of the PydanticOutputParser
-parser = PydanticOutputParser(pydantic_object=FunctionOutput)
-
-# Get the format instructions from the output parser
-format_instructions = parser.get_format_instructions()
-
-# Create a prompt template with format instructions and the query
-prompt = PromptTemplate(
-    template = """You are a coding assistant with expertise in LangChain. \n 
-    Here is relevant context: 
-    \n ------- \n
-    {context} 
-    \n ------- \n
-    Now, answer the user question based on the above provided documentation: {question}
-    Output format instructions: \n {format_instructions}
-    """,
-    input_variables=["question"],
-    partial_variables={"format_instructions": format_instructions},
-)
-
-# LLM
-model = ChatOpenAI(model="gpt-4-1106-preview")
-
-# Chain
-chain = (
-    {"context": retriever, "question": RunnablePassthrough()}
-    | prompt
-    | model
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-chain = chain.with_types(input_type=Question)
@@ -1,67 +0,0 @@
-from bs4 import BeautifulSoup as Soup
-from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
-from langchain_openai import ChatOpenAI
-from langchain.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel
-
-# Load LCEL docs
-url = "https://python.langchain.com/docs/expression_language/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs = loader.load()
-
-# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_pydantic = loader.load()
-
-# LCEL w/ Self Query (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_sq = loader.load()
-
-# Add 
-docs.extend([*docs_pydantic, *docs_sq])
-
-# Sort the list based on the URLs in 'metadata' -> 'source'
-d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
-d_reversed = list(reversed(d_sorted))
-
-# Concatenate the 'page_content' of each sorted dictionary
-concatenated_content = "\n\n\n --- \n\n\n".join(
-    [doc.page_content for doc in d_reversed]
-)
-
-# Prompt template
-template = """You are a coding assistant with expertise in LCEL, LangChain expression language. Here is a full set of documentation:
-{context}
-
-Now, answer the user question based on the above provided documentation: {question}
-"""
-prompt = ChatPromptTemplate.from_template(template)
-
-model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview") 
-
-chain = (
-    {
-        "context": lambda x: concatenated_content,
-        "question": RunnablePassthrough(),
-    }
-    | prompt
-    | model
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-
-chain = chain.with_types(input_type=Question)
@@ -1,86 +0,0 @@
-from bs4 import BeautifulSoup as Soup
-from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
-from langchain_openai import ChatOpenAI
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.output_parsers import PydanticOutputParser
-
-# Load LCEL docs
-url = "https://python.langchain.com/docs/expression_language/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs = loader.load()
-
-# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_pydantic = loader.load()
-
-# LCEL w/ Self Query (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_sq = loader.load()
-
-# Add 
-docs.extend([*docs_pydantic, *docs_sq])
-
-# Sort the list based on the URLs in 'metadata' -> 'source'
-d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
-d_reversed = list(reversed(d_sorted))
-
-# Concatenate the 'page_content' of each sorted dictionary
-concatenated_content = "\n\n\n --- \n\n\n".join(
-    [doc.page_content for doc in d_reversed]
-)
-
-# Output
-class FunctionOutput(BaseModel):
-    prefix: str = Field(description="The prefix of the output")
-    code_block: str = Field(description="The code block of the output")
-
-# Create an instance of the PydanticOutputParser
-parser = PydanticOutputParser(pydantic_object=FunctionOutput)
-
-# Get the format instructions from the output parser
-format_instructions = parser.get_format_instructions()
-
-
-# Create a prompt template with format instructions and the query
-prompt = PromptTemplate(
-    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
-    Here is a full set of LCEL documentation: 
-    \n ------- \n
-    {context} 
-    \n ------- \n
-    Now, answer the user question based on the above provided documentation: {question}
-    Output format instructions: \n {format_instructions}
-    """,
-    input_variables=["question"],
-    partial_variables={"format_instructions": format_instructions},
-)
-
-model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview") 
-
-chain = (
-    {
-        "context": lambda x: concatenated_content,
-        "question": RunnablePassthrough(),
-    }
-    | prompt
-    | model
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-
-chain = chain.with_types(input_type=Question)
@@ -1,87 +0,0 @@
-from bs4 import BeautifulSoup as Soup
-from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
-from langchain_openai import ChatOpenAI
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.output_parsers import PydanticOutputParser
-
-
-# Load LCEL docs
-url = "https://python.langchain.com/docs/expression_language/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs = loader.load()
-
-# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_pydantic = loader.load()
-
-# LCEL w/ Self Query (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_sq = loader.load()
-
-# Add 
-docs.extend([*docs_pydantic, *docs_sq])
-
-# Sort the list based on the URLs in 'metadata' -> 'source'
-d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
-d_reversed = list(reversed(d_sorted))
-
-# Concatenate the 'page_content' of each sorted dictionary
-concatenated_content = "\n\n\n --- \n\n\n".join(
-    [doc.page_content for doc in d_reversed]
-)
-
-# Output
-class FunctionOutput(BaseModel):
-    prefix: str = Field(description="The prefix of the output")
-    code_block: str = Field(description="The code block of the output")
-
-# Create an instance of the PydanticOutputParser
-parser = PydanticOutputParser(pydantic_object=FunctionOutput)
-
-# Get the format instructions from the output parser
-format_instructions = parser.get_format_instructions()
-
-
-# Create a prompt template with format instructions and the query
-prompt = PromptTemplate(
-    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
-    Here is a full set of LCEL documentation: 
-    \n ------- \n
-    {context} 
-    \n ------- \n
-    Now, answer the user question based on the above provided documentation and ensure any code you provide can be executed with all required imports and variables defined: {question}
-    Output format instructions: \n {format_instructions}
-    """,
-    input_variables=["question"],
-    partial_variables={"format_instructions": format_instructions},
-)
-
-model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview") 
-
-chain = (
-    {
-        "context": lambda x: concatenated_content,
-        "question": RunnablePassthrough(),
-    }
-    | prompt
-    | model
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-
-chain = chain.with_types(input_type=Question)
@@ -1,10 +1,11 @@
 from bs4 import BeautifulSoup as Soup
 from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
 from langchain_openai import ChatOpenAI
-from langchain.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
+from langchain.prompts import PromptTemplate
 from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain.output_parsers.openai_tools import PydanticToolsParser
+from langchain_core.utils.function_calling import convert_to_openai_tool

 # Load LCEL docs
 url = "https://python.langchain.com/docs/expression_language/"
@@ -39,15 +40,41 @@ concatenated_content = "\n\n\n --- \n\n\n".join(
    [doc.page_content for doc in d_reversed]
 )

-# Prompt template
-template = """You are a coding assistant with expertise in LCEL, LangChain expression language. Here is a full set of documentation:
-{context}
+## Data model
+class code(BaseModel):
+    """Code output"""
+    prefix: str = Field(description="Description of the problem and approach")
+    imports: str = Field(description="Code block import statements")
+    code: str = Field(description="Code block not including import statements")

-Now, answer the user question based on the above provided documentation: {question}
-"""
-prompt = ChatPromptTemplate.from_template(template)
+## LLM
+model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

-model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview") 
+# Tool
+code_tool_oai = convert_to_openai_tool(code)
+
+# LLM with tool and enforce invocation
+# `code_tool_oai` is already in OpenAI tool format, so it is passed as-is
+# instead of being run through convert_to_openai_tool a second time.
+llm_with_tool = model.bind(
+    tools=[code_tool_oai],
+    tool_choice={"type": "function", "function": {"name": "code"}},
+)
+
+# Parser
+parser_tool = PydanticToolsParser(tools=[code])
+
+# Create a prompt template with format instructions and the query
+prompt = PromptTemplate(
+    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
+        Here is a full set of LCEL documentation: 
+        \n ------- \n
+        {context} 
+        \n ------- \n
+        Answer the user question based on the above provided documentation. \n
+        Ensure any code you provide can be executed with all required imports and variables defined. \n
+        Structure your answer with a description of the code solution. \n
+        Then list the imports. And finally list the functioning code block. \n
+        Here is the user question: \n --- --- --- \n {question}""",
+    input_variables=["question","context"])

 chain = (
    {
@@ -55,8 +82,8 @@ chain = (
        "question": RunnablePassthrough(),
    }
    | prompt
-    | model
-    | StrOutputParser()
+    | llm_with_tool
+    | parser_tool
 )

 # Add typing for input
@@ -1,87 +0,0 @@
-from bs4 import BeautifulSoup as Soup
-from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
-from langchain_openai import ChatOpenAI
-from langchain.prompts import PromptTemplate
-from langchain.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.output_parsers import PydanticOutputParser
-
-# Load LCEL docs
-url = "https://python.langchain.com/docs/expression_language/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs = loader.load()
-
-# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_pydantic = loader.load()
-
-# LCEL w/ Self Query (outside the primary LCEL docs)
-url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
-loader = RecursiveUrlLoader(
-    url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
-)
-docs_sq = loader.load()
-
-# Add 
-docs.extend([*docs_pydantic, *docs_sq])
-
-# Sort the list based on the URLs in 'metadata' -> 'source'
-d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
-d_reversed = list(reversed(d_sorted))
-
-# Concatenate the 'page_content' of each sorted dictionary
-concatenated_content = "\n\n\n --- \n\n\n".join(
-    [doc.page_content for doc in d_reversed]
-)
-
-# Output
-class FunctionOutput(BaseModel):
-    prefix: str = Field(description="The prefix of the output")
-    code_block: str = Field(description="The code block of the output")
-
-# Create an instance of the PydanticOutputParser
-parser = PydanticOutputParser(pydantic_object=FunctionOutput)
-
-# Get the format instructions from the output parser
-format_instructions = parser.get_format_instructions()
-
-
-# Create a prompt template with format instructions and the query
-prompt = PromptTemplate(
-    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
-    Here is a full set of LCEL documentation: 
-    \n ------- \n
-    {context} 
-    \n ------- \n
-    Now, answer the user question based on the above provided documentation: {question}
-    Output format instructions: \n {format_instructions}
-    """,
-    input_variables=["question"],
-    partial_variables={"format_instructions": format_instructions},
-)
-
-model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview") 
-
-chain = (
-    {
-        "context": lambda x: concatenated_content,
-        "question": RunnablePassthrough(),
-    }
-    | prompt
-    | model
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-
-chain = chain.with_types(input_type=Question)
@@ -0,0 +1,336 @@
+from operator import itemgetter
+from typing import Any, Dict, TypedDict
+
+from bs4 import BeautifulSoup as Soup
+from langchain.output_parsers import PydanticOutputParser
+from langchain.output_parsers.openai_tools import PydanticToolsParser
+from langchain.prompts import PromptTemplate
+from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
+from langchain_core.messages import BaseMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.utils.function_calling import convert_to_openai_tool
+from langchain_openai import ChatOpenAI
+from langgraph.graph import END, StateGraph
+
+
+class GraphState(TypedDict):
+    """
+    Shared state passed between the nodes of the graph.
+
+    Attributes:
+        keys: String-keyed dictionary of arbitrary values. The nodes in this
+            file read and write "question", "generation" and "error".
+    """
+
+    # `Any` from typing is the annotation for "a value of any type"; the
+    # lowercase builtin `any` is a function, not a type.
+    keys: Dict[str, Any]
+
+
+def generate(state):
+    """
+    Generate a code solution based on LCEL docs and the input question
+    with optional feedback from code execution tests
+
+    Args:
+        state (dict): The current graph state. state["keys"] must hold
+            "question". When it also holds "error", the previous "generation"
+            and that error are added to the prompt so the model can re-try.
+
+    Returns:
+        state (dict): {"keys": {"generation": ..., "question": ...}}, where
+            "generation" is the output of the tool parser (downstream nodes
+            read its first item's `imports` and `code` fields). Any "error"
+            key from the incoming state is not carried over.
+    """
+
+    ## State
+    state_dict = state["keys"]
+    question = state_dict["question"]
+
+    ## Context
+    # (root URL, max crawl depth) for each documentation source:
+    # the LCEL docs, plus two pages that live outside the primary LCEL docs
+    # (PydanticOutputParser quick start and Self Query).
+    # NOTE: these pages are downloaded on every call, including re-tries.
+    doc_sources = [
+        ("https://python.langchain.com/docs/expression_language/", 20),
+        ("https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start", 1),
+        ("https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/", 1),
+    ]
+    docs = []
+    for url, max_depth in doc_sources:
+        loader = RecursiveUrlLoader(
+            url=url, max_depth=max_depth, extractor=lambda x: Soup(x, "html.parser").text
+        )
+        docs.extend(loader.load())
+
+    # Sort the list based on the URLs in 'metadata' -> 'source', then reverse
+    d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
+    d_reversed = list(reversed(d_sorted))
+
+    # Concatenate the 'page_content' of each sorted document
+    concatenated_content = "\n\n\n --- \n\n\n".join(
+        [doc.page_content for doc in d_reversed]
+    )
+
+    ## Data model
+    class code(BaseModel):
+        """Code output"""
+        prefix: str = Field(description="Description of the problem and approach")
+        imports: str = Field(description="Code block import statements")
+        code: str = Field(description="Code block not including import statements")
+
+    ## LLM
+    model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
+
+    # Tool (already in OpenAI tool format after this call)
+    code_tool_oai = convert_to_openai_tool(code)
+
+    # LLM with tool and enforce invocation.
+    # The converted tool is passed as-is; converting it a second time is redundant.
+    # tool_choice names the tool "code" -- presumably derived from the class
+    # name above, so keep the two in sync.
+    llm_with_tool = model.bind(
+        tools=[code_tool_oai],
+        tool_choice={"type": "function", "function": {"name": "code"}},
+    )
+
+    # Parser
+    parser_tool = PydanticToolsParser(tools=[code])
+
+    ## Prompt
+    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
+        Here is a full set of LCEL documentation: 
+        \n ------- \n
+        {context} 
+        \n ------- \n
+        Answer the user question based on the above provided documentation. \n
+        Ensure any code you provide can be executed with all required imports and variables defined. \n
+        Structure your answer with a description of the code solution. \n
+        Then list the imports. And finally list the functioning code block. \n
+        Here is the user question: \n --- --- --- \n {question}"""
+
+    ## Generation
+    # Both branches only decide the prompt text and the chain inputs; the
+    # prompt and chain themselves are built once below.
+    if "error" in state_dict:
+        print("---RE-GENERATE SOLUTION w/ ERROR FEEDBACK---")
+
+        # Update prompt
+        addendum = """  \n --- --- --- \n You previously tried to solve this problem. \n Here is your solution:  
+                    \n --- --- --- \n {generation}  \n --- --- --- \n  Here is the resulting error from code 
+                    execution:  \n --- --- --- \n {error}  \n --- --- --- \n Please re-try to answer this. 
+                    Structure your answer with a description of the code solution. \n Then list the imports. 
+                    And finally list the functioning code block. Structure your answer with a description of 
+                    the code solution. \n Then list the imports. And finally list the functioning code block. 
+                    \n Here is the user question: \n --- --- --- \n {question}"""
+        template = template + addendum
+
+        chain_input = {
+            "question": question,
+            "generation": str(state_dict["generation"][0]),
+            "error": state_dict["error"],
+        }
+    else:
+        print("---GENERATE SOLUTION---")
+        chain_input = {"question": question}
+
+    # Prompt: "context" plus whatever variables this branch supplies
+    prompt = PromptTemplate(
+        template=template,
+        input_variables=["context", *chain_input],
+    )
+
+    # Chain: context is the fixed documentation dump; every other prompt
+    # variable is pulled from the invoke() input by key.
+    chain = (
+        {
+            "context": lambda x: concatenated_content,
+            **{key: itemgetter(key) for key in chain_input},
+        }
+        | prompt
+        | llm_with_tool
+        | parser_tool
+    )
+
+    code_solution = chain.invoke(chain_input)
+
+    return {"keys": {"generation": code_solution, "question": question}}
+
+def check_code_imports(state):
+    """
+    Check that the import statements of the generated solution execute.
+
+    Args:
+        state (dict): The current graph state. state["keys"] holds "question"
+            and "generation" (the first item's `imports` attribute is run),
+            and optionally "error" from an earlier check.
+
+    Returns:
+        state (dict): Same generation and question, plus "error": the string
+            "None" when the imports ran cleanly, otherwise a description of
+            the failure, appended to any earlier real error.
+    """
+
+    ## State
+    print("---CHECKING CODE IMPORTS---")
+    state_dict = state["keys"]
+    question = state_dict["question"]
+    code_solution = state_dict["generation"]
+    imports = code_solution[0].imports
+
+    try:
+        # SECURITY: this runs model-generated code in-process with full
+        # privileges. Only use it where that is acceptable (e.g. a sandbox).
+        # A throwaway namespace keeps the generated code from reading or
+        # overwriting this function's variables and this module's globals.
+        exec(imports, {"__name__": __name__})
+    except Exception as e:
+        print("---CODE IMPORT CHECK: FAILED---")
+        # Catch any error during execution (e.g., ImportError, SyntaxError)
+        error = f"Execution error: {e}"
+        error_prev_runs = state_dict.get("error")
+        # "None" is the success sentinel written by the checks, not a real
+        # earlier failure, so it must not be prepended to the message.
+        if error_prev_runs and error_prev_runs != "None":
+            error = error_prev_runs + "\n --- Most recent run error --- \n" + error
+    else:
+        print("---CODE IMPORT CHECK: SUCCESS---")
+        # No errors occurred
+        error = "None"
+
+    return {"keys": {"generation": code_solution, "question": question, "error": error}}
+
+def check_code_execution(state):
+    """
+    Check that the generated imports plus code block execute without error.
+
+    Args:
+        state (dict): The current graph state. state["keys"] holds "question"
+            and "generation" (the first item's `imports` and `code` attributes
+            are joined and run), and optionally "error" from an earlier check.
+
+    Returns:
+        state (dict): Same generation and question, plus "error": the string
+            "None" when the code ran cleanly, otherwise a description of the
+            failure, appended to any earlier real error.
+    """
+
+    ## State
+    print("---CHECKING CODE EXECUTION---")
+    state_dict = state["keys"]
+    question = state_dict["question"]
+    code_solution = state_dict["generation"]
+    imports = code_solution[0].imports
+    code = code_solution[0].code
+    code_block = imports + "\n" + code
+
+    try:
+        # SECURITY: this runs model-generated code in-process with full
+        # privileges. Only use it where that is acceptable (e.g. a sandbox).
+        # The code is run with ONE dict serving as both globals and locals.
+        # A bare exec() inside a function stores imports and top-level names
+        # in a separate locals mapping, so functions defined by the generated
+        # code cannot see them and fail with a spurious NameError.
+        exec(code_block, {"__name__": __name__})
+    except Exception as e:
+        print("---CODE BLOCK CHECK: FAILED---")
+        # Catch any error during execution (e.g., ImportError, SyntaxError)
+        error = f"Execution error: {e}"
+        error_prev_runs = state_dict.get("error")
+        # The import check leaves the success sentinel "None" in the state;
+        # that is not an earlier failure and must not be prepended.
+        if error_prev_runs and error_prev_runs != "None":
+            error = error_prev_runs + "\n --- Most recent run error --- \n" + error
+    else:
+        print("---CODE BLOCK CHECK: SUCCESS---")
+        # No errors occurred
+        error = "None"
+
+    return {"keys": {"generation": code_solution, "question": question, "error": error}}
+
+
+### Edges
+
+def decide_to_check_code_exec(state):
+    """
+    Determines whether to test code execution, or re-try answer generation.
+
+    Args:
+        state (dict): The current state of the agent; state["keys"]["error"]
+            is the result of the import check.
+
+    Returns:
+        str: Next node to call, "check_code_execution" or "generate"
+    """
+
+    print("---DECIDE TO TEST CODE EXECUTION---")
+    error = state["keys"]["error"]
+
+    if error == "None":
+        # "None" is the success sentinel: the imports ran cleanly, so move on
+        # to executing the full code block.
+        print("---DECISION: TEST CODE EXECUTION---")
+        return "check_code_execution"
+
+    # The import check failed: go back and regenerate the solution.
+    print("---DECISION: RE-TRY SOLUTION---")
+    return "generate"
+
+def decide_to_finish(state):
+    """
+    Determines whether to finish, or re-try answer generation.
+
+    Args:
+        state (dict): The current state of the agent; state["keys"]["error"]
+            is the result of the code execution check.
+
+    Returns:
+        str: Next node to call, "end" or "generate"
+    """
+
+    print("---DECIDE TO FINISH---")
+    error = state["keys"]["error"]
+
+    if error == "None":
+        # "None" is the success sentinel: the code block ran cleanly, so the
+        # current solution is accepted.
+        print("---DECISION: FINISH---")
+        return "end"
+
+    # Execution failed: go back and regenerate the solution.
+    print("---DECISION: RE-TRY SOLUTION---")
+    return "generate"
+    
+# Flow
+# Graph: generate -> check_code_imports -> check_code_execution -> END,
+# with either check able to route back to generate.
+workflow = StateGraph(GraphState)

+# Define the nodes
+workflow.add_node("generate", generate)  # generate a solution
+workflow.add_node("check_code_imports", check_code_imports)  # check imports
+workflow.add_node("check_code_execution", check_code_execution)  # check execution

+# Build graph
+workflow.set_entry_point("generate")
+# Every generation goes to the import check first.
+workflow.add_edge("generate", "check_code_imports")
+# After the import check, decide_to_check_code_exec returns one of the keys
+# below; the mapping gives the node to run next.
+workflow.add_conditional_edges(
+    "check_code_imports",
+    decide_to_check_code_exec,
+    {
+        "check_code_execution": "check_code_execution",
+        "generate": "generate",
+    },
+)
+# After the execution check, decide_to_finish returns "end" (stop) or
+# "generate" (re-try).
+# NOTE(review): nothing visible here limits the number of re-tries -- confirm
+# a recursion limit is applied by the caller or the framework.
+workflow.add_conditional_edges(
+    "check_code_execution",
+    decide_to_finish,
+    {
+        "end": END,
+        "generate": "generate",
+    },
+)

+# Compile
+app = workflow.compile()
@@ -23,7 +23,9 @@ from langchain.schema.runnable import (
 )
 from langchain.vectorstores import Weaviate
 from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain.output_parsers.openai_tools import PydanticToolsParser
+from langchain_core.utils.function_calling import convert_to_openai_tool

 # Prompts
 from .prompts import REPHRASE_TEMPLATE, RESPONSE_TEMPLATE
@@ -196,24 +198,48 @@ sub_question_answer_chain = (
    | RunnablePassthrough.assign(answer=answer_chain).map()
 )

-# Prompt template for final answer
-template = """You are an expert coder. You got a high level question:
+## Data model
+class code(BaseModel):  # NOTE(review): tool_choice below selects the tool by the name "code" - keep in sync
+    """Code output"""
+    prefix: str = Field(description="Description of the problem and approach")
+    imports: str = Field(description="Code block import statements")
+    code: str = Field(description="Code block not including import statements")

-<question>
-{question}
-</question>
+## LLM
+model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

-Based on this question, you broke it down into sub questions and answered those. These are the results of that:
+# Tool
+code_tool_oai = convert_to_openai_tool(code)

-<subquestions>
-{subq}
-</subquestions>
-    
-Now, combine all the subquestion answers to generate a final code snippet writing the code that was asked for.
-"""
-prompt = ChatPromptTemplate.from_template(template)
+# Bind the tool to the LLM and force it to be called via tool_choice
+llm_with_tool = model.bind(
+    tools=[code_tool_oai],  # already an OpenAI tool spec; no second conversion needed
+    tool_choice={"type": "function", "function": {"name": "code"}},
+)

-llm = ChatOpenAI(temperature=0, model="gpt-4")
+# Parser
+parser_tool = PydanticToolsParser(tools=[code])
+
+# Create a prompt template from the question and the sub-question answers
+prompt = PromptTemplate(
+    template = """You are an expert coder. You got a high level question:
+
+    <question>
+    {question}
+    </question>
+
+    Based on this question, you broke it down into sub questions and answered those. These are the results of that:
+
+    <subquestions>
+    {subq}
+    </subquestions>
+        
+    Ensure any code you provide can be executed with all required imports and variables defined. \n
+    Structure your answer with a description of the code solution. \n
+    Then list the imports. And finally list the functioning code block. \n
+    """,
+    input_variables=["question","subq"],
+)

 # Answer chain
 chain = (
@@ -229,8 +255,8 @@ chain = (
        )
    )
    | prompt
-    | llm
-    | StrOutputParser()
+    | llm_with_tool
+    | parser_tool
 )

 # Add typing for input
@@ -1,257 +0,0 @@
-import os
-from operator import itemgetter
-from typing import Dict, List, Optional, Sequence
-
-import weaviate
-from langchain import hub
-from langchain_openai import ChatOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.embeddings.voyageai import VoyageEmbeddings
-from langchain.output_parsers.json import SimpleJsonOutputParser
-from langchain.prompts import ChatPromptTemplate, PromptTemplate
-from langchain.schema import Document
-from langchain.schema.embeddings import Embeddings
-from langchain.schema.language_model import BaseLanguageModel
-from langchain.schema.messages import AIMessage, HumanMessage
-from langchain.schema.output_parser import StrOutputParser
-from langchain.schema.retriever import BaseRetriever
-from langchain.schema.runnable import (
-    Runnable,
-    RunnableBranch,
-    RunnableLambda,
-    RunnableMap,
-)
-from langchain.vectorstores import Weaviate
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.output_parsers import PydanticOutputParser
-
-# Prompts
-from .prompts import REPHRASE_TEMPLATE, RESPONSE_TEMPLATE
-
-# Keys
-WEAVIATE_URL = os.environ["WEAVIATE_URL"]
-WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
-WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
-
-
-# Define the data structure for chat requests
-class ChatRequest(BaseModel):
-    question: str  # The question asked in the chat
-    chat_history: Optional[List[Dict[str, str]]]  # Optional chat history
-
-
-# Function to get the embeddings model based on environment variables
-def get_embeddings_model() -> Embeddings:
-    # Check for specific environment variables to determine the embeddings model
-    if os.environ.get("VOYAGE_API_KEY") and os.environ.get("VOYAGE_AI_MODEL"):
-        return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
-    # Default to OpenAI embeddings if the specific environment variables are not set
-    return OpenAIEmbeddings(chunk_size=200)
-
-
-# Function to initialize and return the retriever
-def get_retriever() -> BaseRetriever:
-    # Initialize Weaviate client with authentication and connection details
-    weaviate_client = weaviate.Client(
-        url=WEAVIATE_URL,
-        auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
-    )
-    # Configure the Weaviate client with specific settings
-    weaviate_client = Weaviate(
-        client=weaviate_client,
-        index_name=WEAVIATE_DOCS_INDEX_NAME,
-        text_key="text",
-        embedding=get_embeddings_model(),
-        by_text=False,
-        attributes=["source", "title"],
-    )
-    # Return the configured retriever
-    return weaviate_client.as_retriever(search_kwargs=dict(k=6))
-
-
-# Function to create a chain of retrievers
-def create_retriever_chain(
-    llm: BaseLanguageModel, retriever: BaseRetriever
-) -> Runnable:
-    # Template to condense the question
-    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(REPHRASE_TEMPLATE)
-    # Create a chain to process the question and retrieve relevant information
-    condense_question_chain = (
-        CONDENSE_QUESTION_PROMPT | llm | StrOutputParser()
-    ).with_config(
-        run_name="CondenseQuestion",
-    )
-    conversation_chain = condense_question_chain | retriever
-    # Return a branch of runnables depending on whether there's chat history
-    return RunnableBranch(
-        (
-            RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-                run_name="HasChatHistoryCheck"
-            ),
-            conversation_chain.with_config(run_name="RetrievalChainWithHistory"),
-        ),
-        (
-            RunnableLambda(itemgetter("question")).with_config(
-                run_name="Itemgetter:question"
-            )
-            | retriever
-        ).with_config(run_name="RetrievalChainWithNoHistory"),
-    ).with_config(run_name="RouteDependingOnChatHistory")
-
-
-# Function to format the retrieved documents
-def format_docs(docs: Sequence[Document]) -> str:
-    formatted_docs = []
-    # Iterate through each document and format it
-    for i, doc in enumerate(docs):
-        doc_string = f"<doc id='{i}'>{doc.page_content}</doc>"
-        formatted_docs.append(doc_string)
-    return "\n".join(formatted_docs)
-
-
-# Function to serialize the chat history from a chat request
-def serialize_history(request: ChatRequest):
-    chat_history = request["chat_history"] or []
-    converted_chat_history = []
-    # Convert each message in the chat history to the appropriate message type
-    for message in chat_history:
-        if message.get("human") is not None:
-            converted_chat_history.append(HumanMessage(content=message["human"]))
-        if message.get("ai") is not None:
-            converted_chat_history.append(AIMessage(content=message["ai"]))
-    return converted_chat_history
-
-
-# Function to create the answer chain
-def create_question_anwser_chain(
-    llm: BaseLanguageModel,
-    retriever: BaseRetriever,
-) -> Runnable:
-    # Create a retriever chain and configure it
-    retriever_chain = create_retriever_chain(
-        llm,
-        retriever,
-    ).with_config(run_name="FindDocs")
-    _context = RunnableMap(
-        {
-            "context": retriever_chain | format_docs,
-            "question": itemgetter("question"),
-        }
-    ).with_config(run_name="RetrieveDocs")
-    # Define the chat prompt template
-    prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", RESPONSE_TEMPLATE),
-            ("human", "{question}"),
-        ]
-    )
-
-    # Create a response synthesizer using the defined prompt
-    response_synthesizer = (prompt | llm | StrOutputParser()).with_config(
-        run_name="GenerateResponse",
-    )
-    # Return the final chain of processes
-    return (
-        {
-            "question": RunnableLambda(itemgetter("question")).with_config(
-                run_name="Itemgetter:question"
-            ),
-        }
-        | _context
-        | response_synthesizer
-    )
-
-
-# Retriever
-retriever = get_retriever()
-
-# Sub-question prompt
-sub_question_prompt = hub.pull("hwchase17/code-langchain-sub-question")
-
-# Chain for sub-question generation
-sub_question_chain = (
-    RunnablePassthrough.assign(context=(lambda x: x["question"]) | retriever)
-    | sub_question_prompt
-    | ChatOpenAI(model="gpt-4-1106-preview")
-    | SimpleJsonOutputParser()
-)
-
-# LLM
-llm = ChatOpenAI(
-    model="gpt-3.5-turbo-16k",
-    streaming=True,
-    temperature=0,
-)
-
-# Chain that answers questions
-answer_chain = create_question_anwser_chain(
-    llm,
-    retriever,
-)
-
-# Chain for sub-question answering
-sub_question_answer_chain = (
-    sub_question_chain
-    | (lambda x: [{"question": v} for v in x])
-    | RunnablePassthrough.assign(answer=answer_chain).map()
-)
-
-# Output
-class FunctionOutput(BaseModel):
-    prefix: str = Field(description="The prefix of the output")
-    code_block: str = Field(description="The code block of the output")
-
-# Create an instance of the PydanticOutputParser
-parser = PydanticOutputParser(pydantic_object=FunctionOutput)
-
-# Get the format instructions from the output parser
-format_instructions = parser.get_format_instructions()
-
-# Create a prompt template with format instructions and the query
-prompt = PromptTemplate(
-    template = """You are an expert coder. You got a high level question:
-
-    <question>
-    {question}
-    </question>
-
-    Based on this question, you broke it down into sub questions and answered those. These are the results of that:
-
-    <subquestions>
-    {subq}
-    </subquestions>
-        
-    Now, combine all the subquestion answers to generate a final code snippet writing the code that was asked for.
-    
-    Output format instructions: \n {format_instructions}
-    """,
-    input_variables=["question"],
-    partial_variables={"format_instructions": format_instructions},
-)
-
-llm = ChatOpenAI(temperature=0, model="gpt-4")
-
-# Answer chain
-chain = (
-    RunnablePassthrough().assign(
-        subq=sub_question_answer_chain
-        | (
-            lambda sub_questions_answers: "\n\n".join(
-                [
-                    f"Question: {q['question']}\n\nAnswer: {q['answer']}"
-                    for q in sub_questions_answers
-                ]
-            )
-        )
-    )
-    | prompt
-    | llm
-    | StrOutputParser()
-)
-
-# Add typing for input
-class Question(BaseModel):
-    __root__: str
-
-chain = chain.with_types(input_type=Question)
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "id": "e2de8bbc-b907-440e-9860-2722a32cfa13",
   "metadata": {},
   "outputs": [],
@@ -32,16 +32,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "id": "910170aa-e10f-4907-a99c-866fc1b008d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from app.test_chains.base_rag_chain import chain as base_rag\n",
    "from app.test_chains.multi_query_chain import chain as multi_query\n",
-    "from app.test_chains.context_stuffing_chain import chain as context_stuffing\n",
-    "from app.test_chains.context_stuffing_0125_chain import chain as context_stuffing_0125\n",
-    "from app.test_chains.context_stuffing_0125_chain_prompt import chain as context_stuffing_0125_prompt"
+    "from app.test_chains.context_stuffing_chain import chain as context_stuffing"
   ]
  },
  {
@@ -89,8 +87,6 @@
    "    \"base-case-rag\":base_rag,\n",
    "    \"multi-query\":multi_query,\n",
    "    \"context-stuffing\": context_stuffing,\n",
-    "    \"context-stuffing-0125\": context_stuffing_0125,\n",
-    "    \"context-stuffing-0125-prompt\": context_stuffing_0125_prompt,\n",
    "}\n",
    "\n",
    "# Run evaluation\n",
@@ -172,11 +168,6 @@
    "# Results \n",
    "df_cot_eval = pd.read_csv(\"CoT_eval_result.csv\")\n",
    "\n",
-    "# Minor change to naming\n",
-    "df_cot_eval['chain'].replace({'context-stuffing-0125-prompt': 'cs-0125-pe', \n",
-    "                              'context-stuffing-0125': 'cs-0125', \n",
-    "                              'context-stuffing-1106': 'cs-1106'}, inplace=True)\n",
-    "\n",
    "# Stats\n",
    "std_errors = df_cot_eval.groupby([\"chain\"]).apply(group_standard_error)\n",
    "grouped_frac_correct = df_cot_eval.groupby('chain')['correct'].sum() / df_cot_eval.groupby('chain')['correct'].count() \n",
@@ -529,7 +520,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.9.16"
  }
 },
 "nbformat": 4,