mirror of
https://github.com/langchain-ai/langgraph-reflection.git
synced 2026-07-01 16:00:21 -04:00
cr
This commit is contained in:
@@ -45,7 +45,7 @@ In this example, the reflection agent uses another LLM to judge its output. The
|
||||
Installation:
|
||||
|
||||
```
|
||||
pip install langgraph-reflection langchain
|
||||
pip install langgraph-reflection langchain openevals
|
||||
```
|
||||
|
||||
Example usage:
|
||||
@@ -56,15 +56,20 @@ assistant_graph = ...
|
||||
# Define the judge function that evaluates responses
|
||||
def judge_response(state, config):
|
||||
"""Evaluate the assistant's response using a separate judge model."""
|
||||
judge_model = init_chat_model(...).bind_tools([Finish])
|
||||
response = judge_model.invoke([...])
|
||||
|
||||
# If the judge called Finish, response is approved
|
||||
if len(response.tool_calls) == 1:
|
||||
evaluator = create_llm_as_judge(
|
||||
prompt=critique_prompt,
|
||||
model="openai:o3-mini",
|
||||
feedback_key="pass",
|
||||
)
|
||||
eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)
|
||||
|
||||
if eval_result["score"]:
|
||||
print("✅ Response approved by judge")
|
||||
return
|
||||
else:
|
||||
# Return judge's critique as a new user message
|
||||
return {"messages": [{"role": "user", "content": response.content}]}
|
||||
# Otherwise, return the judge's critique as a new user message
|
||||
print("⚠️ Judge requested improvements")
|
||||
return {"messages": [{"role": "user", "content": eval_result["comment"]}]}
|
||||
|
||||
# Create graphs with reflection
|
||||
judge_graph = StateGraph(MessagesState).add_node(judge_response)...
|
||||
|
||||
+35
-29
@@ -20,35 +20,36 @@ from langgraph_reflection import create_reflection_graph
|
||||
|
||||
def analyze_with_pyright(code_string: str) -> dict:
|
||||
"""Analyze Python code using Pyright for static type checking and errors.
|
||||
|
||||
|
||||
Args:
|
||||
code_string: The Python code to analyze as a string
|
||||
|
||||
|
||||
Returns:
|
||||
dict: The Pyright analysis results
|
||||
"""
|
||||
with tempfile.NamedTemporaryFile(suffix='.py', mode='w', delete=False) as temp:
|
||||
with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as temp:
|
||||
temp.write(code_string)
|
||||
temp_path = temp.name
|
||||
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
"pyright",
|
||||
"--outputjson",
|
||||
"--level", "error", # Only report errors, not warnings
|
||||
temp_path
|
||||
"--level",
|
||||
"error", # Only report errors, not warnings
|
||||
temp_path,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True
|
||||
text=True,
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
return json.loads(result.stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {
|
||||
"error": "Failed to parse Pyright output",
|
||||
"raw_output": result.stdout
|
||||
"raw_output": result.stdout,
|
||||
}
|
||||
finally:
|
||||
os.unlink(temp_path)
|
||||
@@ -56,10 +57,10 @@ def analyze_with_pyright(code_string: str) -> dict:
|
||||
|
||||
def call_model(state: dict) -> dict:
|
||||
"""Process the user query with a Claude 3 Sonnet model.
|
||||
|
||||
|
||||
Args:
|
||||
state: The current conversation state
|
||||
|
||||
|
||||
Returns:
|
||||
dict: Updated state with model response
|
||||
"""
|
||||
@@ -70,11 +71,13 @@ def call_model(state: dict) -> dict:
|
||||
# Define type classes for code extraction
|
||||
class ExtractPythonCode(TypedDict):
|
||||
"""Type class for extracting Python code. The python_code field is the code to be extracted."""
|
||||
|
||||
python_code: str
|
||||
|
||||
|
||||
class NoCode(TypedDict):
|
||||
"""Type class for indicating no code was found."""
|
||||
|
||||
no_code: bool
|
||||
|
||||
|
||||
@@ -90,35 +93,39 @@ If there is no code to extract - call NoCode."""
|
||||
|
||||
def try_running(state: dict) -> dict | None:
|
||||
"""Attempt to run and analyze the extracted Python code.
|
||||
|
||||
|
||||
Args:
|
||||
state: The current conversation state
|
||||
|
||||
|
||||
Returns:
|
||||
dict | None: Updated state with analysis results if code was found
|
||||
"""
|
||||
model = init_chat_model(model="o3-mini")
|
||||
extraction = model.bind_tools([ExtractPythonCode, NoCode])
|
||||
er = extraction.invoke([{"role": "system", "content": SYSTEM_PROMPT}] + state['messages'])
|
||||
er = extraction.invoke(
|
||||
[{"role": "system", "content": SYSTEM_PROMPT}] + state["messages"]
|
||||
)
|
||||
if len(er.tool_calls) == 0:
|
||||
return None
|
||||
tc = er.tool_calls[0]
|
||||
if tc['name'] != "ExtractPythonCode":
|
||||
if tc["name"] != "ExtractPythonCode":
|
||||
return None
|
||||
|
||||
result = analyze_with_pyright(tc['args']['python_code'])
|
||||
|
||||
result = analyze_with_pyright(tc["args"]["python_code"])
|
||||
print(result)
|
||||
explanation = result['generalDiagnostics']
|
||||
|
||||
if result['summary']['errorCount']:
|
||||
explanation = result["generalDiagnostics"]
|
||||
|
||||
if result["summary"]["errorCount"]:
|
||||
return {
|
||||
"messages": [{
|
||||
"role": "user",
|
||||
"content": f"I ran pyright and found this: {explanation}\n\n"
|
||||
"Try to fix it. Make sure to regenerate the entire code snippet. "
|
||||
"If you are not sure what is wrong, or think there is a mistake, "
|
||||
"you can ask me a question rather than generating code"
|
||||
}]
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"I ran pyright and found this: {explanation}\n\n"
|
||||
"Try to fix it. Make sure to regenerate the entire code snippet. "
|
||||
"If you are not sure what is wrong, or think there is a mistake, "
|
||||
"you can ask me a question rather than generating code",
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -146,7 +153,6 @@ def create_graphs():
|
||||
return create_reflection_graph(assistant_graph, judge_graph).compile()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""Run an example query through the reflection system."""
|
||||
example_query = [
|
||||
@@ -159,4 +165,4 @@ if __name__ == "__main__":
|
||||
print("Running example with reflection...")
|
||||
reflection_app = create_graphs()
|
||||
result = reflection_app.invoke({"messages": example_query})
|
||||
print("Result:", result)
|
||||
print("Result:", result)
|
||||
|
||||
+17
-15
@@ -3,13 +3,15 @@
|
||||
Should install:
|
||||
|
||||
```
|
||||
pip install langgraph-reflection langchain
|
||||
pip install langgraph-reflection langchain openevals
|
||||
```
|
||||
"""
|
||||
|
||||
from langgraph_reflection import create_reflection_graph
|
||||
from langchain.chat_models import init_chat_model
|
||||
from langgraph.graph import StateGraph, MessagesState, START, END
|
||||
from typing import TypedDict
|
||||
from openevals.llm import create_llm_as_judge
|
||||
|
||||
|
||||
# Define the main assistant model that will generate responses
|
||||
@@ -46,34 +48,34 @@ Evaluate the response based on these criteria:
|
||||
4. Helpfulness - Does it provide actionable and useful information?
|
||||
5. Safety - Does it avoid harmful or inappropriate content?
|
||||
|
||||
If the response meets ALL criteria satisfactorily, call the `Finish` tool to approve it.
|
||||
If the response meets ALL criteria satisfactorily, set pass to True.
|
||||
|
||||
If you find ANY issues with the response, do NOT call the Finish tool. Instead, provide specific and constructive feedback about what needs to be improved, and your response will be sent back to the assistant as a follow-up query.
|
||||
If you find ANY issues with the response, do NOT set pass to True. Instead, provide specific and constructive feedback in the comment key and set pass to False.
|
||||
|
||||
Be detailed in your critique so the assistant can understand exactly how to improve."""
|
||||
Be detailed in your critique so the assistant can understand exactly how to improve.
|
||||
|
||||
<response>
|
||||
{outputs}
|
||||
</response>"""
|
||||
|
||||
|
||||
# Define the judge function with a more robust evaluation approach
|
||||
def judge_response(state, config):
|
||||
"""Evaluate the assistant's response using a separate judge model."""
|
||||
# Use a different model as the judge (can be smaller/more efficient)
|
||||
judge_model = init_chat_model(model="o3-mini", model_provider="openai").bind_tools(
|
||||
[Finish]
|
||||
evaluator = create_llm_as_judge(
|
||||
prompt=critique_prompt,
|
||||
model="openai:o3-mini",
|
||||
feedback_key="pass",
|
||||
)
|
||||
eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)
|
||||
|
||||
# Create judge prompt with all messages for context
|
||||
response = judge_model.invoke(
|
||||
[{"role": "system", "content": critique_prompt}] + state["messages"]
|
||||
)
|
||||
|
||||
# If the judge called the Finish tool, the response is approved
|
||||
if len(response.tool_calls) == 1:
|
||||
if eval_result["score"]:
|
||||
print("✅ Response approved by judge")
|
||||
return
|
||||
else:
|
||||
# Otherwise, return the judge's critique as a new user message
|
||||
print("⚠️ Judge requested improvements")
|
||||
return {"messages": [{"role": "user", "content": response.content}]}
|
||||
return {"messages": [{"role": "user", "content": eval_result["comment"]}]}
|
||||
|
||||
|
||||
# Define the judge graph
|
||||
|
||||
Reference in New Issue
Block a user