diff --git a/.gitignore b/.gitignore index 4b70b60..b63e58d 100644 --- a/.gitignore +++ b/.gitignore @@ -163,4 +163,7 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -.langgraph_api/ \ No newline at end of file +# Other +.langgraph_api/ +.vscode/ +.DS_Store \ No newline at end of file diff --git a/deep_research/agent.py b/deep_research/agent.py index 7977872..6149254 100644 --- a/deep_research/agent.py +++ b/deep_research/agent.py @@ -4,7 +4,10 @@ This module creates a deep research agent with custom tools and prompts for conducting web research with strategic thinking and context management. """ +from datetime import datetime + from langchain.chat_models import init_chat_model +from langchain_google_genai import ChatGoogleGenerativeAI from deepagents import create_deep_agent from research_agent.prompts import ( @@ -18,7 +21,10 @@ from research_agent.tools import tavily_search, think_tool max_concurrent_research_units = 3 max_researcher_iterations = 3 -# Combine instructions +# Get current date +current_date = datetime.now().strftime("%Y-%m-%d") + +# Combine orchestrator instructions (RESEARCHER_INSTRUCTIONS only for sub-agents) INSTRUCTIONS = ( RESEARCH_WORKFLOW_INSTRUCTIONS + "\n\n" @@ -28,25 +34,21 @@ INSTRUCTIONS = ( max_concurrent_research_units=max_concurrent_research_units, max_researcher_iterations=max_researcher_iterations, ) - + "\n\n" - + "=" * 80 - + "\n\n" - + RESEARCHER_INSTRUCTIONS ) # Create research sub-agent research_sub_agent = { "name": "research-agent", "description": "Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.", - "system_prompt": RESEARCHER_INSTRUCTIONS, + "system_prompt": RESEARCHER_INSTRUCTIONS.format(date=current_date), "tools": [tavily_search, think_tool], } -# Initialize model -model = init_chat_model( - model="anthropic:claude-sonnet-4-5-20250929", - temperature=0.0 -) +# Model Claude 4.5 +# model = init_chat_model(model="anthropic:claude-sonnet-4-5-20250929", temperature=0.0) + +# Model Gemini 3 +model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview", temperature=0.0) # Create the agent agent = create_deep_agent( diff --git a/deep_research/research_agent.ipynb b/deep_research/research_agent.ipynb index 6c29da8..a110bbd 100644 --- a/deep_research/research_agent.ipynb +++ b/deep_research/research_agent.ipynb @@ -94,6 +94,7 @@ "metadata": {}, "outputs": [], "source": [ + "from datetime import datetime\n", "from utils import show_prompt, format_messages\n", "from research_agent.prompts import (\n", " RESEARCHER_INSTRUCTIONS,\n", @@ -161,12 +162,15 @@ " </Show Your Thinking> \n", " \n", " <Final Response Format> \n", - " When providing your final answer, use numbered citations and include a Sources section: \n", + " When providing your findings back to the orchestrator: \n", " \n", - " 1. **Cite sources inline**: Use [1], [2], [3] format when referencing information \n", - " 2. **Sources section**: End with a numbered list of all sources \n", + " 1. **Structure your response**: Organize findings with clear headings and detailed explanations \n", + " 2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches \n", + " 3. **Include Sources section**: End with ### Sources listing each numbered source with title and URL \n", " \n", - " Example format: \n", + " Example: \n", + " ``` \n", + " ## Key Findings \n", " \n", " Context engineering is a critical technique for AI agents [1]. Studies show that proper context management \n", " can improve performance by 40% [2]. \n", @@ -174,8 +178,9 @@ " ### Sources \n", " [1] Context Engineering Guide: https://example.com/context-guide \n", " [2] AI Performance Study: https://example.com/study \n", + " ``` \n", " \n", - " This allows readers to verify information and explore topics further. \n", + " The orchestrator will consolidate citations from all sub-agents into the final report. \n", " </Final Response Format> \n", " \n", " \n", @@ -233,12 +238,15 @@ "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m When providing your final answer, use numbered citations and include a Sources section: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m When providing your findings back to the orchestrator: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 1. **Cite sources inline**: Use [1], [2], [3] format when referencing information \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 2. **Sources section**: End with a numbered list of all sources \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 1. **Structure your response**: Organize findings with clear headings and detailed explanations \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 3. **Include Sources section**: End with \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources listing each numbered source with title and URL\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Example format: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Example: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m ``` \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## Key Findings\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m Context engineering is a critical technique for AI agents [1]. Studies show that proper context management \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m can improve performance by 40% [2]. \u001b[34m│\u001b[0m\n", @@ -246,8 +254,9 @@ "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m [1] Context Engineering Guide: https://example.com/context-guide \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m [2] AI Performance Study: https://example.com/study \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m ``` \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m This allows readers to verify information and explore topics further. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m The orchestrator will consolidate citations from all sub-agents into the final report. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", @@ -281,11 +290,14 @@ "metadata": {}, "outputs": [], "source": [ + "# Get current date\n", + "current_date = datetime.now().strftime(\"%Y-%m-%d\")\n", + "\n", "# Create research sub-agent\n", "research_sub_agent = {\n", " \"name\": \"research-agent\",\n", " \"description\": \"Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.\",\n", - " \"system_prompt\": RESEARCHER_INSTRUCTIONS,\n", + " \"system_prompt\": RESEARCHER_INSTRUCTIONS.format(date=current_date),\n", " \"tools\": [tavily_search, think_tool],\n", "}" ] @@ -317,12 +329,16 @@ " \n", " Follow this workflow for all research requests: \n", " \n", - " 1. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` \n", - " 2. **Plan**: Create a todo list with write_todos to break down the research into focused tasks \n", - " 3. **Research**: Delegate research tasks to sub-agents using the task() tool \n", - " 4. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines \n", + " 1. **Plan**: Create a todo list with write_todos to break down the research into focused tasks \n", + " 2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` \n", + " 3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for \n", + " research, never conduct research yourself \n", + " 4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number \n", + " across all findings) \n", + " 5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines \n", " below) \n", - " 5. **Verify**: Read `/research_request.md` to confirm you've addressed all aspects of the original question \n", + " 6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and \n", + " structure \n", " \n", " ## Research Planning Guidelines \n", " - Batch similar research tasks into a single TODO to minimize overhead \n", @@ -362,10 +378,9 @@ " - Each section should be comprehensive and detailed \n", " - Use bullet points only when listing is more appropriate than prose \n", " \n", - " <Citation Rules> \n", - " Use numbered citations throughout your report: \n", + " **Citation format:** \n", " - Cite sources inline using [1], [2], [3] format \n", - " - Assign each unique URL a single citation number \n", + " - Assign each unique URL a single citation number across ALL sub-agent findings \n", " - End report with ### Sources section listing each numbered source \n", " - Number sources sequentially without gaps (1,2,3,4...) \n", " - Format: [1] Source Title: URL (each on separate line for proper list rendering) \n", @@ -376,7 +391,6 @@ " ### Sources \n", " [1] AI Research Paper: https://example.com/paper \n", " [2] Industry Analysis: https://example.com/analysis \n", - " </Citation Rules> \n", " \n", " \n", " ================================================================================ \n", @@ -388,97 +402,41 @@ " \n", " ## Delegation Strategy \n", " \n", - " **Simple queries** → 1 sub-agent: \n", - " - \"List the top 10 coffee shops in San Francisco\" \n", - " - \"What is quantum computing?\" \n", - " - \"Summarize the history of the internet\" \n", + " **DEFAULT: Start with 1 sub-agent** for most queries: \n", + " - \"What is quantum computing?\" → 1 sub-agent (general overview) \n", + " - \"List the top 10 coffee shops in San Francisco\" → 1 sub-agent \n", + " - \"Summarize the history of the internet\" → 1 sub-agent \n", + " - \"Research context engineering for AI agents\" → 1 sub-agent (covers all aspects) \n", " \n", - " **Comparisons** → 1 sub-agent per element: \n", + " **ONLY parallelize when the query EXPLICITLY requires comparison or has clearly independent aspects:** \n", + " \n", + " **Explicit comparisons** → 1 sub-agent per element: \n", " - \"Compare OpenAI vs Anthropic vs DeepMind AI safety approaches\" → 3 parallel sub-agents \n", " - \"Compare Python vs JavaScript for web development\" → 2 parallel sub-agents \n", " \n", - " **Multi-faceted research** → 1 sub-agent per aspect: \n", - " - \"Research renewable energy: costs, environmental impact, adoption rates\" → 3 parallel sub-agents \n", - " - \"Analyze the 2024 election: candidates, issues, polling\" → 3 parallel sub-agents \n", + " **Clearly separated aspects** → 1 sub-agent per aspect (use sparingly): \n", + " - \"Research renewable energy adoption in Europe, Asia, and North America\" → 3 parallel sub-agents (geographic \n", + " separation) \n", + " - Only use this pattern when aspects cannot be covered efficiently by a single comprehensive search \n", " \n", - " ## Parallel Execution \n", - " - Use at most {max_concurrent_research_units} parallel sub-agents per iteration \n", + " ## Key Principles \n", + " - **Bias towards single sub-agent**: One comprehensive research task is more token-efficient than multiple \n", + " narrow ones \n", + " - **Avoid premature decomposition**: Don't break \"research X\" into \"research X overview\", \"research X \n", + " techniques\", \"research X applications\" - just use 1 sub-agent for all of X \n", + " - **Parallelize only for clear comparisons**: Use multiple sub-agents when comparing distinct entities or \n", + " geographically separated data \n", + " \n", + " ## Parallel Execution Limits \n", + " - Use at most 3 parallel sub-agents per iteration \n", " - Make multiple task() calls in a single response to enable parallel execution \n", " - Each sub-agent returns findings independently \n", " \n", " ## Research Limits \n", - " - Stop after {max_researcher_iterations} delegation rounds if you haven't found adequate sources \n", + " - Stop after 3 delegation rounds if you haven't found adequate sources \n", " - Stop when you have sufficient information to answer comprehensively \n", " - Bias towards focused research over exhaustive exploration \n", " \n", - " ================================================================================ \n", - " \n", - " You are a research assistant conducting research on the user's input topic. For context, today's date is \n", - " {date}. \n", - " \n", - " <Task> \n", - " Your job is to use tools to gather information about the user's input topic. \n", - " You can use any of the research tools provided to you to find resources that can help answer the research \n", - " question. \n", - " You can call these tools in series or in parallel, your research is conducted in a tool-calling loop. \n", - " </Task> \n", - " \n", - " <Available Research Tools> \n", - " You have access to two specific research tools: \n", - " 1. **tavily_search**: For conducting web searches to gather information \n", - " 2. **think_tool**: For reflection and strategic planning during research \n", - " **CRITICAL: Use think_tool after each search to reflect on results and plan next steps** \n", - " </Available Research Tools> \n", - " \n", - " <Instructions> \n", - " Think like a human researcher with limited time. Follow these steps: \n", - " \n", - " 1. **Read the question carefully** - What specific information does the user need? \n", - " 2. **Start with broader searches** - Use broad, comprehensive queries first \n", - " 3. **After each search, pause and assess** - Do I have enough to answer? What's still missing? \n", - " 4. **Execute narrower searches as you gather information** - Fill in the gaps \n", - " 5. **Stop when you can answer confidently** - Don't keep searching for perfection \n", - " </Instructions> \n", - " \n", - " <Hard Limits> \n", - " **Tool Call Budgets** (Prevent excessive searching): \n", - " - **Simple queries**: Use 2-3 search tool calls maximum \n", - " - **Complex queries**: Use up to 5 search tool calls maximum \n", - " - **Always stop**: After 5 search tool calls if you cannot find the right sources \n", - " \n", - " **Stop Immediately When**: \n", - " - You can answer the user's question comprehensively \n", - " - You have 3+ relevant examples/sources for the question \n", - " - Your last 2 searches returned similar information \n", - " </Hard Limits> \n", - " \n", - " <Show Your Thinking> \n", - " After each search tool call, use think_tool to analyze the results: \n", - " - What key information did I find? \n", - " - What's missing? \n", - " - Do I have enough to answer the question comprehensively? \n", - " - Should I search more or provide my answer? \n", - " </Show Your Thinking> \n", - " \n", - " <Final Response Format> \n", - " When providing your final answer, use numbered citations and include a Sources section: \n", - " \n", - " 1. **Cite sources inline**: Use [1], [2], [3] format when referencing information \n", - " 2. **Sources section**: End with a numbered list of all sources \n", - " \n", - " Example format: \n", - " \n", - " Context engineering is a critical technique for AI agents [1]. Studies show that proper context management \n", - " can improve performance by 40% [2]. \n", - " \n", - " ### Sources \n", - " [1] Context Engineering Guide: https://example.com/context-guide \n", - " [2] AI Performance Study: https://example.com/study \n", - " \n", - " This allows readers to verify information and explore topics further. \n", - " </Final Response Format> \n", - " \n", - " \n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], @@ -489,12 +447,16 @@ "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m Follow this workflow for all research requests: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 1. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 2. **Plan**: Create a todo list with write_todos to break down the research into focused tasks \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 3. **Research**: Delegate research tasks to sub-agents using the task() tool \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 4. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 1. **Plan**: Create a todo list with write_todos to break down the research into focused tasks \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m research, never conduct research yourself \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m across all findings) \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m below) \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 5. **Verify**: Read `/research_request.md` to confirm you've addressed all aspects of the original question \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m structure \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;35m## Research Planning Guidelines\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Batch similar research tasks into a single TODO to minimize overhead \u001b[34m│\u001b[0m\n", @@ -534,10 +496,9 @@ "\u001b[34m│\u001b[0m - Each section should be comprehensive and detailed \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Use bullet points only when listing is more appropriate than prose \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Use numbered citations throughout your report: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **Citation format:** \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Cite sources inline using [1], [2], [3] format \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Assign each unique URL a single citation number \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - Assign each unique URL a single citation number across ALL sub-agent findings \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - End report with \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources section listing each numbered source\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Number sources sequentially without gaps (1,2,3,4...) \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Format: [1] Source Title: URL (each on separate line for proper list rendering) \u001b[34m│\u001b[0m\n", @@ -548,7 +509,6 @@ "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m [1] AI Research Paper: https://example.com/paper \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m [2] Industry Analysis: https://example.com/analysis \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m ================================================================================ \u001b[34m│\u001b[0m\n", @@ -560,97 +520,41 @@ "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;35m## Delegation Strategy\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **Simple queries** → 1 sub-agent: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - \"List the top 10 coffee shops in San Francisco\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - \"What is quantum computing?\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - \"Summarize the history of the internet\" \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **DEFAULT: Start with 1 sub-agent** for most queries: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - \"What is quantum computing?\" → 1 sub-agent (general overview) \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - \"List the top 10 coffee shops in San Francisco\" → 1 sub-agent \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - \"Summarize the history of the internet\" → 1 sub-agent \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - \"Research context engineering for AI agents\" → 1 sub-agent (covers all aspects) \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **Comparisons** → 1 sub-agent per element: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **ONLY parallelize when the query EXPLICITLY requires comparison or has clearly independent aspects:** \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **Explicit comparisons** → 1 sub-agent per element: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - \"Compare OpenAI vs Anthropic vs DeepMind AI safety approaches\" → 3 parallel sub-agents \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - \"Compare Python vs JavaScript for web development\" → 2 parallel sub-agents \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **Multi-faceted research** → 1 sub-agent per aspect: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - \"Research renewable energy: costs, environmental impact, adoption rates\" → 3 parallel sub-agents \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - \"Analyze the 2024 election: candidates, issues, polling\" → 3 parallel sub-agents \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **Clearly separated aspects** → 1 sub-agent per aspect (use sparingly): \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - \"Research renewable energy adoption in Europe, Asia, and North America\" → 3 parallel sub-agents (geographic \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m separation) \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - Only use this pattern when aspects cannot be covered efficiently by a single comprehensive search \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Parallel Execution\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Use at most {max_concurrent_research_units} parallel sub-agents per iteration \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## Key Principles\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - **Bias towards single sub-agent**: One comprehensive research task is more token-efficient than multiple \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m narrow ones \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - **Avoid premature decomposition**: Don't break \"research X\" into \"research X overview\", \"research X \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m techniques\", \"research X applications\" - just use 1 sub-agent for all of X \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - **Parallelize only for clear comparisons**: Use multiple sub-agents when comparing distinct entities or \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m geographically separated data \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## Parallel Execution Limits\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - Use at most 3 parallel sub-agents per iteration \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Make multiple task() calls in a single response to enable parallel execution \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Each sub-agent returns findings independently \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;35m## Research Limits\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Stop after {max_researcher_iterations} delegation rounds if you haven't found adequate sources \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m - Stop after 3 delegation rounds if you haven't found adequate sources \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Stop when you have sufficient information to answer comprehensively \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m - Bias towards focused research over exhaustive exploration \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m ================================================================================ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m You are a research assistant conducting research on the user's input topic. For context, today's date is \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m {date}. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Your job is to use tools to gather information about the user's input topic. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m You can use any of the research tools provided to you to find resources that can help answer the research \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m question. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m You can call these tools in series or in parallel, your research is conducted in a tool-calling loop. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m You have access to two specific research tools: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 1. **tavily_search**: For conducting web searches to gather information \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 2. **think_tool**: For reflection and strategic planning during research \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **CRITICAL: Use think_tool after each search to reflect on results and plan next steps** \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Think like a human researcher with limited time. Follow these steps: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 1. **Read the question carefully** - What specific information does the user need? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 2. **Start with broader searches** - Use broad, comprehensive queries first \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 3. **After each search, pause and assess** - Do I have enough to answer? What's still missing? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 4. **Execute narrower searches as you gather information** - Fill in the gaps \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 5. **Stop when you can answer confidently** - Don't keep searching for perfection \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **Tool Call Budgets** (Prevent excessive searching): \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - **Simple queries**: Use 2-3 search tool calls maximum \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - **Complex queries**: Use up to 5 search tool calls maximum \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - **Always stop**: After 5 search tool calls if you cannot find the right sources \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m **Stop Immediately When**: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - You can answer the user's question comprehensively \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - You have 3+ relevant examples/sources for the question \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Your last 2 searches returned similar information \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m After each search tool call, use think_tool to analyze the results: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - What key information did I find? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - What's missing? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Do I have enough to answer the question comprehensively? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m - Should I search more or provide my answer? \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m When providing your final answer, use numbered citations and include a Sources section: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 1. **Cite sources inline**: Use [1], [2], [3] format when referencing information \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m 2. **Sources section**: End with a numbered list of all sources \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Example format: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Context engineering is a critical technique for AI agents [1]. Studies show that proper context management \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m can improve performance by 40% [2]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [1] Context Engineering Guide: https://example.com/context-guide \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [2] AI Performance Study: https://example.com/study \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m This allows readers to verify information and explore topics further. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -659,16 +563,20 @@ } ], "source": [ + "# Limits\n", + "max_concurrent_research_units = 3\n", + "max_researcher_iterations = 3\n", + "\n", + "# Combine orchestrator instructions (RESEARCHER_INSTRUCTIONS only for sub-agents)\n", "INSTRUCTIONS = (\n", " RESEARCH_WORKFLOW_INSTRUCTIONS\n", " + \"\\n\\n\"\n", " + \"=\" * 80\n", " + \"\\n\\n\"\n", - " + SUBAGENT_DELEGATION_INSTRUCTIONS\n", - " + \"\\n\\n\"\n", - " + \"=\" * 80\n", - " + \"\\n\\n\"\n", - " + RESEARCHER_INSTRUCTIONS\n", + " + SUBAGENT_DELEGATION_INSTRUCTIONS.format(\n", + " max_concurrent_research_units=max_concurrent_research_units,\n", + " max_researcher_iterations=max_researcher_iterations,\n", + " )\n", ")\n", "\n", "show_prompt(INSTRUCTIONS)" @@ -686,7 +594,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "e979ff23-e36a-45b2-bd52-03cf4171f36c", "metadata": {}, "outputs": [], @@ -705,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "62da8411", "metadata": {}, "outputs": [ @@ -735,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 12, "id": "613634c2", "metadata": {}, "outputs": [ @@ -761,80 +669,99 @@ "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
-       " 🔧 Tool Call: write_file                                                                                        \n",
-       "    Args: {                                                                                                      \n",
-       "   \"content\": \"research context engineering approaches used to build AI agents\",                                 \n",
-       "   \"file_path\": \"/research_request.md\"                                                                           \n",
-       " }                                                                                                               \n",
-       "    ID: a4a0832d-9bae-487c-b52f-164d0b47afdc                                                                     \n",
-       "                                                                                                                 \n",
        " 🔧 Tool Call: write_todos                                                                                       \n",
        "    Args: {                                                                                                      \n",
        "   \"todos\": [                                                                                                    \n",
        "     {                                                                                                           \n",
-       "       \"content\": \"Research core concepts and definitions of context engineering in AI agents\",                  \n",
-       "       \"status\": \"in_progress\"                                                                                   \n",
+       "       \"status\": \"in_progress\",                                                                                  \n",
+       "       \"content\": \"Save research request to /research_request.md\"                                                \n",
        "     },                                                                                                          \n",
        "     {                                                                                                           \n",
-       "       \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems,   \n",
-       " Sliding windows)\",                                                                                              \n",
-       "       \"status\": \"pending\"                                                                                       \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Research context engineering approaches for AI agents using a sub-agent\"                      \n",
        "     },                                                                                                          \n",
        "     {                                                                                                           \n",
-       "       \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression,           \n",
-       " Summarization, Token selection)\",                                                                               \n",
-       "       \"status\": \"pending\"                                                                                       \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Synthesize findings and write final report to /final_report.md\"                               \n",
        "     },                                                                                                          \n",
        "     {                                                                                                           \n",
-       "       \"content\": \"Write final report on context engineering approaches\",                                        \n",
-       "       \"status\": \"pending\"                                                                                       \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Verify report against original request\"                                                       \n",
        "     }                                                                                                           \n",
        "   ]                                                                                                             \n",
        " }                                                                                                               \n",
-       "    ID: 29e25aca-85a1-41f9-9149-4304f30e5fa5                                                                     \n",
+       "    ID: 61820e98-a91b-4f74-ac2c-f443af11d389                                                                     \n",
+       "                                                                                                                 \n",
+       " 🔧 Tool Call: write_file                                                                                        \n",
+       "    Args: {                                                                                                      \n",
+       "   \"file_path\": \"/research_request.md\",                                                                          \n",
+       "   \"content\": \"research context engineering approaches used to build AI agents\"                                  \n",
+       " }                                                                                                               \n",
+       "    ID: 3a5e0fbf-0181-4d89-9501-c5fac4b3cef2                                                                     \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"research context engineering approaches used to build AI agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: a4a0832d-9bae-487c-b52f-164d0b47afdc \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research core concepts and definitions of context engineering in AI agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"in_progress\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Sliding windows)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization, Token selection)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Write final report on context engineering approaches\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 29e25aca-85a1-41f9-9149-4304f30e5fa5 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 61820e98-a91b-4f74-ac2c-f443af11d389 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"research context engineering approaches used to build AI agents\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 3a5e0fbf-0181-4d89-9501-c5fac4b3cef2 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
+       " Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'in_progress'},    \n",
+       " {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'pending'},    \n",
+       " {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'pending'}, {'content': \n",
+       " 'Verify report against original request', 'status': 'pending'}]                                                 \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'in_progress'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'pending'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'pending'}, {'content': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -852,100 +779,85 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
-       " Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \n",
-       " 'status': 'in_progress'}, {'content': 'Research structural techniques for context management (e.g., RAG,        \n",
-       " Long-term memory systems, Sliding windows)', 'status': 'pending'}, {'content': 'Research context optimization   \n",
-       " and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'pending'},   \n",
-       " {'content': 'Write final report on context engineering approaches', 'status': 'pending'}]                       \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 'status': 'in_progress'}, {'content': 'Research structural techniques for context management (e.g., RAG, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Long-term memory systems, Sliding windows)', 'status': 'pending'}, {'content': 'Research context optimization \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'pending'}, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m {'content': 'Write final report on context engineering approaches', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", - "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
-       " 🔧 Tool Call: task                                                                                              \n",
+       " 🔧 Tool Call: write_todos                                                                                       \n",
        "    Args: {                                                                                                      \n",
-       "   \"subagent_type\": \"research-agent\",                                                                            \n",
-       "   \"description\": \"Research the core concepts and definitions of 'context engineering' specifically for AI       \n",
-       " agents. \\nFind out:\\n1. What is context engineering?\\n2. Why is it critical for AI agents?\\n3. Key principles   \n",
-       " (e.g. relevance, coherence, token limits).\\n4. Distinctions between context engineering and prompt              \n",
-       " engineering.\\n\\nReturn a summary of findings with sources.\"                                                     \n",
+       "   \"todos\": [                                                                                                    \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Save research request to /research_request.md\"                                                \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"in_progress\",                                                                                  \n",
+       "       \"content\": \"Research context engineering approaches for AI agents using a sub-agent\"                      \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Synthesize findings and write final report to /final_report.md\"                               \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Verify report against original request\"                                                       \n",
+       "     }                                                                                                           \n",
+       "   ]                                                                                                             \n",
        " }                                                                                                               \n",
-       "    ID: 60ae692d-a760-4254-8819-d3541d2b6456                                                                     \n",
+       "    ID: b317fa1b-6f81-47df-b0ca-aa5e50bd2b70                                                                     \n",
        "                                                                                                                 \n",
        " 🔧 Tool Call: task                                                                                              \n",
        "    Args: {                                                                                                      \n",
        "   \"subagent_type\": \"research-agent\",                                                                            \n",
-       "   \"description\": \"Research structural techniques for context management in AI agents.\\nFocus on:\\n1.            \n",
-       " Retrieval-Augmented Generation (RAG) patterns for context injection.\\n2. Memory architectures (Short-term vs    \n",
-       " Long-term, Vector databases).\\n3. Context window management strategies (Sliding windows, Hierarchical           \n",
-       " context).\\n4. Dynamic context selection methods.\\n\\nReturn a summary of findings with sources.\"                 \n",
+       "   \"description\": \"Research context engineering approaches used to build AI agents.\\nFocus on:\\n1. Definition    \n",
+       " and core concepts of context engineering.\\n2. Key techniques and strategies (e.g., dynamic context injection,   \n",
+       " RAG, memory management/history pruning, summarization, prompt structuring).\\n3. Architectures or frameworks     \n",
+       " often used (e.g., how LangChain or LlamaIndex handle context).\\n4. Approaches for handling context window       \n",
+       " limits vs. utilizing long-context models.\\n5. Specific patterns like \\\"Needle in a Haystack\\\" optimization or   \n",
+       " \\\"Context Stuffing\\\" vs \\\"Context Selection\\\".\\n\\nReturn a comprehensive summary of findings with citations.\"   \n",
        " }                                                                                                               \n",
-       "    ID: 8ce46bb7-c73d-47ff-a20d-ff7add14a95a                                                                     \n",
-       "                                                                                                                 \n",
-       " 🔧 Tool Call: task                                                                                              \n",
-       "    Args: {                                                                                                      \n",
-       "   \"subagent_type\": \"research-agent\",                                                                            \n",
-       "   \"description\": \"Research context optimization and compression techniques for AI agents.\\nFocus on:\\n1.        \n",
-       " Summarization techniques to reduce context size.\\n2. Prompt compression algorithms (e.g., selective context,    \n",
-       " LLMLingua).\\n3. Token selection/pruning strategies.\\n4. Any specific tools or libraries known for this (e.g.    \n",
-       " LangChain's approaches).\\n\\nReturn a summary of findings with sources.\"                                         \n",
-       " }                                                                                                               \n",
-       "    ID: ae5f9ef0-1ebb-48dc-ae07-438f9205f022                                                                     \n",
+       "    ID: b682976a-b1fa-47a1-89be-153134ba28c7                                                                     \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"description\": \"Research the core concepts and definitions of 'context engineering' specifically for AI \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m agents. \\nFind out:\\n1. What is context engineering?\\n2. Why is it critical for AI agents?\\n3. Key principles \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (e.g. relevance, coherence, token limits).\\n4. Distinctions between context engineering and prompt \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m engineering.\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 60ae692d-a760-4254-8819-d3541d2b6456 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: b317fa1b-6f81-47df-b0ca-aa5e50bd2b70 \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"description\": \"Research structural techniques for context management in AI agents.\\nFocus on:\\n1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Retrieval-Augmented Generation (RAG) patterns for context injection.\\n2. Memory architectures (Short-term vs \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Long-term, Vector databases).\\n3. Context window management strategies (Sliding windows, Hierarchical \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context).\\n4. Dynamic context selection methods.\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"description\": \"Research context engineering approaches used to build AI agents.\\nFocus on:\\n1. Definition \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m and core concepts of context engineering.\\n2. Key techniques and strategies (e.g., dynamic context injection, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m RAG, memory management/history pruning, summarization, prompt structuring).\\n3. Architectures or frameworks \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m often used (e.g., how LangChain or LlamaIndex handle context).\\n4. Approaches for handling context window \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m limits vs. utilizing long-context models.\\n5. Specific patterns like \\\"Needle in a Haystack\\\" optimization or \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \\\"Context Stuffing\\\" vs \\\"Context Selection\\\".\\n\\nReturn a comprehensive summary of findings with citations.\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 8ce46bb7-c73d-47ff-a20d-ff7add14a95a \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"description\": \"Research context optimization and compression techniques for AI agents.\\nFocus on:\\n1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization techniques to reduce context size.\\n2. Prompt compression algorithms (e.g., selective context, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m LLMLingua).\\n3. Token selection/pruning strategies.\\n4. Any specific tools or libraries known for this (e.g. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m LangChain's approaches).\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: ae5f9ef0-1ebb-48dc-ae07-438f9205f022 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: b682976a-b1fa-47a1-89be-153134ba28c7 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -956,423 +868,224 @@ "data": { "text/html": [ "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
-       " Based on the research, here is a summary of the core concepts of **Context Engineering** for AI agents.         \n",
+       " Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'},      \n",
+       " {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status':                \n",
+       " 'in_progress'}, {'content': 'Synthesize findings and write final report to /final_report.md', 'status':         \n",
+       " 'pending'}, {'content': 'Verify report against original request', 'status': 'pending'}]                         \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'in_progress'}, {'content': 'Synthesize findings and write final report to /final_report.md', 'status': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'pending'}, {'content': 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
+       " Context engineering is the systematic practice of designing, managing, and optimizing the information provided  \n",
+       " to AI models to ensure accurate, consistent, and context-aware behavior. It moves beyond simple prompt          \n",
+       " engineering by treating context as a dynamic resource that must be architected, managed, and validated.         \n",
        "                                                                                                                 \n",
-       " ### 1. What is Context Engineering?                                                                             \n",
-       " **Context Engineering** is the technical practice of designing, curating, and managing the total information    \n",
-       " state (the \"context\") provided to an AI agent during its operation. Unlike simple prompting, which focuses on a \n",
-       " single input instruction, context engineering is a **systemic and iterative process** that determines *what*    \n",
-       " information an agent perceives at every step of a workflow [1].                                                 \n",
+       " ### 1. Definition and Core Concepts                                                                             \n",
+       " **Context Engineering** is the \"process of designing, testing, and iterating on the contextual information      \n",
+       " provided to AI agents to shape their behavior and improve task performance\" [1]. Unlike a static prompt,        \n",
+       " context in an agentic system is a living state that changes with every user interaction, tool output, and       \n",
+       " internal reasoning step.                                                                                        \n",
        "                                                                                                                 \n",
-       " It involves configuring the optimal set of tokens—including system instructions, tool definitions, conversation \n",
-       " history, and external data—to ensure the model behaves reliably over time. Ideally, it answers the question:    \n",
-       " *\"What configuration of context is most likely to generate the model's desired behavior?\"* [2]                  \n",
+       " *   **Core Concepts**:                                                                                          \n",
+       "     *   **Context Window**: The working memory limit of the LLM (e.g., 128k tokens). Managing this finite       \n",
+       " resource is the central challenge.                                                                              \n",
+       "     *   **State Management**: Tracking the \"world state\" (user intent, task progress, tool results) across a    \n",
+       " multi-turn conversation.                                                                                        \n",
+       "     *   **Context Pollution**: The degradation of model performance caused by irrelevant or contradictory       \n",
+       " information in the context [2].                                                                                 \n",
+       "     *   **Context Continuity**: Ensuring the agent retains critical information (like user constraints)         \n",
+       " throughout a long interaction without \"forgetting\" them due to truncation.                                      \n",
        "                                                                                                                 \n",
-       " ### 2. Why is it Critical for AI Agents?                                                                        \n",
-       " Context engineering is essential for shifting from simple chatbots to autonomous agents that perform complex,   \n",
-       " multi-step tasks.                                                                                               \n",
-       " *   **Combating \"Context Rot\":** As the amount of information (tokens) in a context window increases, a model’s \n",
-       " ability to retrieve specific details (\"needle-in-a-haystack\") often degrades. This is due to the **attention    \n",
-       " scarcity** inherent in transformer architectures (where computational complexity is $N^2$ relative to context   \n",
-       " length) [1].                                                                                                    \n",
-       " *   **Enabling Long-Horizon Tasks:** Agents working on tasks spanning hours or days (e.g., coding, research)    \n",
-       " accumulate massive amounts of history. Without engineering strategies to manage this, the context window fills  \n",
-       " up, leading to lost instructions, confusion, or crashed sessions [1].                                           \n",
-       " *   **Reliability & Cost:** Providing \"just enough\" high-signal context reduces hallucination risks and         \n",
-       " operational costs associated with processing unnecessary tokens.                                                \n",
+       " ### 2. Key Techniques and Strategies                                                                            \n",
+       " Effective context engineering employs a mix of retrieval, summarization, and structural strategies:             \n",
        "                                                                                                                 \n",
-       " ### 3. Key Principles of Context Engineering                                                                    \n",
-       " Effective context engineering relies on providing the **smallest possible set of high-signal tokens**.          \n",
+       " *   **Dynamic Context Injection**: Instead of loading all data at once, agents \"pull\" information only when     \n",
+       " needed. For example, an agent might use a `search_tool` to inject real-time web results into the context only   \n",
+       " after identifying a knowledge gap [2].                                                                          \n",
+       " *   **Retrieval-Augmented Generation (RAG)**: A standard pattern for handling large knowledge bases. The agent  \n",
+       " retrieves strictly relevant chunks (e.g., top-5 matches) from a vector database to populate the context window, \n",
+       " rather than stuffing the entire document [2, 3].                                                                \n",
+       " *   **Memory Management**:                                                                                      \n",
+       "     *   **Short-term Memory**: Stores immediate conversation history (e.g., last 10 turns).                     \n",
+       "     *   **Long-term Memory**: Persists key facts (e.g., \"User is a vegetarian\") in an external database         \n",
+       " (Postgres, Vector Store) and retrieves them only when relevant [2].                                             \n",
+       "     *   **Pruning/Trimming**: Algorithms that remove older, less relevant tokens to free up space while         \n",
+       " preserving recent interactions [2].                                                                             \n",
+       " *   **Summarization**: Compressing verbose tool outputs. Instead of feeding raw HTML from a scraped website, an \n",
+       " intermediate \"summarizer agent\" distills it into key points before passing it to the main agent [2].            \n",
+       " *   **Prompt Structuring**: Using structured formats (like JSON schemas or XML tags) within the context to      \n",
+       " clearly separate \"System Instructions,\" \"User Input,\" and \"Tool Outputs,\" preventing the model from confusing   \n",
+       " data with instructions.                                                                                         \n",
        "                                                                                                                 \n",
-       " *   **Relevance (Signal-to-Noise Ratio):**                                                                      \n",
-       "     *   **Just-in-Time Retrieval:** Instead of dumping all files into the context (traditional RAG), agents     \n",
-       " should be equipped with tools (like `grep` or `ls`) to autonomously \"pull\" only the information they need when  \n",
-       " they need it [2].                                                                                               \n",
-       "     *   **The \"Right Altitude\":** System prompts should avoid being too brittle (hardcoded logic) or too vague. \n",
-       " They should provide flexible heuristics and distinct sections (e.g., Background, Tools, Output Format).         \n",
-       " *   **Coherence & State Management:**                                                                           \n",
-       "     *   **Compaction:** Periodically summarizing conversation history to compress the context while preserving  \n",
-       " critical decisions and unresolved issues [1].                                                                   \n",
-       "     *   **Structured Note-Taking:** Forcing agents to maintain persistent \"memory files\" (e.g., `scratchpad.md` \n",
-       " or `todo.md`) outside the context window to track progress across resets.                                       \n",
-       " *   **Token Efficiency:**                                                                                       \n",
-       "     *   **Tool Optimization:** Tool definitions should be concise. If a human engineer cannot determine which   \n",
-       " tool to use based on the definition, the agent won't be able to either. Return values from tools should be      \n",
-       " stripped of extraneous data to save space [2].                                                                  \n",
+       " ### 3. Architectures and Frameworks                                                                             \n",
+       " Two primary frameworks dominate the landscape, each with distinct approaches to context handling:               \n",
        "                                                                                                                 \n",
-       " ### 4. Context Engineering vs. Prompt Engineering                                                               \n",
-       " While prompt engineering is about *crafting the message*, context engineering is about *architecting the        \n",
-       " environment*.                                                                                                   \n",
-       "                                                                                                                 \n",
-       " | Feature | Prompt Engineering | Context Engineering |                                                          \n",
+       " | Feature | **LangChain** [3, 4] | **LlamaIndex** [3, 4] |                                                      \n",
        " | :--- | :--- | :--- |                                                                                          \n",
-       " | **Scope** | Discrete, single-turn interactions. | Continuous, multi-turn workflows and system state. |        \n",
-       " | **Focus** | Finding the \"right words\" or instructions. | Curating the \"optimal set of tokens\" (history,       \n",
-       " tools, data). |                                                                                                 \n",
-       " | **Goal** | Optimizing a specific response. | Maintaining agent reliability and state over time. |             \n",
-       " | **Mechanism** | Static text input. | Dynamic management (retrieval, summarization, pruning). |                \n",
-       " | **Analogy** | Writing a good email to an employee. | Designing the employee's office, filing system, and      \n",
-       " handbook. |                                                                                                     \n",
+       " | **Primary Focus** | Flexible workflow & agent orchestration | Data indexing & advanced retrieval (RAG) |      \n",
+       " | **Memory Classes** | `ConversationBufferMemory` (raw history)<br>`ConversationSummaryMemory` (LLM-summarized  \n",
+       " history)<br>`ChatMessageHistory` (storage backend) | `ChatEngine` with modes:<br>`context` (standard            \n",
+       " RAG)<br>`condense_question` (rewrites queries based on history) |                                               \n",
+       " | **Context Strategy** | **Chains**: Sequential passing of context between steps.<br>**Agents**: Dynamic tool   \n",
+       " selection to populate context. | **Indices**: `VectorStoreIndex` (semantic search), `ListIndex` (sequential),   \n",
+       " `TreeIndex` (hierarchical summaries). |                                                                         \n",
+       " | **Optimization** | `Context Compression`: Removing irrelevant tokens from retrieved docs. | `Response         \n",
+       " Synthesizers`: <br>`compact` (stuffs max chunks into prompt)<br>`tree_summarize` (hierarchical reduction) |     \n",
+       "                                                                                                                 \n",
+       " ### 4. Handling Context Limits vs. Long-Context Models                                                          \n",
+       " A critical architectural decision is whether to use RAG or rely on massive context windows (e.g., Gemini 1.5    \n",
+       " Pro's 2M tokens).                                                                                               \n",
+       "                                                                                                                 \n",
+       " *   **RAG + Small Context**:                                                                                    \n",
+       "     *   **Pros**: Significantly cheaper (fewer tokens processed), lower latency, and often higher accuracy for  \n",
+       " specific fact retrieval.                                                                                        \n",
+       "     *   **Cons**: Can miss \"global\" insights that require reading the entire corpus at once.                    \n",
+       " *   **Long-Context Models (LCW)**:                                                                              \n",
+       "     *   **Pros**: Can ingest entire books or codebases, enabling \"whole-document\" reasoning.                    \n",
+       "     *   **Cons**: \"Lost in the Middle\" phenomenon where models forget information in the middle of a massive    \n",
+       " context.                                                                                                        \n",
+       "     *   **Trade-off**: Benchmarks show that RAG systems often outperform LCW models in \"Needle in a Haystack\"   \n",
+       " tests for large corpora (2M+ tokens) because they filter noise before the model sees it [3].                    \n",
+       "                                                                                                                 \n",
+       " ### 5. Specific Patterns                                                                                        \n",
+       " *   **Needle in a Haystack Optimization**:                                                                      \n",
+       "     *   **The Problem**: LLMs struggle to find a specific fact (\"needle\") buried in a large amount of           \n",
+       " irrelevant text (\"haystack\").                                                                                   \n",
+       "     *   **Optimization**: Place critical instructions or facts at the very **beginning** or very **end** of the \n",
+       " context window, as models pay most attention to these areas (Serial Position Effect).                           \n",
+       " *   **Context Stuffing vs. Context Selection**:                                                                 \n",
+       "     *   **Context Stuffing**: Naively filling the window with as much data as possible. This often leads to     \n",
+       " *context distraction* and higher costs [2].                                                                     \n",
+       "     *   **Context Selection**: Intentionally curating what enters the window.                                   \n",
+       "         *   *Techniques*: Using **Node Postprocessors** (LlamaIndex) to filter retrieved nodes by keywords or   \n",
+       " similarity score before they reach the LLM [4].                                                                 \n",
+       "         *   *Query Transformations*: Rewriting a user's vague query into a precise search term to select better \n",
+       " context (e.g., LangChain's `HyDE` or LlamaIndex's `Multi-step query decomposition`) [4].                        \n",
        "                                                                                                                 \n",
        " ### Sources                                                                                                     \n",
-       " [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025.                                \n",
-       " [2] Anthropic Engineering, \"Effective context engineering for AI agents\" (referenced within [1]).               \n",
+       " [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering                         \n",
+       " [2] Context Engineering Strategies (n8n):                                                                       \n",
+       " https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/                       \n",
+       " [3] RAG vs Long Context Window Study:                                                                           \n",
+       " https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs                             \n",
+       " [4] LangChain & LlamaIndex Comparison:                                                                          \n",
+       " https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \n",
+       " in-large-2b5f3fabc360                                                                                           \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Based on the research, here is a summary of the core concepts of **Context Engineering** for AI agents. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Context engineering is the systematic practice of designing, managing, and optimizing the information provided \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m to AI models to ensure accurate, consistent, and context-aware behavior. It moves beyond simple prompt \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m engineering by treating context as a dynamic resource that must be architected, managed, and validated. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. What is Context Engineering? \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m **Context Engineering** is the technical practice of designing, curating, and managing the total information \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m state (the \"context\") provided to an AI agent during its operation. Unlike simple prompting, which focuses on a \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m single input instruction, context engineering is a **systemic and iterative process** that determines *what* \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m information an agent perceives at every step of a workflow [1]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 1. Definition and Core Concepts \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m **Context Engineering** is the \"process of designing, testing, and iterating on the contextual information \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m provided to AI agents to shape their behavior and improve task performance\" [1]. Unlike a static prompt, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context in an agentic system is a living state that changes with every user interaction, tool output, and \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m internal reasoning step. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m It involves configuring the optimal set of tokens—including system instructions, tool definitions, conversation \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m history, and external data—to ensure the model behaves reliably over time. Ideally, it answers the question: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m *\"What configuration of context is most likely to generate the model's desired behavior?\"* [2] \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Core Concepts**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Window**: The working memory limit of the LLM (e.g., 128k tokens). Managing this finite \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m resource is the central challenge. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **State Management**: Tracking the \"world state\" (user intent, task progress, tool results) across a \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m multi-turn conversation. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Pollution**: The degradation of model performance caused by irrelevant or contradictory \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m information in the context [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Continuity**: Ensuring the agent retains critical information (like user constraints) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m throughout a long interaction without \"forgetting\" them due to truncation. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Why is it Critical for AI Agents? \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Context engineering is essential for shifting from simple chatbots to autonomous agents that perform complex, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m multi-step tasks. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Combating \"Context Rot\":** As the amount of information (tokens) in a context window increases, a model’s \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ability to retrieve specific details (\"needle-in-a-haystack\") often degrades. This is due to the **attention \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m scarcity** inherent in transformer architectures (where computational complexity is $N^2$ relative to context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m length) [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Enabling Long-Horizon Tasks:** Agents working on tasks spanning hours or days (e.g., coding, research) \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m accumulate massive amounts of history. Without engineering strategies to manage this, the context window fills \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m up, leading to lost instructions, confusion, or crashed sessions [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Reliability & Cost:** Providing \"just enough\" high-signal context reduces hallucination risks and \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m operational costs associated with processing unnecessary tokens. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 2. Key Techniques and Strategies \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Effective context engineering employs a mix of retrieval, summarization, and structural strategies: \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Key Principles of Context Engineering \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Effective context engineering relies on providing the **smallest possible set of high-signal tokens**. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Dynamic Context Injection**: Instead of loading all data at once, agents \"pull\" information only when \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m needed. For example, an agent might use a `search_tool` to inject real-time web results into the context only \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m after identifying a knowledge gap [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Retrieval-Augmented Generation (RAG)**: A standard pattern for handling large knowledge bases. The agent \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m retrieves strictly relevant chunks (e.g., top-5 matches) from a vector database to populate the context window, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m rather than stuffing the entire document [2, 3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Memory Management**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Short-term Memory**: Stores immediate conversation history (e.g., last 10 turns). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Long-term Memory**: Persists key facts (e.g., \"User is a vegetarian\") in an external database \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (Postgres, Vector Store) and retrieves them only when relevant [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Pruning/Trimming**: Algorithms that remove older, less relevant tokens to free up space while \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m preserving recent interactions [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Summarization**: Compressing verbose tool outputs. Instead of feeding raw HTML from a scraped website, an \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m intermediate \"summarizer agent\" distills it into key points before passing it to the main agent [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Prompt Structuring**: Using structured formats (like JSON schemas or XML tags) within the context to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m clearly separate \"System Instructions,\" \"User Input,\" and \"Tool Outputs,\" preventing the model from confusing \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m data with instructions. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Relevance (Signal-to-Noise Ratio):** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Just-in-Time Retrieval:** Instead of dumping all files into the context (traditional RAG), agents \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m should be equipped with tools (like `grep` or `ls`) to autonomously \"pull\" only the information they need when \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m they need it [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **The \"Right Altitude\":** System prompts should avoid being too brittle (hardcoded logic) or too vague. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m They should provide flexible heuristics and distinct sections (e.g., Background, Tools, Output Format). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Coherence & State Management:** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Compaction:** Periodically summarizing conversation history to compress the context while preserving \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m critical decisions and unresolved issues [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Structured Note-Taking:** Forcing agents to maintain persistent \"memory files\" (e.g., `scratchpad.md` \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m or `todo.md`) outside the context window to track progress across resets. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Token Efficiency:** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Tool Optimization:** Tool definitions should be concise. If a human engineer cannot determine which \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tool to use based on the definition, the agent won't be able to either. Return values from tools should be \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m stripped of extraneous data to save space [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 3. Architectures and Frameworks \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Two primary frameworks dominate the landscape, each with distinct approaches to context handling: \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Context Engineering vs. Prompt Engineering \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m While prompt engineering is about *crafting the message*, context engineering is about *architecting the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m environment*. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | Feature | Prompt Engineering | Context Engineering | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | Feature | **LangChain** [3, 4] | **LlamaIndex** [3, 4] | \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m | :--- | :--- | :--- | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Scope** | Discrete, single-turn interactions. | Continuous, multi-turn workflows and system state. | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Focus** | Finding the \"right words\" or instructions. | Curating the \"optimal set of tokens\" (history, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tools, data). | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Goal** | Optimizing a specific response. | Maintaining agent reliability and state over time. | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Mechanism** | Static text input. | Dynamic management (retrieval, summarization, pruning). | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Analogy** | Writing a good email to an employee. | Designing the employee's office, filing system, and \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m handbook. | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Primary Focus** | Flexible workflow & agent orchestration | Data indexing & advanced retrieval (RAG) | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Memory Classes** | `ConversationBufferMemory` (raw history)
`ConversationSummaryMemory` (LLM-summarized \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m history)
`ChatMessageHistory` (storage backend) | `ChatEngine` with modes:
`context` (standard \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m RAG)
`condense_question` (rewrites queries based on history) | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Context Strategy** | **Chains**: Sequential passing of context between steps.
**Agents**: Dynamic tool \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m selection to populate context. | **Indices**: `VectorStoreIndex` (semantic search), `ListIndex` (sequential), \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m `TreeIndex` (hierarchical summaries). | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Optimization** | `Context Compression`: Removing irrelevant tokens from retrieved docs. | `Response \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Synthesizers`:
`compact` (stuffs max chunks into prompt)
`tree_summarize` (hierarchical reduction) | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 4. Handling Context Limits vs. Long-Context Models \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m A critical architectural decision is whether to use RAG or rely on massive context windows (e.g., Gemini 1.5 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Pro's 2M tokens). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **RAG + Small Context**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Pros**: Significantly cheaper (fewer tokens processed), lower latency, and often higher accuracy for \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m specific fact retrieval. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Cons**: Can miss \"global\" insights that require reading the entire corpus at once. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Long-Context Models (LCW)**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Pros**: Can ingest entire books or codebases, enabling \"whole-document\" reasoning. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Cons**: \"Lost in the Middle\" phenomenon where models forget information in the middle of a massive \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Trade-off**: Benchmarks show that RAG systems often outperform LCW models in \"Needle in a Haystack\" \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m tests for large corpora (2M+ tokens) because they filter noise before the model sees it [3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 5. Specific Patterns \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Needle in a Haystack Optimization**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **The Problem**: LLMs struggle to find a specific fact (\"needle\") buried in a large amount of \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m irrelevant text (\"haystack\"). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Optimization**: Place critical instructions or facts at the very **beginning** or very **end** of the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context window, as models pay most attention to these areas (Serial Position Effect). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Stuffing vs. Context Selection**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Stuffing**: Naively filling the window with as much data as possible. This often leads to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m *context distraction* and higher costs [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Selection**: Intentionally curating what enters the window. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * *Techniques*: Using **Node Postprocessors** (LlamaIndex) to filter retrieved nodes by keywords or \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m similarity score before they reach the LLM [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * *Query Transformations*: Rewriting a user's vague query into a precise search term to select better \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context (e.g., LangChain's `HyDE` or LlamaIndex's `Multi-step query decomposition`) [4]. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m ### Sources \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [2] Anthropic Engineering, \"Effective context engineering for AI agents\" (referenced within [1]). \u001b[33m│\u001b[0m\n", - "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
-       " Research into structural techniques for context management in AI agents reveals a shift from static retrieval   \n",
-       " to dynamic, modular architectures that mimic human cognitive processes.                                         \n",
-       "                                                                                                                 \n",
-       " ### 1. Retrieval-Augmented Generation (RAG) Patterns                                                            \n",
-       " Modern RAG has evolved beyond simple vector search into **Modular RAG** architectures [1].                      \n",
-       " *   **Pre-Retrieval**: Techniques include **Query Rewriting** (transforming user input into optimal search      \n",
-       " queries) and **Query Expansion** (generating multiple sub-queries to broaden search coverage).                  \n",
-       " *   **Post-Retrieval**: Retrieved contexts undergo **Re-ranking** (scoring by relevance) and **Pruning**        \n",
-       " (removing redundant information) before reaching the LLM.                                                       \n",
-       " *   **GraphRAG**: A structural technique that builds a knowledge graph from the corpus. It uses community       \n",
-       " detection to create hierarchical summaries, allowing the agent to answer \"global\" questions (e.g., \"What are    \n",
-       " the main themes?\") that standard vector search misses [2].                                                      \n",
-       " *   **Agentic RAG**: Treats retrieval as a multi-step reasoning process where an agent actively critiques       \n",
-       " retrieved data and iteratively queries until it has sufficient context [1].                                     \n",
-       "                                                                                                                 \n",
-       " ### 2. Memory Architectures                                                                                     \n",
-       " Agent memory is structurally divided to balance immediate reasoning with long-term retention [3].               \n",
-       " *   **Short-Term Memory (STM)**: Resides in the LLM's **context window**. It acts as a \"working memory\" for the \n",
-       " current session, often managed by summary buffers to prevent overflow.                                          \n",
-       " *   **Long-Term Memory (LTM)**: Implemented via **Vector Databases** (for semantic search) and **Knowledge      \n",
-       " Graphs** (for structured relationships).                                                                        \n",
-       " *   **Episodic Memory**: Stores specific past experiences (sequences of actions/results) to enable few-shot     \n",
-       " learning from history.                                                                                          \n",
-       " *   **Procedural Memory**: Stores \"skills\" or \"how-to\" knowledge, often encoded as static prompt libraries or   \n",
-       " code tools the agent can invoke.                                                                                \n",
-       " *   **MemGPT Pattern**: An architecture where the LLM acts as an operating system, autonomously managing its    \n",
-       " own memory by deciding when to move information between context (STM) and storage (LTM) [3].                    \n",
-       "                                                                                                                 \n",
-       " ### 3. Context Window Management Strategies                                                                     \n",
-       " Techniques to maximize the utility of limited context windows include:                                          \n",
-       " *   **Sliding Windows**: A standard technique where long documents are broken into overlapping chunks (e.g.,    \n",
-       " 512 tokens with 128-token overlap) to ensure no context is lost at boundaries during processing [4].            \n",
-       " *   **Hierarchical Context**: Strategies like **GraphRAG** or **Fusion-in-Decoder (FiD)** process information   \n",
-       " at multiple levels of abstraction. Documents are first summarized into high-level vectors; only relevant        \n",
-       " sections are expanded into full text [2, 4].                                                                    \n",
-       " *   **Compression**: summarizing or embedding retrieved chunks into dense vectors before they enter the main    \n",
-       " context, allowing the agent to \"see\" more documents at lower resolution.                                        \n",
-       "                                                                                                                 \n",
-       " ### 4. Dynamic Context Selection Methods                                                                        \n",
-       " Selecting the *right* context is as critical as retrieving it.                                                  \n",
-       " *   **Re-ranking Algorithms**: **Cross-Encoders** (e.g., BERT-based) score the semantic similarity between a    \n",
-       " query and a document more accurately than bi-encoders but are computationally expensive. A common pattern is to \n",
-       " retrieve 100 documents with fast vector search, then re-rank the top 10 with a cross-encoder [5].               \n",
-       " *   **Semantic Routing**: Using a lightweight classifier to direct queries to the most appropriate specialized  \n",
-       " index or RAG pipeline (e.g., routing technical queries to a documentation index vs. billing queries to a SQL    \n",
-       " database).                                                                                                      \n",
-       " *   **Stochastic & Fairness Rankers**: New methods introduce \"stochastic rankers\" to prevent exposure bias      \n",
-       " (where only the most popular docs are ever seen) and ensure diversity in the retrieved context [5].             \n",
-       "                                                                                                                 \n",
-       " ### Sources                                                                                                     \n",
-       " [1] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/                         \n",
-       " [2] GraphRAG Overview: https://microsoft.github.io/graphrag/                                                    \n",
-       " [3] Agent Memory Systems (STM/LTM/Episodic):                                                                    \n",
-       " https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge           \n",
-       " [4] LLM Input Formatting & Sliding Windows:                                                                     \n",
-       " https://milvus.io/ai-quick-reference/what-modifications-might-be-needed-to-the-llms-input-formatting-or-archite \n",
-       " cture-to-best-take-advantage-of-retrieved-documents-for-example-adding-special-tokens-or-segments-to-separate-c \n",
-       " ontext                                                                                                          \n",
-       " [5] Re-ranking & Dynamic Selection:                                                                             \n",
-       " https://www.chitika.com/re-ranking-in-retrieval-augmented-generation-how-to-use-re-rankers-in-rag/              \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Research into structural techniques for context management in AI agents reveals a shift from static retrieval \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m to dynamic, modular architectures that mimic human cognitive processes. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. Retrieval-Augmented Generation (RAG) Patterns \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Modern RAG has evolved beyond simple vector search into **Modular RAG** architectures [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Pre-Retrieval**: Techniques include **Query Rewriting** (transforming user input into optimal search \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m queries) and **Query Expansion** (generating multiple sub-queries to broaden search coverage). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Post-Retrieval**: Retrieved contexts undergo **Re-ranking** (scoring by relevance) and **Pruning** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m (removing redundant information) before reaching the LLM. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **GraphRAG**: A structural technique that builds a knowledge graph from the corpus. It uses community \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m detection to create hierarchical summaries, allowing the agent to answer \"global\" questions (e.g., \"What are \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m the main themes?\") that standard vector search misses [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Agentic RAG**: Treats retrieval as a multi-step reasoning process where an agent actively critiques \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m retrieved data and iteratively queries until it has sufficient context [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Memory Architectures \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Agent memory is structurally divided to balance immediate reasoning with long-term retention [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Short-Term Memory (STM)**: Resides in the LLM's **context window**. It acts as a \"working memory\" for the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m current session, often managed by summary buffers to prevent overflow. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Long-Term Memory (LTM)**: Implemented via **Vector Databases** (for semantic search) and **Knowledge \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Graphs** (for structured relationships). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Episodic Memory**: Stores specific past experiences (sequences of actions/results) to enable few-shot \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m learning from history. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Procedural Memory**: Stores \"skills\" or \"how-to\" knowledge, often encoded as static prompt libraries or \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m code tools the agent can invoke. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **MemGPT Pattern**: An architecture where the LLM acts as an operating system, autonomously managing its \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m own memory by deciding when to move information between context (STM) and storage (LTM) [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Context Window Management Strategies \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Techniques to maximize the utility of limited context windows include: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Sliding Windows**: A standard technique where long documents are broken into overlapping chunks (e.g., \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 512 tokens with 128-token overlap) to ensure no context is lost at boundaries during processing [4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Hierarchical Context**: Strategies like **GraphRAG** or **Fusion-in-Decoder (FiD)** process information \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m at multiple levels of abstraction. Documents are first summarized into high-level vectors; only relevant \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m sections are expanded into full text [2, 4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Compression**: summarizing or embedding retrieved chunks into dense vectors before they enter the main \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m context, allowing the agent to \"see\" more documents at lower resolution. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Dynamic Context Selection Methods \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Selecting the *right* context is as critical as retrieving it. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Re-ranking Algorithms**: **Cross-Encoders** (e.g., BERT-based) score the semantic similarity between a \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m query and a document more accurately than bi-encoders but are computationally expensive. A common pattern is to \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m retrieve 100 documents with fast vector search, then re-rank the top 10 with a cross-encoder [5]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Semantic Routing**: Using a lightweight classifier to direct queries to the most appropriate specialized \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m index or RAG pipeline (e.g., routing technical queries to a documentation index vs. billing queries to a SQL \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m database). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Stochastic & Fairness Rankers**: New methods introduce \"stochastic rankers\" to prevent exposure bias \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m (where only the most popular docs are ever seen) and ensure diversity in the retrieved context [5]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### Sources \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [2] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [3] Agent Memory Systems (STM/LTM/Episodic): \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [4] LLM Input Formatting & Sliding Windows: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://milvus.io/ai-quick-reference/what-modifications-might-be-needed-to-the-llms-input-formatting-or-archite \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m cture-to-best-take-advantage-of-retrieved-documents-for-example-adding-special-tokens-or-segments-to-separate-c \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ontext \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [5] Re-ranking & Dynamic Selection: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://www.chitika.com/re-ranking-in-retrieval-augmented-generation-how-to-use-re-rankers-in-rag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
-       " Research into context optimization reveals a multi-layered approach ranging from high-level architectural       \n",
-       " strategies (summarization) to low-level token manipulation (pruning). Below is a summary of findings on         \n",
-       " techniques for reducing context size and optimizing AI agent performance.                                       \n",
-       "                                                                                                                 \n",
-       " ### 1. Summarization Techniques                                                                                 \n",
-       " Summarization is the most common high-level strategy for managing long conversation histories.                  \n",
-       "                                                                                                                 \n",
-       " *   **Rolling Summaries (Incremental Compression):** Instead of retaining the entire history, the system        \n",
-       " periodically \"melts down\" previous turns into a concise summary. This maintains continuity but may lose         \n",
-       " specific nuances over time [1].                                                                                 \n",
-       " *   **Hybrid Memory:** A robust approach that combines different memory types. It typically preserves \"pinned\"  \n",
-       " key messages (like the system prompt or initial user instructions) verbatim while compressing intermediate      \n",
-       " messages into summaries. This ensures critical instructions are never lost to compression artifacts [1].        \n",
-       " *   **Externalized Memory:** For agents requiring long-term recall, context is offloaded to a vector database.  \n",
-       " Relevant information is retrieved semantically (RAG) rather than keeping everything in the active context       \n",
-       " window [1].                                                                                                     \n",
-       "                                                                                                                 \n",
-       " ### 2. Prompt Compression Algorithms                                                                            \n",
-       " These algorithms aim to reduce the number of tokens sent to the model without losing semantic meaning, often by \n",
-       " analyzing the information density of the text.                                                                  \n",
-       "                                                                                                                 \n",
-       " *   **Selective Context:** This method uses a smaller base language model (like GPT-2) to calculate the         \n",
-       " \"self-information\" (perplexity) of lexical units (tokens, phrases, or sentences). Content with low              \n",
-       " self-information—meaning it is predictable or redundant—is pruned. This can reduce context usage significantly  \n",
-       " while maintaining performance on tasks like summarization and QA [2].                                           \n",
-       " *   **LLMLingua:** A coarse-to-fine compression framework. It employs a \"budget controller\" to dynamically      \n",
-       " allocate different compression ratios to different parts of the prompt (e.g., compressing few-shot              \n",
-       " demonstrations aggressively while preserving the user's specific instruction). It also aligns the distribution  \n",
-       " between the small compression model and the target LLM to prevent performance degradation [3].                  \n",
-       "                                                                                                                 \n",
-       " ### 3. Token Selection & Pruning Strategies                                                                     \n",
-       " Token pruning operates at a lower level, often during the inference process itself.                             \n",
-       "                                                                                                                 \n",
-       " *   **Dynamic Token Pruning (e.g., LazyLLM):** Unlike static pruning which removes tokens once and for all,     \n",
-       " dynamic strategies like LazyLLM selectively compute the Key-Value (KV) cache for tokens only when they are      \n",
-       " important for the *next* token prediction. This allows the model to \"lazy load\" context, accelerating the       \n",
-       " pre-filling stage and decoding without fine-tuning [4].                                                         \n",
-       " *   **Token Merging (ToMe):** While originally popular in Vision Transformers, similar concepts are applied to  \n",
-       " LLMs where redundant tokens are merged rather than dropped, preserving their signal while reducing the sequence \n",
-       " length [3].                                                                                                     \n",
-       " *   **Dynamic Cutoff (Token Budgeting):** A simple LIFO (Last-In, First-Out) strategy where the oldest messages \n",
-       " are dropped once a strict token limit is reached. While easy to implement, it risks abrupt context loss [1].    \n",
-       "                                                                                                                 \n",
-       " ### 4. Tools and Libraries                                                                                      \n",
-       " Several libraries provide out-of-the-box implementations for these techniques.                                  \n",
-       "                                                                                                                 \n",
-       " *   **LangChain:** Offers a suite of tools specifically for context optimization:                               \n",
-       "     *   **Contextual Compression Retriever:** Wraps a base retriever and compresses the retrieved documents     \n",
-       " before returning them.                                                                                          \n",
-       "     *   **LLMChainExtractor:** Uses an LLM chain to extract *only* the relevant sentences from a retrieved      \n",
-       " document, discarding irrelevant fluff.                                                                          \n",
-       "     *   **EmbeddingsFilter:** A cheaper/faster alternative that filters documents based on embedding similarity \n",
-       " thresholds rather than using an LLM call.                                                                       \n",
-       "     *   **DocumentCompressorPipeline:** Allows chaining multiple transformers (e.g., splitting text -> removing \n",
-       " redundant documents -> filtering by relevance) [5].                                                             \n",
-       " *   **Slimcontext:** A lightweight Node.js library designed for \"hybrid memory\" management. It automates the    \n",
-       " process of preserving key messages while summarizing intermediate history to fit within a specific token budget \n",
-       " [1].                                                                                                            \n",
-       "                                                                                                                 \n",
-       " ### Sources                                                                                                     \n",
-       " [1] Don’t Let Your AI Agent Forget: Smarter Strategies for Summarizing Message History:                         \n",
-       " https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \n",
-       " tory-a2d5284539f1                                                                                               \n",
-       " [2] Selective Context GitHub Repository: https://github.com/liyucheng09/Selective_Context                       \n",
-       " [3] LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models:                          \n",
-       " https://arxiv.org/html/2310.05736v2                                                                             \n",
-       " [4] LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference:                                    \n",
-       " https://machinelearning.apple.com/research/dynamic-token-pruning                                                \n",
-       " [5] LangChain Contextual Compression Retriever:                                                                 \n",
-       " https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html      \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Research into context optimization reveals a multi-layered approach ranging from high-level architectural \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m strategies (summarization) to low-level token manipulation (pruning). Below is a summary of findings on \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m techniques for reducing context size and optimizing AI agent performance. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. Summarization Techniques \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Summarization is the most common high-level strategy for managing long conversation histories. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Rolling Summaries (Incremental Compression):** Instead of retaining the entire history, the system \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m periodically \"melts down\" previous turns into a concise summary. This maintains continuity but may lose \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m specific nuances over time [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Hybrid Memory:** A robust approach that combines different memory types. It typically preserves \"pinned\" \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m key messages (like the system prompt or initial user instructions) verbatim while compressing intermediate \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m messages into summaries. This ensures critical instructions are never lost to compression artifacts [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Externalized Memory:** For agents requiring long-term recall, context is offloaded to a vector database. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Relevant information is retrieved semantically (RAG) rather than keeping everything in the active context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m window [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Prompt Compression Algorithms \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m These algorithms aim to reduce the number of tokens sent to the model without losing semantic meaning, often by \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m analyzing the information density of the text. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Selective Context:** This method uses a smaller base language model (like GPT-2) to calculate the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \"self-information\" (perplexity) of lexical units (tokens, phrases, or sentences). Content with low \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m self-information—meaning it is predictable or redundant—is pruned. This can reduce context usage significantly \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m while maintaining performance on tasks like summarization and QA [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LLMLingua:** A coarse-to-fine compression framework. It employs a \"budget controller\" to dynamically \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m allocate different compression ratios to different parts of the prompt (e.g., compressing few-shot \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m demonstrations aggressively while preserving the user's specific instruction). It also aligns the distribution \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m between the small compression model and the target LLM to prevent performance degradation [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Token Selection & Pruning Strategies \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Token pruning operates at a lower level, often during the inference process itself. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Dynamic Token Pruning (e.g., LazyLLM):** Unlike static pruning which removes tokens once and for all, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m dynamic strategies like LazyLLM selectively compute the Key-Value (KV) cache for tokens only when they are \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m important for the *next* token prediction. This allows the model to \"lazy load\" context, accelerating the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m pre-filling stage and decoding without fine-tuning [4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Token Merging (ToMe):** While originally popular in Vision Transformers, similar concepts are applied to \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m LLMs where redundant tokens are merged rather than dropped, preserving their signal while reducing the sequence \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m length [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Dynamic Cutoff (Token Budgeting):** A simple LIFO (Last-In, First-Out) strategy where the oldest messages \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m are dropped once a strict token limit is reached. While easy to implement, it risks abrupt context loss [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Tools and Libraries \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Several libraries provide out-of-the-box implementations for these techniques. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LangChain:** Offers a suite of tools specifically for context optimization: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Contextual Compression Retriever:** Wraps a base retriever and compresses the retrieved documents \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m before returning them. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LLMChainExtractor:** Uses an LLM chain to extract *only* the relevant sentences from a retrieved \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m document, discarding irrelevant fluff. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **EmbeddingsFilter:** A cheaper/faster alternative that filters documents based on embedding similarity \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m thresholds rather than using an LLM call. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **DocumentCompressorPipeline:** Allows chaining multiple transformers (e.g., splitting text -> removing \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m redundant documents -> filtering by relevance) [5]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Slimcontext:** A lightweight Node.js library designed for \"hybrid memory\" management. It automates the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m process of preserving key messages while summarizing intermediate history to fit within a specific token budget \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### Sources \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1] Don’t Let Your AI Agent Forget: Smarter Strategies for Summarizing Message History: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tory-a2d5284539f1 \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [2] Selective Context GitHub Repository: https://github.com/liyucheng09/Selective_Context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [3] LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://arxiv.org/html/2310.05736v2 \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [4] LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://machinelearning.apple.com/research/dynamic-token-pruning \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [5] LangChain Contextual Compression Retriever: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [2] Context Engineering Strategies (n8n): \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [3] RAG vs Long Context Window Study: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [4] LangChain & LlamaIndex Comparison: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m in-large-2b5f3fabc360 \u001b[33m│\u001b[0m\n", "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1384,148 +1097,217 @@ "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
+       " 🔧 Tool Call: write_todos                                                                                       \n",
+       "    Args: {                                                                                                      \n",
+       "   \"todos\": [                                                                                                    \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Save research request to /research_request.md\"                                                \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Research context engineering approaches for AI agents using a sub-agent\"                      \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"in_progress\",                                                                                  \n",
+       "       \"content\": \"Synthesize findings and write final report to /final_report.md\"                               \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"pending\",                                                                                      \n",
+       "       \"content\": \"Verify report against original request\"                                                       \n",
+       "     }                                                                                                           \n",
+       "   ]                                                                                                             \n",
+       " }                                                                                                               \n",
+       "    ID: ea8aeaae-7e6a-4f8b-beba-1f252efabd24                                                                     \n",
+       "                                                                                                                 \n",
        " 🔧 Tool Call: write_file                                                                                        \n",
        "    Args: {                                                                                                      \n",
-       "   \"content\": \"# Context Engineering Approaches for Building AI Agents\\n\\nContext Engineering is the systematic  \n",
-       " practice of designing, curating, and managing the information state (context) provided to an AI agent. Unlike   \n",
-       " prompt engineering, which focuses on crafting static instructions, context engineering is an architectural      \n",
-       " discipline concerned with the dynamic flow of information\\u2014history, tools, and data\\u2014to ensure agent    \n",
-       " reliability over long-horizon tasks.\\n\\n## Core Principles\\n\\nEffective context engineering aims to solve       \n",
-       " \\\"context rot\\\" and attention scarcity by optimizing the signal-to-noise ratio within the agent's limited       \n",
-       " context window.\\n\\n*   **Dynamic State Management**: Moving beyond static prompts to dynamic environments where \n",
-       " context is retrieved, pruned, and updated in real-time [1].\\n*   **Signal Maximization**: Prioritizing          \n",
-       " high-value tokens. As context length increases, a model's ability to retrieve specific details decreases; thus, \n",
-       " reducing noise is critical for accuracy [2].\\n*   **Distinction from Prompt Engineering**:\\n    *   *Prompt     \n",
-       " Engineering*: \\\"How do I ask this question to get the best answer?\\\"\\n    *   *Context Engineering*: \\\"How do I \n",
-       " architect the agent's memory and environment so it has the right information to answer any question?\\\"          \n",
-       " [1].\\n\\n## Structural Approaches\\n\\n### 1. Retrieval-Augmented Generation (RAG) Patterns\\nModern agents use     \n",
-       " advanced RAG patterns to inject context dynamically rather than stuffing the window.\\n*   **Agentic RAG**: A    \n",
-       " multi-step process where the agent actively critiques retrieved data and iteratively queries until it has       \n",
-       " sufficient context, rather than accepting the first retrieval result [3].\\n*   **GraphRAG**: Uses knowledge     \n",
-       " graphs to structure data hierarchically. This allows agents to answer \\\"global\\\" questions (e.g., \\\"What are    \n",
-       " the main themes in this dataset?\\\") by traversing community summaries, a capability often missing in standard   \n",
-       " vector search [4].\\n*   **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and       \n",
-       " post-retrieval steps (re-ranking/pruning) to refine the context before it reaches the LLM [3].\\n\\n### 2. Memory \n",
-       " Architectures\\nTo handle long-running tasks, agents employ split-memory architectures mimicking human           \n",
-       " cognition.\\n*   **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory           \n",
-       " hierarchy. It explicitly moves information between \\\"active context\\\" (RAM) and \\\"archival storage\\\"            \n",
-       " (Disk/Vector DB) to manage limited context windows autonomously [5].\\n*   **Short-Term vs. Long-Term**:\\n    *  \n",
-       " *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling summaries.\\n    \n",
-       " *   *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for infinite    \n",
-       " storage [5].\\n*   **Episodic Memory**: Storing sequences of past actions and results to enable few-shot         \n",
-       " learning from previous experiences.\\n\\n### 3. Context Management Strategies\\n*   **Sliding Windows**: Breaking  \n",
-       " long documents into overlapping chunks (e.g., 512 tokens with 128-token overlap) to process streams of data     \n",
-       " without losing boundary context [6].\\n*   **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at \n",
-       " multiple levels of abstraction. Agents first scan high-level summaries and only expand relevant sections into   \n",
-       " full text [4].\\n\\n## Optimization & Compression Techniques\\n\\nAs context windows fill, performance degrades and \n",
-       " costs rise. Optimization techniques aim to compress information without losing semantic value.\\n\\n### 1.        \n",
-       " Summarization Strategies\\n*   **Rolling Summaries**: Periodically \\\"melting down\\\" the conversation history     \n",
-       " into a concise narrative. This frees up space but risks losing specific details over time [7].\\n*   **Hybrid    \n",
-       " Memory**: A robust approach that keeps \\\"pinned\\\" messages (system prompts, user instructions) verbatim while   \n",
-       " compressing intermediate conversation turns. This prevents critical instructions from being distorted by        \n",
-       " summarization artifacts [7].\\n\\n### 2. Algorithmic Compression\\n*   **Selective Context**: Uses a smaller       \n",
-       " language model (like GPT-2) to calculate the \\\"self-information\\\" (perplexity) of tokens. Predictable or        \n",
-       " redundant tokens are pruned, retaining only high-information content [8].\\n*   **LLMLingua**: A coarse-to-fine  \n",
-       " compression framework that uses a budget controller to allocate compression ratios dynamically. It creates a    \n",
-       " compressed prompt that retains the original instruction's intent while reducing token count by up to 20x        \n",
-       " [9].\\n\\n### 3. Dynamic Token Pruning\\n*   **LazyLLM**: A specialized inference technique that selectively       \n",
-       " computes the Key-Value (KV) cache only for tokens important for the *next* token prediction. This allows the    \n",
-       " model to \\\"lazy load\\\" context, speeding up processing for long contexts [10].\\n\\n## Implementation Tools\\n*    \n",
-       " **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter and    \n",
-       " compress documents before they enter the agent's context [11].\\n*   **Slimcontext**: A lightweight library for  \n",
-       " managing hybrid memory (pinned + summarized) in Node.js agents [7].\\n\\n### Sources\\n[1] HowAIWorks.ai,          \n",
-       " \\\"Context Engineering: AI Agent Optimization Guide,\\\" Oct 2025.\\n[2] Anthropic Engineering, \\\"Effective context \n",
-       " engineering for AI agents.\\\"\\n[3] Advanced & Modular RAG Patterns:                                              \n",
-       " https://ai-radar.aoe.com/architecture-pattern/rag/\\n[4] GraphRAG Overview:                                      \n",
-       " https://microsoft.github.io/graphrag/\\n[5] Agent Memory Systems:                                                \n",
-       " https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge\\n[6] LLM  \n",
-       " Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/\\n[7] Don\\u2019t Let Your AI Agent     \n",
-       " Forget:                                                                                                         \n",
-       " https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \n",
-       " tory-a2d5284539f1\\n[8] Selective Context: https://github.com/liyucheng09/Selective_Context\\n[9] LLMLingua:      \n",
-       " https://arxiv.org/html/2310.05736v2\\n[10] LazyLLM:                                                              \n",
-       " https://machinelearning.apple.com/research/dynamic-token-pruning\\n[11] LangChain Contextual Compression:        \n",
-       " https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html\",    \n",
-       "   \"file_path\": \"/final_report.md\"                                                                               \n",
+       "   \"file_path\": \"/final_report.md\",                                                                              \n",
+       "   \"content\": \"# Context Engineering Approaches for AI Agents\\n\\n## 1. Introduction\\nContext Engineering is the  \n",
+       " systematic practice of designing, managing, and optimizing the information provided to AI models to ensure      \n",
+       " accurate, consistent, and context-aware behavior. While often confused with prompt engineering (which focuses   \n",
+       " on static instructions), context engineering treats the AI's context window as a dynamic, finite resource that  \n",
+       " must be architected and managed throughout the lifecycle of an interaction. It is a critical discipline for     \n",
+       " building robust AI agents that can maintain state and relevance over long, complex tasks.\\n\\n## 2. Core         \n",
+       " Concepts\\nTo build effective agents, developers must manage several fundamental aspects of the Large Language   \n",
+       " Model (LLM) environment:\\n\\n*   **Context Window**: The hard limit on the amount of text (tokens) an LLM can    \n",
+       " process at once (e.g., 8k, 128k, or 1M tokens). Effective engineering ensures this space is used                \n",
+       " efficiently.\\n*   **State Management**: Tracking the \\\"world state\\\" across multi-turn conversations. This      \n",
+       " includes user intent, task progress, and tool outputs.\\n*   **Context Pollution**: The phenomenon where         \n",
+       " irrelevant or contradictory information in the context degrades model performance, leading to hallucinations or \n",
+       " confusion [2].\\n*   **Serial Position Effect**: The tendency of LLMs to pay more attention to information at    \n",
+       " the very beginning and very end of the context window, often ignoring details buried in the middle (\\\"Lost in   \n",
+       " the Middle\\\") [3].\\n\\n## 3. Key Approaches and Techniques\\n\\n### Dynamic Context Injection & Retrieval          \n",
+       " (RAG)\\nRather than loading all available data at once, agents \\\"pull\\\" information only when necessary.\\n*      \n",
+       " **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data only     \n",
+       " after identifying a specific knowledge gap [2].\\n*   **RAG (Retrieval-Augmented Generation)**: The standard for \n",
+       " large knowledge bases. Instead of \\\"stuffing\\\" a document, the system retrieves only the top-k most relevant    \n",
+       " chunks from a vector store to populate the context [2][3].\\n\\n### Memory Management\\nAgents require             \n",
+       " sophisticated memory systems to maintain continuity without overflowing context limits:\\n*   **Short-term       \n",
+       " Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages).\\n*   **Long-term      \n",
+       " Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, Vector Stores) and  \n",
+       " retrieved only when relevant to the current query [2].\\n*   **Pruning and Trimming**: Algorithms that           \n",
+       " selectively remove older, less relevant tokens while preserving the semantic core of the conversation           \n",
+       " history.\\n\\n### Summarization and Compression\\n*   **Tool Output Summarization**: Raw outputs from tools (like  \n",
+       " HTML from a web scraper) are often too verbose. An intermediate step or \\\"summarizer agent\\\" distills this data \n",
+       " into key points before passing it to the main reasoning agent [2].\\n*   **Context Compression**: Removing stop  \n",
+       " words or irrelevant sections from retrieved documents to maximize information density per token.\\n\\n### Prompt  \n",
+       " Structuring\\nUsing strict formats within the context window helps the model parse information correctly:\\n*     \n",
+       " **Separation of Concerns**: Clearly formatting \\\"System Instructions,\\\" \\\"User Input,\\\" and \\\"Tool Outputs\\\"    \n",
+       " (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.\\n\\n## 4.       \n",
+       " Architectural Frameworks\\nTwo primary frameworks dominate the implementation of these                           \n",
+       " strategies:\\n\\n**LangChain**\\n*   **Focus**: Flexible orchestration and agent workflows.\\n*   **Approach**:     \n",
+       " Uses \\\"Chains\\\" to pass context sequentially between steps and \\\"Memory\\\" classes (e.g.,                        \n",
+       " `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].\\n*                \n",
+       " **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its own      \n",
+       " context.\\n\\n**LlamaIndex**\\n*   **Focus**: Data indexing and advanced retrieval.\\n*   **Approach**: specialized \n",
+       " in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`).\\n*   **Strategy**: Uses           \n",
+       " \\\"Response Synthesizers\\\" to optimize how retrieved context is presented to the LLM (e.g., `compact` mode to    \n",
+       " stuff chunks or `tree_summarize` for hierarchical reduction) [4].\\n\\n## 5. Strategic Patterns\\n\\n### RAG vs.    \n",
+       " Long-Context Windows (LCW)\\n*   **RAG**: Preferred for cost efficiency and low latency. It filters noise before \n",
+       " the model sees it, often resulting in higher accuracy for \\\"Needle in a Haystack\\\" retrieval tasks [3].\\n*      \n",
+       " **Long-Context Models (e.g., Gemini 1.5)**: Best for \\\"whole-document\\\" reasoning where global context is       \n",
+       " required. However, they are more expensive and prone to the \\\"Lost in the Middle\\\" effect.\\n\\n### Context       \n",
+       " Selection vs. Stuffing\\n*   **Context Stuffing**: Naively filling the window with all available data. This is   \n",
+       " generally an anti-pattern as it increases costs and distracts the model.\\n*   **Context Selection**: The        \n",
+       " proactive curation of inputs. Techniques include \\\"Node Postprocessors\\\" (filtering results by similarity       \n",
+       " score) and \\\"Query Transformations\\\" (rewriting vague user queries to fetch better context) [4].\\n\\n## 6.       \n",
+       " Conclusion\\nContext engineering is the bridge between raw data and intelligent agent behavior. By moving from   \n",
+       " naive context stuffing to sophisticated dynamic injection, memory management, and structured retrieval,         \n",
+       " developers can build agents that are both powerful and cost-effective. The choice between RAG architectures and \n",
+       " long-context models remains a key trade-off, with hybrid approaches often providing the best balance of         \n",
+       " accuracy and performance.\\n\\n### Sources\\n[1] Context Engineering Guide:                                        \n",
+       " https://www.promptingguide.ai/agents/context-engineering\\n[2] Context Engineering Strategies (n8n):             \n",
+       " https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/\\n[3] RAG vs Long      \n",
+       " Context Window Study: https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs\\n[4]  \n",
+       " LangChain & LlamaIndex Comparison:                                                                              \n",
+       " https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \n",
+       " in-large-2b5f3fabc366\"                                                                                          \n",
        " }                                                                                                               \n",
-       "    ID: 75657a73-9ad8-4815-96b4-615298b4ac95                                                                     \n",
+       "    ID: 589357a0-fbce-4941-9090-8f91d12880cb                                                                     \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: ea8aeaae-7e6a-4f8b-beba-1f252efabd24 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"# Context Engineering Approaches for Building AI Agents\\n\\nContext Engineering is the systematic \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m practice of designing, curating, and managing the information state (context) provided to an AI agent. Unlike \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m prompt engineering, which focuses on crafting static instructions, context engineering is an architectural \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m discipline concerned with the dynamic flow of information\\u2014history, tools, and data\\u2014to ensure agent \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m reliability over long-horizon tasks.\\n\\n## Core Principles\\n\\nEffective context engineering aims to solve \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \\\"context rot\\\" and attention scarcity by optimizing the signal-to-noise ratio within the agent's limited \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context window.\\n\\n* **Dynamic State Management**: Moving beyond static prompts to dynamic environments where \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context is retrieved, pruned, and updated in real-time [1].\\n* **Signal Maximization**: Prioritizing \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m high-value tokens. As context length increases, a model's ability to retrieve specific details decreases; thus, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m reducing noise is critical for accuracy [2].\\n* **Distinction from Prompt Engineering**:\\n * *Prompt \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Engineering*: \\\"How do I ask this question to get the best answer?\\\"\\n * *Context Engineering*: \\\"How do I \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m architect the agent's memory and environment so it has the right information to answer any question?\\\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [1].\\n\\n## Structural Approaches\\n\\n### 1. Retrieval-Augmented Generation (RAG) Patterns\\nModern agents use \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m advanced RAG patterns to inject context dynamically rather than stuffing the window.\\n* **Agentic RAG**: A \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m multi-step process where the agent actively critiques retrieved data and iteratively queries until it has \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m sufficient context, rather than accepting the first retrieval result [3].\\n* **GraphRAG**: Uses knowledge \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m graphs to structure data hierarchically. This allows agents to answer \\\"global\\\" questions (e.g., \\\"What are \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m the main themes in this dataset?\\\") by traversing community summaries, a capability often missing in standard \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m vector search [4].\\n* **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m post-retrieval steps (re-ranking/pruning) to refine the context before it reaches the LLM [3].\\n\\n### 2. Memory \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Architectures\\nTo handle long-running tasks, agents employ split-memory architectures mimicking human \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m cognition.\\n* **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m hierarchy. It explicitly moves information between \\\"active context\\\" (RAM) and \\\"archival storage\\\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (Disk/Vector DB) to manage limited context windows autonomously [5].\\n* **Short-Term vs. Long-Term**:\\n * \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling summaries.\\n \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for infinite \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m storage [5].\\n* **Episodic Memory**: Storing sequences of past actions and results to enable few-shot \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m learning from previous experiences.\\n\\n### 3. Context Management Strategies\\n* **Sliding Windows**: Breaking \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m long documents into overlapping chunks (e.g., 512 tokens with 128-token overlap) to process streams of data \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m without losing boundary context [6].\\n* **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m multiple levels of abstraction. Agents first scan high-level summaries and only expand relevant sections into \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m full text [4].\\n\\n## Optimization & Compression Techniques\\n\\nAs context windows fill, performance degrades and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m costs rise. Optimization techniques aim to compress information without losing semantic value.\\n\\n### 1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization Strategies\\n* **Rolling Summaries**: Periodically \\\"melting down\\\" the conversation history \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m into a concise narrative. This frees up space but risks losing specific details over time [7].\\n* **Hybrid \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Memory**: A robust approach that keeps \\\"pinned\\\" messages (system prompts, user instructions) verbatim while \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compressing intermediate conversation turns. This prevents critical instructions from being distorted by \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m summarization artifacts [7].\\n\\n### 2. Algorithmic Compression\\n* **Selective Context**: Uses a smaller \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m language model (like GPT-2) to calculate the \\\"self-information\\\" (perplexity) of tokens. Predictable or \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m redundant tokens are pruned, retaining only high-information content [8].\\n* **LLMLingua**: A coarse-to-fine \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compression framework that uses a budget controller to allocate compression ratios dynamically. It creates a \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compressed prompt that retains the original instruction's intent while reducing token count by up to 20x \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [9].\\n\\n### 3. Dynamic Token Pruning\\n* **LazyLLM**: A specialized inference technique that selectively \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m computes the Key-Value (KV) cache only for tokens important for the *next* token prediction. This allows the \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m model to \\\"lazy load\\\" context, speeding up processing for long contexts [10].\\n\\n## Implementation Tools\\n* \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compress documents before they enter the agent's context [11].\\n* **Slimcontext**: A lightweight library for \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m managing hybrid memory (pinned + summarized) in Node.js agents [7].\\n\\n### Sources\\n[1] HowAIWorks.ai, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \\\"Context Engineering: AI Agent Optimization Guide,\\\" Oct 2025.\\n[2] Anthropic Engineering, \\\"Effective context \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m engineering for AI agents.\\\"\\n[3] Advanced & Modular RAG Patterns: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://ai-radar.aoe.com/architecture-pattern/rag/\\n[4] GraphRAG Overview: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://microsoft.github.io/graphrag/\\n[5] Agent Memory Systems: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge\\n[6] LLM \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/\\n[7] Don\\u2019t Let Your AI Agent \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Forget: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m tory-a2d5284539f1\\n[8] Selective Context: https://github.com/liyucheng09/Selective_Context\\n[9] LLMLingua: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://arxiv.org/html/2310.05736v2\\n[10] LazyLLM: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://machinelearning.apple.com/research/dynamic-token-pruning\\n[11] LangChain Contextual Compression: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"# Context Engineering Approaches for AI Agents\\n\\n## 1. Introduction\\nContext Engineering is the \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m systematic practice of designing, managing, and optimizing the information provided to AI models to ensure \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m accurate, consistent, and context-aware behavior. While often confused with prompt engineering (which focuses \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m on static instructions), context engineering treats the AI's context window as a dynamic, finite resource that \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m must be architected and managed throughout the lifecycle of an interaction. It is a critical discipline for \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m building robust AI agents that can maintain state and relevance over long, complex tasks.\\n\\n## 2. Core \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Concepts\\nTo build effective agents, developers must manage several fundamental aspects of the Large Language \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Model (LLM) environment:\\n\\n* **Context Window**: The hard limit on the amount of text (tokens) an LLM can \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m process at once (e.g., 8k, 128k, or 1M tokens). Effective engineering ensures this space is used \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m efficiently.\\n* **State Management**: Tracking the \\\"world state\\\" across multi-turn conversations. This \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m includes user intent, task progress, and tool outputs.\\n* **Context Pollution**: The phenomenon where \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m irrelevant or contradictory information in the context degrades model performance, leading to hallucinations or \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m confusion [2].\\n* **Serial Position Effect**: The tendency of LLMs to pay more attention to information at \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the very beginning and very end of the context window, often ignoring details buried in the middle (\\\"Lost in \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the Middle\\\") [3].\\n\\n## 3. Key Approaches and Techniques\\n\\n### Dynamic Context Injection & Retrieval \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (RAG)\\nRather than loading all available data at once, agents \\\"pull\\\" information only when necessary.\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data only \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m after identifying a specific knowledge gap [2].\\n* **RAG (Retrieval-Augmented Generation)**: The standard for \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m large knowledge bases. Instead of \\\"stuffing\\\" a document, the system retrieves only the top-k most relevant \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m chunks from a vector store to populate the context [2][3].\\n\\n### Memory Management\\nAgents require \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m sophisticated memory systems to maintain continuity without overflowing context limits:\\n* **Short-term \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages).\\n* **Long-term \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, Vector Stores) and \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m retrieved only when relevant to the current query [2].\\n* **Pruning and Trimming**: Algorithms that \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m selectively remove older, less relevant tokens while preserving the semantic core of the conversation \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m history.\\n\\n### Summarization and Compression\\n* **Tool Output Summarization**: Raw outputs from tools (like \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m HTML from a web scraper) are often too verbose. An intermediate step or \\\"summarizer agent\\\" distills this data \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m into key points before passing it to the main reasoning agent [2].\\n* **Context Compression**: Removing stop \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m words or irrelevant sections from retrieved documents to maximize information density per token.\\n\\n### Prompt \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Structuring\\nUsing strict formats within the context window helps the model parse information correctly:\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Separation of Concerns**: Clearly formatting \\\"System Instructions,\\\" \\\"User Input,\\\" and \\\"Tool Outputs\\\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.\\n\\n## 4. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Architectural Frameworks\\nTwo primary frameworks dominate the implementation of these \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m strategies:\\n\\n**LangChain**\\n* **Focus**: Flexible orchestration and agent workflows.\\n* **Approach**: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Uses \\\"Chains\\\" to pass context sequentially between steps and \\\"Memory\\\" classes (e.g., \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its own \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m context.\\n\\n**LlamaIndex**\\n* **Focus**: Data indexing and advanced retrieval.\\n* **Approach**: specialized \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`).\\n* **Strategy**: Uses \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \\\"Response Synthesizers\\\" to optimize how retrieved context is presented to the LLM (e.g., `compact` mode to \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m stuff chunks or `tree_summarize` for hierarchical reduction) [4].\\n\\n## 5. Strategic Patterns\\n\\n### RAG vs. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Long-Context Windows (LCW)\\n* **RAG**: Preferred for cost efficiency and low latency. It filters noise before \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the model sees it, often resulting in higher accuracy for \\\"Needle in a Haystack\\\" retrieval tasks [3].\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Long-Context Models (e.g., Gemini 1.5)**: Best for \\\"whole-document\\\" reasoning where global context is \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m required. However, they are more expensive and prone to the \\\"Lost in the Middle\\\" effect.\\n\\n### Context \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Selection vs. Stuffing\\n* **Context Stuffing**: Naively filling the window with all available data. This is \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m generally an anti-pattern as it increases costs and distracts the model.\\n* **Context Selection**: The \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m proactive curation of inputs. Techniques include \\\"Node Postprocessors\\\" (filtering results by similarity \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m score) and \\\"Query Transformations\\\" (rewriting vague user queries to fetch better context) [4].\\n\\n## 6. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Conclusion\\nContext engineering is the bridge between raw data and intelligent agent behavior. By moving from \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m naive context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m developers can build agents that are both powerful and cost-effective. The choice between RAG architectures and \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m accuracy and performance.\\n\\n### Sources\\n[1] Context Engineering Guide: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://www.promptingguide.ai/agents/context-engineering\\n[2] Context Engineering Strategies (n8n): \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/\\n[3] RAG vs Long \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Context Window Study: https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs\\n[4] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m LangChain & LlamaIndex Comparison: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m in-large-2b5f3fabc366\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 75657a73-9ad8-4815-96b4-615298b4ac95 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 589357a0-fbce-4941-9090-8f91d12880cb \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
+       " Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'},      \n",
+       " {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'},  \n",
+       " {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'in_progress'},         \n",
+       " {'content': 'Verify report against original request', 'status': 'pending'}]                                     \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'in_progress'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -1548,60 +1330,22 @@ "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
-       " 🔧 Tool Call: write_todos                                                                                       \n",
+       " 🔧 Tool Call: read_file                                                                                         \n",
        "    Args: {                                                                                                      \n",
-       "   \"todos\": [                                                                                                    \n",
-       "     {                                                                                                           \n",
-       "       \"content\": \"Research core concepts and definitions of context engineering in AI agents\",                  \n",
-       "       \"status\": \"completed\"                                                                                     \n",
-       "     },                                                                                                          \n",
-       "     {                                                                                                           \n",
-       "       \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems,   \n",
-       " Sliding windows)\",                                                                                              \n",
-       "       \"status\": \"completed\"                                                                                     \n",
-       "     },                                                                                                          \n",
-       "     {                                                                                                           \n",
-       "       \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression,           \n",
-       " Summarization, Token selection)\",                                                                               \n",
-       "       \"status\": \"completed\"                                                                                     \n",
-       "     },                                                                                                          \n",
-       "     {                                                                                                           \n",
-       "       \"content\": \"Write final report on context engineering approaches\",                                        \n",
-       "       \"status\": \"completed\"                                                                                     \n",
-       "     }                                                                                                           \n",
-       "   ]                                                                                                             \n",
+       "   \"file_path\": \"/research_request.md\"                                                                           \n",
        " }                                                                                                               \n",
-       "    ID: 41f5ec4f-9cc1-42e5-bfe5-578e82689f9d                                                                     \n",
+       "    ID: dacd96a0-c106-462a-a710-6fc389064df5                                                                     \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: read_file \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research core concepts and definitions of context engineering in AI agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Sliding windows)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization, Token selection)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Write final report on context engineering approaches\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 41f5ec4f-9cc1-42e5-bfe5-578e82689f9d \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: dacd96a0-c106-462a-a710-6fc389064df5 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1612,21 +1356,13 @@ "data": { "text/html": [ "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
-       " Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \n",
-       " 'status': 'completed'}, {'content': 'Research structural techniques for context management (e.g., RAG,          \n",
-       " Long-term memory systems, Sliding windows)', 'status': 'completed'}, {'content': 'Research context optimization \n",
-       " and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'completed'}, \n",
-       " {'content': 'Write final report on context engineering approaches', 'status': 'completed'}]                     \n",
+       "      1  research context engineering approaches used to build AI agents                                         \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 'status': 'completed'}, {'content': 'Research structural techniques for context management (e.g., RAG, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Long-term memory systems, Sliding windows)', 'status': 'completed'}, {'content': 'Research context optimization \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m {'content': 'Write final report on context engineering approaches', 'status': 'completed'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 1 research context engineering approaches used to build AI agents \u001b[33m│\u001b[0m\n", "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1637,57 +1373,384 @@ "data": { "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
-       " The research on context engineering approaches for AI agents is complete. I have compiled a detailed report     \n",
-       " covering core principles, structural architectures (like RAG and Memory Systems), and optimization techniques   \n",
-       " (such as compression and pruning).                                                                              \n",
        "                                                                                                                 \n",
-       " ### Key Findings                                                                                                \n",
-       " *   **Definition**: Context engineering is the architectural practice of managing the agent's information       \n",
-       " environment (history, tools, data) to ensure reliability, as opposed to just crafting static prompts.           \n",
-       " *   **Structural Techniques**:                                                                                  \n",
-       "     *   **RAG Patterns**: GraphRAG and Agentic RAG allow for dynamic and hierarchical information retrieval.    \n",
-       "     *   **Memory**: \"MemGPT\" style architectures that manage Short-Term vs. Long-Term memory autonomously.      \n",
-       " *   **Optimization**:                                                                                           \n",
-       "     *   **Compression**: Algorithms like **LLMLingua** and **Selective Context** to reduce token usage while    \n",
-       " preserving meaning.                                                                                             \n",
-       "     *   **Summarization**: \"Rolling summaries\" and \"Hybrid memory\" to balance history retention with context    \n",
-       " limits.                                                                                                         \n",
+       " 🔧 Tool Call: write_todos                                                                                       \n",
+       "    Args: {                                                                                                      \n",
+       "   \"todos\": [                                                                                                    \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Save research request to /research_request.md\"                                                \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Research context engineering approaches for AI agents using a sub-agent\"                      \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Synthesize findings and write final report to /final_report.md\"                               \n",
+       "     },                                                                                                          \n",
+       "     {                                                                                                           \n",
+       "       \"status\": \"completed\",                                                                                    \n",
+       "       \"content\": \"Verify report against original request\"                                                       \n",
+       "     }                                                                                                           \n",
+       "   ]                                                                                                             \n",
+       " }                                                                                                               \n",
+       "    ID: 4c420284-baa7-4a5b-90ab-e156b502c709                                                                     \n",
        "                                                                                                                 \n",
-       " The full report has been saved to `/final_report.md`.                                                           \n",
-       "                                                                                                                 \n",
-       " ### Sources                                                                                                     \n",
-       " [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025.                                \n",
-       " [2] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/                         \n",
-       " [3] LLMLingua: https://arxiv.org/html/2310.05736v2                                                              \n",
-       " [4] GraphRAG Overview: https://microsoft.github.io/graphrag/                                                    \n",
+       " 🔧 Tool Call: read_file                                                                                         \n",
+       "    Args: {                                                                                                      \n",
+       "   \"file_path\": \"/final_report.md\"                                                                               \n",
+       " }                                                                                                               \n",
+       "    ID: 2d15c15f-589e-4c3d-98b0-03116707c143                                                                     \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", - "\u001b[37m│\u001b[0m The research on context engineering approaches for AI agents is complete. I have compiled a detailed report \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m covering core principles, structural architectures (like RAG and Memory Systems), and optimization techniques \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (such as compression and pruning). \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ### Key Findings \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Definition**: Context engineering is the architectural practice of managing the agent's information \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m environment (history, tools, data) to ensure reliability, as opposed to just crafting static prompts. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Structural Techniques**: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **RAG Patterns**: GraphRAG and Agentic RAG allow for dynamic and hierarchical information retrieval. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Memory**: \"MemGPT\" style architectures that manage Short-Term vs. Long-Term memory autonomously. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Optimization**: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Compression**: Algorithms like **LLMLingua** and **Selective Context** to reduce token usage while \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m preserving meaning. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Summarization**: \"Rolling summaries\" and \"Hybrid memory\" to balance history retention with context \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m limits. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 4c420284-baa7-4a5b-90ab-e156b502c709 \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m The full report has been saved to `/final_report.md`. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: read_file \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 2d15c15f-589e-4c3d-98b0-03116707c143 \u001b[37m│\u001b[0m\n", + "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
+       " Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'},      \n",
+       " {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'},  \n",
+       " {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'completed'},           \n",
+       " {'content': 'Verify report against original request', 'status': 'completed'}]                                   \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Verify report against original request', 'status': 'completed'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n",
+       "      1  # Context Engineering Approaches for AI Agents                                                          \n",
+       "      2                                                                                                          \n",
+       "      3  ## 1. Introduction                                                                                      \n",
+       "      4  Context Engineering is the systematic practice of designing, managing, and optimizing the information   \n",
+       " provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with     \n",
+       " prompt engineering (which focuses on static instructions), context engineering treats the AI's context window   \n",
+       " as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an interaction.  \n",
+       " It is a critical discipline for building robust AI agents that can maintain state and relevance over long,      \n",
+       " complex tasks.                                                                                                  \n",
+       "      5                                                                                                          \n",
+       "      6  ## 2. Core Concepts                                                                                     \n",
+       "      7  To build effective agents, developers must manage several fundamental aspects of the Large Language     \n",
+       " Model (LLM) environment:                                                                                        \n",
+       "      8                                                                                                          \n",
+       "      9  *   **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., \n",
+       " 8k, 128k, or 1M tokens). Effective engineering ensures this space is used efficiently.                          \n",
+       "     10  *   **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes     \n",
+       " user intent, task progress, and tool outputs.                                                                   \n",
+       "     11  *   **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context  \n",
+       " degrades model performance, leading to hallucinations or confusion [2].                                         \n",
+       "     12  *   **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very   \n",
+       " beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the         \n",
+       " Middle\") [3].                                                                                                   \n",
+       "     13                                                                                                          \n",
+       "     14  ## 3. Key Approaches and Techniques                                                                     \n",
+       "     15                                                                                                          \n",
+       "     16  ### Dynamic Context Injection & Retrieval (RAG)                                                         \n",
+       "     17  Rather than loading all available data at once, agents \"pull\" information only when necessary.          \n",
+       "     18  *   **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time   \n",
+       " data only after identifying a specific knowledge gap [2].                                                       \n",
+       "     19  *   **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of        \n",
+       " \"stuffing\" a document, the system retrieves only the top-k most relevant chunks from a vector store to populate \n",
+       " the context [2][3].                                                                                             \n",
+       "     20                                                                                                          \n",
+       "     21  ### Memory Management                                                                                   \n",
+       "     22  Agents require sophisticated memory systems to maintain continuity without overflowing context limits:  \n",
+       "     23  *   **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10         \n",
+       " messages).                                                                                                      \n",
+       "     24  *   **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases        \n",
+       " (Postgres, Vector Stores) and retrieved only when relevant to the current query [2].                            \n",
+       "     25  *   **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while      \n",
+       " preserving the semantic core of the conversation history.                                                       \n",
+       "     26                                                                                                          \n",
+       "     27  ### Summarization and Compression                                                                       \n",
+       "     28  *   **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too  \n",
+       " verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to the \n",
+       " main reasoning agent [2].                                                                                       \n",
+       "     29  *   **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to     \n",
+       " maximize information density per token.                                                                         \n",
+       "     30                                                                                                          \n",
+       "     31  ### Prompt Structuring                                                                                  \n",
+       "     32  Using strict formats within the context window helps the model parse information correctly:             \n",
+       "     33  *   **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool       \n",
+       " Outputs\" (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.       \n",
+       "     34                                                                                                          \n",
+       "     35  ## 4. Architectural Frameworks                                                                          \n",
+       "     36  Two primary frameworks dominate the implementation of these strategies:                                 \n",
+       "     37                                                                                                          \n",
+       "     38  **LangChain**                                                                                           \n",
+       "     39  *   **Focus**: Flexible orchestration and agent workflows.                                              \n",
+       "     40  *   **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g.,  \n",
+       " `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].                   \n",
+       "     41  *   **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to  \n",
+       " its own context.                                                                                                \n",
+       "     42                                                                                                          \n",
+       "     43  **LlamaIndex**                                                                                          \n",
+       "     44  *   **Focus**: Data indexing and advanced retrieval.                                                    \n",
+       "     45  *   **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`,           \n",
+       " `TreeIndex`).                                                                                                   \n",
+       "     46  *   **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the    \n",
+       " LLM (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4].                  \n",
+       "     47                                                                                                          \n",
+       "     48  ## 5. Strategic Patterns                                                                                \n",
+       "     49                                                                                                          \n",
+       "     50  ### RAG vs. Long-Context Windows (LCW)                                                                  \n",
+       "     51  *   **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it,  \n",
+       " often resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3].                              \n",
+       "     52  *   **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global        \n",
+       " context is required. However, they are more expensive and prone to the \"Lost in the Middle\" effect.             \n",
+       "     53                                                                                                          \n",
+       "     54  ### Context Selection vs. Stuffing                                                                      \n",
+       "     55  *   **Context Stuffing**: Naively filling the window with all available data. This is generally an      \n",
+       " anti-pattern as it increases costs and distracts the model.                                                     \n",
+       "     56  *   **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\"   \n",
+       " (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch      \n",
+       " better context) [4].                                                                                            \n",
+       "     57                                                                                                          \n",
+       "     58  ## 6. Conclusion                                                                                        \n",
+       "     59  Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive \n",
+       " context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers    \n",
+       " can build agents that are both powerful and cost-effective. The choice between RAG architectures and            \n",
+       " long-context models remains a key trade-off, with hybrid approaches often providing the best balance of         \n",
+       " accuracy and performance.                                                                                       \n",
+       "     60                                                                                                          \n",
+       "     61  ### Sources                                                                                             \n",
+       "     62  [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering                 \n",
+       "     63  [2] Context Engineering Strategies (n8n):                                                               \n",
+       " https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/                       \n",
+       "     64  [3] RAG vs Long Context Window Study:                                                                   \n",
+       " https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs                             \n",
+       "     65  [4] LangChain & LlamaIndex Comparison:                                                                  \n",
+       " https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \n",
+       " in-large-2b5f3fabc366                                                                                           \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m 1 # Context Engineering Approaches for AI Agents \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 2 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 3 ## 1. Introduction \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 4 Context Engineering is the systematic practice of designing, managing, and optimizing the information \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m prompt engineering (which focuses on static instructions), context engineering treats the AI's context window \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an interaction. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m It is a critical discipline for building robust AI agents that can maintain state and relevance over long, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m complex tasks. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 5 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 6 ## 2. Core Concepts \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 7 To build effective agents, developers must manage several fundamental aspects of the Large Language \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Model (LLM) environment: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 8 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 9 * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 8k, 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 10 * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m user intent, task progress, and tool outputs. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 11 * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m degrades model performance, leading to hallucinations or confusion [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 12 * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Middle\") [3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 13 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 14 ## 3. Key Approaches and Techniques \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 15 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 16 ### Dynamic Context Injection & Retrieval (RAG) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 17 Rather than loading all available data at once, agents \"pull\" information only when necessary. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 18 * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m data only after identifying a specific knowledge gap [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 19 * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m \"stuffing\" a document, the system retrieves only the top-k most relevant chunks from a vector store to populate \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m the context [2][3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 20 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 21 ### Memory Management \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 22 Agents require sophisticated memory systems to maintain continuity without overflowing context limits: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 23 * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m messages). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 24 * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (Postgres, Vector Stores) and retrieved only when relevant to the current query [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 25 * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m preserving the semantic core of the conversation history. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 26 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 27 ### Summarization and Compression \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 28 * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m main reasoning agent [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 29 * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m maximize information density per token. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 30 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 31 ### Prompt Structuring \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 32 Using strict formats within the context window helps the model parse information correctly: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 33 * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Outputs\" (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 34 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 35 ## 4. Architectural Frameworks \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 36 Two primary frameworks dominate the implementation of these strategies: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 37 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 38 **LangChain** \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 39 * **Focus**: Flexible orchestration and agent workflows. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 40 * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 41 * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m its own context. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 42 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 43 **LlamaIndex** \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 44 * **Focus**: Data indexing and advanced retrieval. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 45 * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m `TreeIndex`). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 46 * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m LLM (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 47 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 48 ## 5. Strategic Patterns \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 49 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 50 ### RAG vs. Long-Context Windows (LCW) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 51 * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m often resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 52 * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context is required. However, they are more expensive and prone to the \"Lost in the Middle\" effect. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 53 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 54 ### Context Selection vs. Stuffing \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 55 * **Context Stuffing**: Naively filling the window with all available data. This is generally an \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m anti-pattern as it increases costs and distracts the model. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 56 * **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\" \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m better context) [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 57 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 58 ## 6. Conclusion \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 59 Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m can build agents that are both powerful and cost-effective. The choice between RAG architectures and \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m accuracy and performance. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 60 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 61 ### Sources \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 62 [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 63 [2] Context Engineering Strategies (n8n): \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 64 [3] RAG vs Long Context Window Study: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 65 [4] LangChain & LlamaIndex Comparison: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m in-large-2b5f3fabc366 \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n",
+       " I have completed the research on context engineering approaches for AI agents. The final report details         \n",
+       " definitions, core concepts, key techniques (RAG, Memory, Summarization), and architectural frameworks           \n",
+       " (LangChain vs LlamaIndex).                                                                                      \n",
+       "                                                                                                                 \n",
+       " ### **Research Findings Summary**                                                                               \n",
+       " Context engineering is the discipline of managing the finite \"context window\" of an AI model to ensure it has   \n",
+       " the right information at the right time. It is distinct from prompt engineering and focuses on dynamic state    \n",
+       " management.                                                                                                     \n",
+       "                                                                                                                 \n",
+       " **Key Approaches:**                                                                                             \n",
+       " 1.  **Dynamic Context Injection (RAG)**: Instead of loading all data, agents \"pull\" specific information from   \n",
+       " vector databases or APIs only when needed to answer a query.                                                    \n",
+       " 2.  **Memory Management**: Using sliding windows (short-term memory) and external databases (long-term memory)  \n",
+       " to maintain conversation history without exceeding token limits.                                                \n",
+       " 3.  **Summarization**: Compressing verbose tool outputs (like web scrape results) before passing them to the    \n",
+       " reasoning agent to save tokens.                                                                                 \n",
+       " 4.  **Architectural Frameworks**:                                                                               \n",
+       "     *   **LangChain**: Focuses on \"Chains\" and dynamic tool selection.                                          \n",
+       "     *   **LlamaIndex**: Focuses on data indexing and advanced retrieval strategies.                             \n",
+       "                                                                                                                 \n",
+       " **Strategic Trade-offs:**                                                                                       \n",
+       " *   **RAG vs. Long-Context**: RAG is often more cost-effective and accurate for finding specific facts          \n",
+       " (\"needles\") in large datasets, while massive context windows (e.g., 1M+ tokens) are better for holistic         \n",
+       " analysis but prone to the \"Lost in the Middle\" phenomenon.                                                      \n",
+       " *   **Selection vs. Stuffing**: \"Context Stuffing\" (filling the window blindly) is an anti-pattern. \"Context    \n",
+       " Selection\" (curating inputs via filtering and query transformation) leads to better performance.                \n",
+       "                                                                                                                 \n",
+       " The full detailed report can be found in `/final_report.md`.                                                    \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", + "\u001b[37m│\u001b[0m I have completed the research on context engineering approaches for AI agents. The final report details \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m definitions, core concepts, key techniques (RAG, Memory, Summarization), and architectural frameworks \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (LangChain vs LlamaIndex). \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ### Sources \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [2] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [3] LLMLingua: https://arxiv.org/html/2310.05736v2 \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ### **Research Findings Summary** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Context engineering is the discipline of managing the finite \"context window\" of an AI model to ensure it has \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the right information at the right time. It is distinct from prompt engineering and focuses on dynamic state \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m management. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Key Approaches:** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 1. **Dynamic Context Injection (RAG)**: Instead of loading all data, agents \"pull\" specific information from \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m vector databases or APIs only when needed to answer a query. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 2. **Memory Management**: Using sliding windows (short-term memory) and external databases (long-term memory) \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m to maintain conversation history without exceeding token limits. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 3. **Summarization**: Compressing verbose tool outputs (like web scrape results) before passing them to the \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m reasoning agent to save tokens. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 4. **Architectural Frameworks**: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **LangChain**: Focuses on \"Chains\" and dynamic tool selection. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **LlamaIndex**: Focuses on data indexing and advanced retrieval strategies. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Strategic Trade-offs:** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **RAG vs. Long-Context**: RAG is often more cost-effective and accurate for finding specific facts \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (\"needles\") in large datasets, while massive context windows (e.g., 1M+ tokens) are better for holistic \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m analysis but prone to the \"Lost in the Middle\" phenomenon. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **Selection vs. Stuffing**: \"Context Stuffing\" (filling the window blindly) is an anti-pattern. \"Context \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Selection\" (curating inputs via filtering and query transformation) leads to better performance. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m The full detailed report can be found in `/final_report.md`. \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1706,13 +1769,12 @@ " ],\n", " }, \n", ")\n", - "\n", "format_messages(result[\"messages\"])" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "id": "188b5ab5", "metadata": {}, "outputs": [ @@ -1721,105 +1783,107 @@ "text/html": [ "
╭──────────────────────────────────────────────────── Prompt ─────────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
-       "  # Context Engineering Approaches for Building AI Agents                                                        \n",
+       "  # Context Engineering Approaches for AI Agents                                                                 \n",
        "                                                                                                                 \n",
-       "  Context Engineering is the systematic practice of designing, curating, and managing the information state      \n",
-       "  (context) provided to an AI agent. Unlike prompt engineering, which focuses on crafting static instructions,   \n",
-       "  context engineering is an architectural discipline concerned with the dynamic flow of information—history,     \n",
-       "  tools, and data—to ensure agent reliability over long-horizon tasks.                                           \n",
+       "  ## 1. Introduction                                                                                             \n",
+       "  Context Engineering is the systematic practice of designing, managing, and optimizing the information          \n",
+       "  provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with    \n",
+       "  prompt engineering (which focuses on static instructions), context engineering treats the AI's context window  \n",
+       "  as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an              \n",
+       "  interaction. It is a critical discipline for building robust AI agents that can maintain state and relevance   \n",
+       "  over long, complex tasks.                                                                                      \n",
        "                                                                                                                 \n",
-       "  ## Core Principles                                                                                             \n",
+       "  ## 2. Core Concepts                                                                                            \n",
+       "  To build effective agents, developers must manage several fundamental aspects of the Large Language Model      \n",
+       "  (LLM) environment:                                                                                             \n",
        "                                                                                                                 \n",
-       "  Effective context engineering aims to solve \"context rot\" and attention scarcity by optimizing the             \n",
-       "  signal-to-noise ratio within the agent's limited context window.                                               \n",
+       "  *   **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., 8k,    \n",
+       "  128k, or 1M tokens). Effective engineering ensures this space is used efficiently.                             \n",
+       "  *   **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes user       \n",
+       "  intent, task progress, and tool outputs.                                                                       \n",
+       "  *   **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context         \n",
+       "  degrades model performance, leading to hallucinations or confusion [2].                                        \n",
+       "  *   **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very          \n",
+       "  beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the        \n",
+       "  Middle\") [3].                                                                                                  \n",
        "                                                                                                                 \n",
-       "  *   **Dynamic State Management**: Moving beyond static prompts to dynamic environments where context is        \n",
-       "  retrieved, pruned, and updated in real-time [1].                                                               \n",
-       "  *   **Signal Maximization**: Prioritizing high-value tokens. As context length increases, a model's ability    \n",
-       "  to retrieve specific details decreases; thus, reducing noise is critical for accuracy [2].                     \n",
-       "  *   **Distinction from Prompt Engineering**:                                                                   \n",
-       "      *   *Prompt Engineering*: \"How do I ask this question to get the best answer?\"                             \n",
-       "      *   *Context Engineering*: \"How do I architect the agent's memory and environment so it has the right      \n",
-       "  information to answer any question?\" [1].                                                                      \n",
+       "  ## 3. Key Approaches and Techniques                                                                            \n",
        "                                                                                                                 \n",
-       "  ## Structural Approaches                                                                                       \n",
+       "  ### Dynamic Context Injection & Retrieval (RAG)                                                                \n",
+       "  Rather than loading all available data at once, agents \"pull\" information only when necessary.                 \n",
+       "  *   **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data     \n",
+       "  only after identifying a specific knowledge gap [2].                                                           \n",
+       "  *   **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of \"stuffing\" a  \n",
+       "  document, the system retrieves only the top-k most relevant chunks from a vector store to populate the         \n",
+       "  context [2][3].                                                                                                \n",
        "                                                                                                                 \n",
-       "  ### 1. Retrieval-Augmented Generation (RAG) Patterns                                                           \n",
-       "  Modern agents use advanced RAG patterns to inject context dynamically rather than stuffing the window.         \n",
-       "  *   **Agentic RAG**: A multi-step process where the agent actively critiques retrieved data and iteratively    \n",
-       "  queries until it has sufficient context, rather than accepting the first retrieval result [3].                 \n",
-       "  *   **GraphRAG**: Uses knowledge graphs to structure data hierarchically. This allows agents to answer         \n",
-       "  \"global\" questions (e.g., \"What are the main themes in this dataset?\") by traversing community summaries, a    \n",
-       "  capability often missing in standard vector search [4].                                                        \n",
-       "  *   **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and post-retrieval steps     \n",
-       "  (re-ranking/pruning) to refine the context before it reaches the LLM [3].                                      \n",
+       "  ### Memory Management                                                                                          \n",
+       "  Agents require sophisticated memory systems to maintain continuity without overflowing context limits:         \n",
+       "  *   **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages).     \n",
+       "  *   **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres,    \n",
+       "  Vector Stores) and retrieved only when relevant to the current query [2].                                      \n",
+       "  *   **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while preserving  \n",
+       "  the semantic core of the conversation history.                                                                 \n",
        "                                                                                                                 \n",
-       "  ### 2. Memory Architectures                                                                                    \n",
-       "  To handle long-running tasks, agents employ split-memory architectures mimicking human cognition.              \n",
-       "  *   **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory hierarchy. It        \n",
-       "  explicitly moves information between \"active context\" (RAM) and \"archival storage\" (Disk/Vector DB) to manage  \n",
-       "  limited context windows autonomously [5].                                                                      \n",
-       "  *   **Short-Term vs. Long-Term**:                                                                              \n",
-       "      *   *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling        \n",
-       "  summaries.                                                                                                     \n",
-       "      *   *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for        \n",
-       "  infinite storage [5].                                                                                          \n",
-       "  *   **Episodic Memory**: Storing sequences of past actions and results to enable few-shot learning from        \n",
-       "  previous experiences.                                                                                          \n",
+       "  ### Summarization and Compression                                                                              \n",
+       "  *   **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too         \n",
+       "  verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to    \n",
+       "  the main reasoning agent [2].                                                                                  \n",
+       "  *   **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to maximize   \n",
+       "  information density per token.                                                                                 \n",
        "                                                                                                                 \n",
-       "  ### 3. Context Management Strategies                                                                           \n",
-       "  *   **Sliding Windows**: Breaking long documents into overlapping chunks (e.g., 512 tokens with 128-token      \n",
-       "  overlap) to process streams of data without losing boundary context [6].                                       \n",
-       "  *   **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at multiple levels of abstraction.      \n",
-       "  Agents first scan high-level summaries and only expand relevant sections into full text [4].                   \n",
+       "  ### Prompt Structuring                                                                                         \n",
+       "  Using strict formats within the context window helps the model parse information correctly:                    \n",
+       "  *   **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool Outputs\"     \n",
+       "  (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.               \n",
        "                                                                                                                 \n",
-       "  ## Optimization & Compression Techniques                                                                       \n",
+       "  ## 4. Architectural Frameworks                                                                                 \n",
+       "  Two primary frameworks dominate the implementation of these strategies:                                        \n",
        "                                                                                                                 \n",
-       "  As context windows fill, performance degrades and costs rise. Optimization techniques aim to compress          \n",
-       "  information without losing semantic value.                                                                     \n",
+       "  **LangChain**                                                                                                  \n",
+       "  *   **Focus**: Flexible orchestration and agent workflows.                                                     \n",
+       "  *   **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g.,         \n",
+       "  `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].                  \n",
+       "  *   **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its     \n",
+       "  own context.                                                                                                   \n",
        "                                                                                                                 \n",
-       "  ### 1. Summarization Strategies                                                                                \n",
-       "  *   **Rolling Summaries**: Periodically \"melting down\" the conversation history into a concise narrative.      \n",
-       "  This frees up space but risks losing specific details over time [7].                                           \n",
-       "  *   **Hybrid Memory**: A robust approach that keeps \"pinned\" messages (system prompts, user instructions)      \n",
-       "  verbatim while compressing intermediate conversation turns. This prevents critical instructions from being     \n",
-       "  distorted by summarization artifacts [7].                                                                      \n",
+       "  **LlamaIndex**                                                                                                 \n",
+       "  *   **Focus**: Data indexing and advanced retrieval.                                                           \n",
+       "  *   **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`).    \n",
+       "  *   **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the LLM       \n",
+       "  (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4].                     \n",
        "                                                                                                                 \n",
-       "  ### 2. Algorithmic Compression                                                                                 \n",
-       "  *   **Selective Context**: Uses a smaller language model (like GPT-2) to calculate the \"self-information\"      \n",
-       "  (perplexity) of tokens. Predictable or redundant tokens are pruned, retaining only high-information content    \n",
-       "  [8].                                                                                                           \n",
-       "  *   **LLMLingua**: A coarse-to-fine compression framework that uses a budget controller to allocate            \n",
-       "  compression ratios dynamically. It creates a compressed prompt that retains the original instruction's intent  \n",
-       "  while reducing token count by up to 20x [9].                                                                   \n",
+       "  ## 5. Strategic Patterns                                                                                       \n",
        "                                                                                                                 \n",
-       "  ### 3. Dynamic Token Pruning                                                                                   \n",
-       "  *   **LazyLLM**: A specialized inference technique that selectively computes the Key-Value (KV) cache only     \n",
-       "  for tokens important for the *next* token prediction. This allows the model to \"lazy load\" context, speeding   \n",
-       "  up processing for long contexts [10].                                                                          \n",
+       "  ### RAG vs. Long-Context Windows (LCW)                                                                         \n",
+       "  *   **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, often   \n",
+       "  resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3].                                   \n",
+       "  *   **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global context is    \n",
+       "  required. However, they are more expensive and prone to the \"Lost in the Middle\" effect.                       \n",
        "                                                                                                                 \n",
-       "  ## Implementation Tools                                                                                        \n",
-       "  *   **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter   \n",
-       "  and compress documents before they enter the agent's context [11].                                             \n",
-       "  *   **Slimcontext**: A lightweight library for managing hybrid memory (pinned + summarized) in Node.js agents  \n",
-       "  [7].                                                                                                           \n",
+       "  ### Context Selection vs. Stuffing                                                                             \n",
+       "  *   **Context Stuffing**: Naively filling the window with all available data. This is generally an             \n",
+       "  anti-pattern as it increases costs and distracts the model.                                                    \n",
+       "  *   **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\"          \n",
+       "  (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch     \n",
+       "  better context) [4].                                                                                           \n",
+       "                                                                                                                 \n",
+       "  ## 6. Conclusion                                                                                               \n",
+       "  Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive        \n",
+       "  context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers   \n",
+       "  can build agents that are both powerful and cost-effective. The choice between RAG architectures and           \n",
+       "  long-context models remains a key trade-off, with hybrid approaches often providing the best balance of        \n",
+       "  accuracy and performance.                                                                                      \n",
        "                                                                                                                 \n",
        "  ### Sources                                                                                                    \n",
-       "  [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025.                               \n",
-       "  [2] Anthropic Engineering, \"Effective context engineering for AI agents.\"                                      \n",
-       "  [3] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/                        \n",
-       "  [4] GraphRAG Overview: https://microsoft.github.io/graphrag/                                                   \n",
-       "  [5] Agent Memory Systems:                                                                                      \n",
-       "  https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge          \n",
-       "  [6] LLM Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/                              \n",
-       "  [7] Don’t Let Your AI Agent Forget:                                                                            \n",
-       "  https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-h  \n",
-       "  istory-a2d5284539f1                                                                                            \n",
-       "  [8] Selective Context: https://github.com/liyucheng09/Selective_Context                                        \n",
-       "  [9] LLMLingua: https://arxiv.org/html/2310.05736v2                                                             \n",
-       "  [10] LazyLLM: https://machinelearning.apple.com/research/dynamic-token-pruning                                 \n",
-       "  [11] LangChain Contextual Compression:                                                                         \n",
-       "  https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html     \n",
+       "  [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering                        \n",
+       "  [2] Context Engineering Strategies (n8n):                                                                      \n",
+       "  https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/                      \n",
+       "  [3] RAG vs Long Context Window Study:                                                                          \n",
+       "  https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs                            \n",
+       "  [4] LangChain & LlamaIndex Comparison:                                                                         \n",
+       "  https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperabilit  \n",
+       "  y-in-large-2b5f3fabc366                                                                                        \n",
        "                                                                                                                 \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
        "
\n" @@ -1827,105 +1891,107 @@ "text/plain": [ "\u001b[34m╭─\u001b[0m\u001b[34m───────────────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;32mPrompt\u001b[0m\u001b[34m \u001b[0m\u001b[34m────────────────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m # Context Engineering Approaches for Building AI Agents \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m # Context Engineering Approaches for AI Agents \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Context Engineering is the systematic practice of designing, curating, and managing the information state \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (context) provided to an AI agent. Unlike prompt engineering, which focuses on crafting static instructions, \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m context engineering is an architectural discipline concerned with the dynamic flow of information—history, \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m tools, and data—to ensure agent reliability over long-horizon tasks. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 1. Introduction\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Context Engineering is the systematic practice of designing, managing, and optimizing the information \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m prompt engineering (which focuses on static instructions), context engineering treats the AI's context window \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m interaction. It is a critical discipline for building robust AI agents that can maintain state and relevance \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m over long, complex tasks. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Core Principles\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 2. Core Concepts\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m To build effective agents, developers must manage several fundamental aspects of the Large Language Model \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (LLM) environment: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Effective context engineering aims to solve \"context rot\" and attention scarcity by optimizing the \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m signal-to-noise ratio within the agent's limited context window. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., 8k, \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes user \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m intent, task progress, and tool outputs. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m degrades model performance, leading to hallucinations or confusion [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Middle\") [3]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Dynamic State Management**: Moving beyond static prompts to dynamic environments where context is \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m retrieved, pruned, and updated in real-time [1]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Signal Maximization**: Prioritizing high-value tokens. As context length increases, a model's ability \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m to retrieve specific details decreases; thus, reducing noise is critical for accuracy [2]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Distinction from Prompt Engineering**: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Prompt Engineering*: \"How do I ask this question to get the best answer?\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Context Engineering*: \"How do I architect the agent's memory and environment so it has the right \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m information to answer any question?\" [1]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 3. Key Approaches and Techniques\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Structural Approaches\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Dynamic Context Injection & Retrieval (RAG)\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Rather than loading all available data at once, agents \"pull\" information only when necessary. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m only after identifying a specific knowledge gap [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of \"stuffing\" a \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m document, the system retrieves only the top-k most relevant chunks from a vector store to populate the \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m context [2][3]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 1. Retrieval-Augmented Generation (RAG) Patterns\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Modern agents use advanced RAG patterns to inject context dynamically rather than stuffing the window. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Agentic RAG**: A multi-step process where the agent actively critiques retrieved data and iteratively \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m queries until it has sufficient context, rather than accepting the first retrieval result [3]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **GraphRAG**: Uses knowledge graphs to structure data hierarchically. This allows agents to answer \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \"global\" questions (e.g., \"What are the main themes in this dataset?\") by traversing community summaries, a \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m capability often missing in standard vector search [4]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and post-retrieval steps \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (re-ranking/pruning) to refine the context before it reaches the LLM [3]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Memory Management\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Agents require sophisticated memory systems to maintain continuity without overflowing context limits: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages). \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Vector Stores) and retrieved only when relevant to the current query [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while preserving \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m the semantic core of the conversation history. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 2. Memory Architectures\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m To handle long-running tasks, agents employ split-memory architectures mimicking human cognition. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory hierarchy. It \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m explicitly moves information between \"active context\" (RAM) and \"archival storage\" (Disk/Vector DB) to manage \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m limited context windows autonomously [5]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Short-Term vs. Long-Term**: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m summaries. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m infinite storage [5]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Episodic Memory**: Storing sequences of past actions and results to enable few-shot learning from \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m previous experiences. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Summarization and Compression\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m the main reasoning agent [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to maximize \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m information density per token. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 3. Context Management Strategies\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Sliding Windows**: Breaking long documents into overlapping chunks (e.g., 512 tokens with 128-token \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m overlap) to process streams of data without losing boundary context [6]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at multiple levels of abstraction. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Agents first scan high-level summaries and only expand relevant sections into full text [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Prompt Structuring\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Using strict formats within the context window helps the model parse information correctly: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool Outputs\" \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Optimization & Compression Techniques\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 4. Architectural Frameworks\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Two primary frameworks dominate the implementation of these strategies: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m As context windows fill, performance degrades and costs rise. Optimization techniques aim to compress \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m information without losing semantic value. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **LangChain** \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Focus**: Flexible orchestration and agent workflows. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m own context. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 1. Summarization Strategies\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Rolling Summaries**: Periodically \"melting down\" the conversation history into a concise narrative. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m This frees up space but risks losing specific details over time [7]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Hybrid Memory**: A robust approach that keeps \"pinned\" messages (system prompts, user instructions) \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m verbatim while compressing intermediate conversation turns. This prevents critical instructions from being \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m distorted by summarization artifacts [7]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **LlamaIndex** \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Focus**: Data indexing and advanced retrieval. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`). \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the LLM \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 2. Algorithmic Compression\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Selective Context**: Uses a smaller language model (like GPT-2) to calculate the \"self-information\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (perplexity) of tokens. Predictable or redundant tokens are pruned, retaining only high-information content \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [8]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LLMLingua**: A coarse-to-fine compression framework that uses a budget controller to allocate \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m compression ratios dynamically. It creates a compressed prompt that retains the original instruction's intent \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m while reducing token count by up to 20x [9]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 5. Strategic Patterns\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 3. Dynamic Token Pruning\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LazyLLM**: A specialized inference technique that selectively computes the Key-Value (KV) cache only \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m for tokens important for the *next* token prediction. This allows the model to \"lazy load\" context, speeding \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m up processing for long contexts [10]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## RAG vs. Long-Context Windows (LCW)\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, often \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global context is \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m required. However, they are more expensive and prone to the \"Lost in the Middle\" effect. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Implementation Tools\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m and compress documents before they enter the agent's context [11]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Slimcontext**: A lightweight library for managing hybrid memory (pinned + summarized) in Node.js agents \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [7]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Context Selection vs. Stuffing\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Stuffing**: Naively filling the window with all available data. This is generally an \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m anti-pattern as it increases costs and distracts the model. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\" \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m better context) [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 6. Conclusion\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m can build agents that are both powerful and cost-effective. The choice between RAG architectures and \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m accuracy and performance. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [2] Anthropic Engineering, \"Effective context engineering for AI agents.\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [3] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [5] Agent Memory Systems: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [6] LLM Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [7] Don’t Let Your AI Agent Forget: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-h \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m istory-a2d5284539f1 \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [8] Selective Context: https://github.com/liyucheng09/Selective_Context \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [9] LLMLingua: https://arxiv.org/html/2310.05736v2 \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [10] LazyLLM: https://machinelearning.apple.com/research/dynamic-token-pruning \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [11] LangChain Contextual Compression: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [2] Context Engineering Strategies (n8n): \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [3] RAG vs Long Context Window Study: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [4] LangChain & LlamaIndex Comparison: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperabilit \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m y-in-large-2b5f3fabc366 \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] @@ -1949,7 +2015,7 @@ "source": [ "Trace: \n", "\n", - "https://smith.langchain.com/public/34012df8-ffe1-424b-aa17-efb21e87c53e/r" + "https://smith.langchain.com/public/72d23852-4616-4bcc-8d8a-b0d1905c945b/r" ] }, { diff --git a/deep_research/research_agent/prompts.py b/deep_research/research_agent/prompts.py index cdcbf33..588bec0 100644 --- a/deep_research/research_agent/prompts.py +++ b/deep_research/research_agent/prompts.py @@ -4,11 +4,12 @@ RESEARCH_WORKFLOW_INSTRUCTIONS = """# Research Workflow Follow this workflow for all research requests: -1. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` -2. **Plan**: Create a todo list with write_todos to break down the research into focused tasks -3. **Research**: Delegate research tasks to sub-agents using the task() tool -4. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below) -5. **Verify**: Read `/research_request.md` to confirm you've addressed all aspects of the original question +1. **Plan**: Create a todo list with write_todos to break down the research into focused tasks +2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` +3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for research, never conduct research yourself +4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number across all findings) +5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below) +6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and structure ## Research Planning Guidelines - Batch similar research tasks into a single TODO to minimize overhead @@ -48,10 +49,9 @@ Simply list items with details - no introduction needed: - Each section should be comprehensive and detailed - Use bullet points only when listing is more appropriate than prose - -Use numbered citations throughout your report: +**Citation format:** - Cite sources inline using [1], [2], [3] format -- Assign each unique URL a single citation number +- Assign each unique URL a single citation number across ALL sub-agent findings - End report with ### Sources section listing each numbered source - Number sources sequentially without gaps (1,2,3,4...) - Format: [1] Source Title: URL (each on separate line for proper list rendering) @@ -62,7 +62,6 @@ Use numbered citations throughout your report: ### Sources [1] AI Research Paper: https://example.com/paper [2] Industry Analysis: https://example.com/analysis - """ RESEARCHER_INSTRUCTIONS = """You are a research assistant conducting research on the user's input topic. For context, today's date is {date}. @@ -111,20 +110,24 @@ After each search tool call, use think_tool to analyze the results: -When providing your final answer, use numbered citations and include a Sources section: +When providing your findings back to the orchestrator: -1. **Cite sources inline**: Use [1], [2], [3] format when referencing information -2. **Sources section**: End with a numbered list of all sources +1. **Structure your response**: Organize findings with clear headings and detailed explanations +2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches +3. **Include Sources section**: End with ### Sources listing each numbered source with title and URL -Example format: +Example: +``` +## Key Findings Context engineering is a critical technique for AI agents [1]. Studies show that proper context management can improve performance by 40% [2]. ### Sources [1] Context Engineering Guide: https://example.com/context-guide [2] AI Performance Study: https://example.com/study +``` -This allows readers to verify information and explore topics further. +The orchestrator will consolidate citations from all sub-agents into the final report. """ @@ -138,20 +141,28 @@ Your role is to coordinate research by delegating tasks from your TODO list to s ## Delegation Strategy -**Simple queries** → 1 sub-agent: -- "List the top 10 coffee shops in San Francisco" -- "What is quantum computing?" -- "Summarize the history of the internet" +**DEFAULT: Start with 1 sub-agent** for most queries: +- "What is quantum computing?" → 1 sub-agent (general overview) +- "List the top 10 coffee shops in San Francisco" → 1 sub-agent +- "Summarize the history of the internet" → 1 sub-agent +- "Research context engineering for AI agents" → 1 sub-agent (covers all aspects) -**Comparisons** → 1 sub-agent per element: +**ONLY parallelize when the query EXPLICITLY requires comparison or has clearly independent aspects:** + +**Explicit comparisons** → 1 sub-agent per element: - "Compare OpenAI vs Anthropic vs DeepMind AI safety approaches" → 3 parallel sub-agents - "Compare Python vs JavaScript for web development" → 2 parallel sub-agents -**Multi-faceted research** → 1 sub-agent per aspect: -- "Research renewable energy: costs, environmental impact, adoption rates" → 3 parallel sub-agents -- "Analyze the 2024 election: candidates, issues, polling" → 3 parallel sub-agents +**Clearly separated aspects** → 1 sub-agent per aspect (use sparingly): +- "Research renewable energy adoption in Europe, Asia, and North America" → 3 parallel sub-agents (geographic separation) +- Only use this pattern when aspects cannot be covered efficiently by a single comprehensive search -## Parallel Execution +## Key Principles +- **Bias towards single sub-agent**: One comprehensive research task is more token-efficient than multiple narrow ones +- **Avoid premature decomposition**: Don't break "research X" into "research X overview", "research X techniques", "research X applications" - just use 1 sub-agent for all of X +- **Parallelize only for clear comparisons**: Use multiple sub-agents when comparing distinct entities or geographically separated data + +## Parallel Execution Limits - Use at most {max_concurrent_research_units} parallel sub-agents per iteration - Make multiple task() calls in a single response to enable parallel execution - Each sub-agent returns findings independently