diff --git a/.gitignore b/.gitignore
index 4b70b60..b63e58d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,4 +163,7 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
-.langgraph_api/
\ No newline at end of file
+# Other
+.langgraph_api/
+.vscode/
+.DS_Store
\ No newline at end of file
diff --git a/deep_research/agent.py b/deep_research/agent.py
index 7977872..6149254 100644
--- a/deep_research/agent.py
+++ b/deep_research/agent.py
@@ -4,7 +4,10 @@ This module creates a deep research agent with custom tools and prompts
for conducting web research with strategic thinking and context management.
"""
+from datetime import datetime
+
from langchain.chat_models import init_chat_model
+from langchain_google_genai import ChatGoogleGenerativeAI
from deepagents import create_deep_agent
from research_agent.prompts import (
@@ -18,7 +21,10 @@ from research_agent.tools import tavily_search, think_tool
max_concurrent_research_units = 3
max_researcher_iterations = 3
-# Combine instructions
+# Get current date
+current_date = datetime.now().strftime("%Y-%m-%d")
+
+# Combine orchestrator instructions (RESEARCHER_INSTRUCTIONS only for sub-agents)
INSTRUCTIONS = (
RESEARCH_WORKFLOW_INSTRUCTIONS
+ "\n\n"
@@ -28,25 +34,21 @@ INSTRUCTIONS = (
max_concurrent_research_units=max_concurrent_research_units,
max_researcher_iterations=max_researcher_iterations,
)
- + "\n\n"
- + "=" * 80
- + "\n\n"
- + RESEARCHER_INSTRUCTIONS
)
# Create research sub-agent
research_sub_agent = {
"name": "research-agent",
"description": "Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.",
- "system_prompt": RESEARCHER_INSTRUCTIONS,
+ "system_prompt": RESEARCHER_INSTRUCTIONS.format(date=current_date),
"tools": [tavily_search, think_tool],
}
-# Initialize model
-model = init_chat_model(
- model="anthropic:claude-sonnet-4-5-20250929",
- temperature=0.0
-)
+# Alternative model: Claude Sonnet 4.5 (disabled; uncomment to use instead of Gemini)
+# model = init_chat_model(model="anthropic:claude-sonnet-4-5-20250929", temperature=0.0)
+
+# Active model: Gemini 3 Pro (preview)
+model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview", temperature=0.0)
# Create the agent
agent = create_deep_agent(
diff --git a/deep_research/research_agent.ipynb b/deep_research/research_agent.ipynb
index 6c29da8..a110bbd 100644
--- a/deep_research/research_agent.ipynb
+++ b/deep_research/research_agent.ipynb
@@ -94,6 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "from datetime import datetime\n",
"from utils import show_prompt, format_messages\n",
"from research_agent.prompts import (\n",
" RESEARCHER_INSTRUCTIONS,\n",
@@ -161,12 +162,15 @@
"│ </Show Your Thinking> │\n",
"│ │\n",
"│ <Final Response Format> │\n",
- "│ When providing your final answer, use numbered citations and include a Sources section: │\n",
+ "│ When providing your findings back to the orchestrator: │\n",
"│ │\n",
- "│ 1. **Cite sources inline**: Use [1], [2], [3] format when referencing information │\n",
- "│ 2. **Sources section**: End with a numbered list of all sources │\n",
+ "│ 1. **Structure your response**: Organize findings with clear headings and detailed explanations │\n",
+ "│ 2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches │\n",
+ "│ 3. **Include Sources section**: End with ### Sources listing each numbered source with title and URL │\n",
"│ │\n",
- "│ Example format: │\n",
+ "│ Example: │\n",
+ "│ ``` │\n",
+ "│ ## Key Findings │\n",
"│ │\n",
"│ Context engineering is a critical technique for AI agents [1]. Studies show that proper context management │\n",
"│ can improve performance by 40% [2]. │\n",
@@ -174,8 +178,9 @@
"│ ### Sources │\n",
"│ [1] Context Engineering Guide: https://example.com/context-guide │\n",
"│ [2] AI Performance Study: https://example.com/study │\n",
+ "│ ``` │\n",
"│ │\n",
- "│ This allows readers to verify information and explore topics further. │\n",
+ "│ The orchestrator will consolidate citations from all sub-agents into the final report. │\n",
"│ </Final Response Format> │\n",
"│ │\n",
"│ │\n",
@@ -233,12 +238,15 @@
"\u001b[34m│\u001b[0m \u001b[1;34m\u001b[0m \u001b[34m│\u001b[0m\n",
"\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n",
"\u001b[34m│\u001b[0m \u001b[1;34m
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", "│ │\n", - "│ 🔧 Tool Call: write_file │\n", - "│ Args: { │\n", - "│ \"content\": \"research context engineering approaches used to build AI agents\", │\n", - "│ \"file_path\": \"/research_request.md\" │\n", - "│ } │\n", - "│ ID: a4a0832d-9bae-487c-b52f-164d0b47afdc │\n", - "│ │\n", "│ 🔧 Tool Call: write_todos │\n", "│ Args: { │\n", "│ \"todos\": [ │\n", "│ { │\n", - "│ \"content\": \"Research core concepts and definitions of context engineering in AI agents\", │\n", - "│ \"status\": \"in_progress\" │\n", + "│ \"status\": \"in_progress\", │\n", + "│ \"content\": \"Save research request to /research_request.md\" │\n", "│ }, │\n", "│ { │\n", - "│ \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, │\n", - "│ Sliding windows)\", │\n", - "│ \"status\": \"pending\" │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" │\n", "│ }, │\n", "│ { │\n", - "│ \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, │\n", - "│ Summarization, Token selection)\", │\n", - "│ \"status\": \"pending\" │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Synthesize findings and write final report to /final_report.md\" │\n", "│ }, │\n", "│ { │\n", - "│ \"content\": \"Write final report on context engineering approaches\", │\n", - "│ \"status\": \"pending\" │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Verify report against original request\" │\n", "│ } │\n", "│ ] │\n", "│ } │\n", - "│ ID: 29e25aca-85a1-41f9-9149-4304f30e5fa5 │\n", + "│ ID: 61820e98-a91b-4f74-ac2c-f443af11d389 │\n", + "│ │\n", + "│ 🔧 Tool Call: write_file │\n", + "│ Args: { │\n", + "│ \"file_path\": \"/research_request.md\", │\n", + "│ \"content\": \"research context engineering approaches used to build 
AI agents\" │\n", + "│ } │\n", + "│ ID: 3a5e0fbf-0181-4d89-9501-c5fac4b3cef2 │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"research context engineering approaches used to build AI agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: a4a0832d-9bae-487c-b52f-164d0b47afdc \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research core concepts and definitions of context engineering in AI agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"in_progress\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Sliding windows)\", \u001b[37m│\u001b[0m\n", 
- "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization, Token selection)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Write final report on context engineering approaches\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"pending\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 29e25aca-85a1-41f9-9149-4304f30e5fa5 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 61820e98-a91b-4f74-ac2c-f443af11d389 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": 
\"research context engineering approaches used to build AI agents\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 3a5e0fbf-0181-4d89-9501-c5fac4b3cef2 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", + "│ Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'in_progress'}, │\n", + "│ {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'pending'}, │\n", + "│ {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'pending'}, {'content': │\n", + "│ 'Verify report against original request', 'status': 'pending'}] │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'in_progress'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'pending'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'pending'}, {'content': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -852,100 +779,85 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", - "│ Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', │\n", - "│ 'status': 'in_progress'}, {'content': 'Research structural techniques for context management (e.g., RAG, │\n", - "│ Long-term memory systems, Sliding windows)', 'status': 'pending'}, {'content': 'Research context optimization │\n", - "│ and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'pending'}, │\n", - "│ {'content': 'Write final report on context engineering approaches', 'status': 'pending'}] │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 'status': 'in_progress'}, {'content': 'Research structural techniques for context management (e.g., RAG, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Long-term memory systems, Sliding windows)', 'status': 'pending'}, {'content': 'Research context optimization \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'pending'}, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m {'content': 'Write final report on context engineering approaches', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", - "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - 
"metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", "│ │\n", - "│ 🔧 Tool Call: task │\n", + "│ 🔧 Tool Call: write_todos │\n", "│ Args: { │\n", - "│ \"subagent_type\": \"research-agent\", │\n", - "│ \"description\": \"Research the core concepts and definitions of 'context engineering' specifically for AI │\n", - "│ agents. \\nFind out:\\n1. What is context engineering?\\n2. Why is it critical for AI agents?\\n3. Key principles │\n", - "│ (e.g. relevance, coherence, token limits).\\n4. Distinctions between context engineering and prompt │\n", - "│ engineering.\\n\\nReturn a summary of findings with sources.\" │\n", + "│ \"todos\": [ │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Save research request to /research_request.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"in_progress\", │\n", + "│ \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Synthesize findings and write final report to /final_report.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Verify report against original request\" │\n", + "│ } │\n", + "│ ] │\n", "│ } │\n", - "│ ID: 60ae692d-a760-4254-8819-d3541d2b6456 │\n", + "│ ID: b317fa1b-6f81-47df-b0ca-aa5e50bd2b70 │\n", "│ │\n", "│ 🔧 Tool Call: task │\n", "│ Args: { │\n", "│ \"subagent_type\": \"research-agent\", │\n", - "│ \"description\": \"Research structural techniques for context management in AI agents.\\nFocus on:\\n1. │\n", - "│ Retrieval-Augmented Generation (RAG) patterns for context injection.\\n2. Memory architectures (Short-term vs │\n", - "│ Long-term, Vector databases).\\n3. Context window management strategies (Sliding windows, Hierarchical │\n", - "│ context).\\n4. 
Dynamic context selection methods.\\n\\nReturn a summary of findings with sources.\" │\n", + "│ \"description\": \"Research context engineering approaches used to build AI agents.\\nFocus on:\\n1. Definition │\n", + "│ and core concepts of context engineering.\\n2. Key techniques and strategies (e.g., dynamic context injection, │\n", + "│ RAG, memory management/history pruning, summarization, prompt structuring).\\n3. Architectures or frameworks │\n", + "│ often used (e.g., how LangChain or LlamaIndex handle context).\\n4. Approaches for handling context window │\n", + "│ limits vs. utilizing long-context models.\\n5. Specific patterns like \\\"Needle in a Haystack\\\" optimization or │\n", + "│ \\\"Context Stuffing\\\" vs \\\"Context Selection\\\".\\n\\nReturn a comprehensive summary of findings with citations.\" │\n", "│ } │\n", - "│ ID: 8ce46bb7-c73d-47ff-a20d-ff7add14a95a │\n", - "│ │\n", - "│ 🔧 Tool Call: task │\n", - "│ Args: { │\n", - "│ \"subagent_type\": \"research-agent\", │\n", - "│ \"description\": \"Research context optimization and compression techniques for AI agents.\\nFocus on:\\n1. │\n", - "│ Summarization techniques to reduce context size.\\n2. Prompt compression algorithms (e.g., selective context, │\n", - "│ LLMLingua).\\n3. Token selection/pruning strategies.\\n4. Any specific tools or libraries known for this (e.g. 
│\n", - "│ LangChain's approaches).\\n\\nReturn a summary of findings with sources.\" │\n", - "│ } │\n", - "│ ID: ae5f9ef0-1ebb-48dc-ae07-438f9205f022 │\n", + "│ ID: b682976a-b1fa-47a1-89be-153134ba28c7 │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"description\": \"Research the core concepts and definitions of 'context engineering' specifically for AI \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m agents. \\nFind out:\\n1. What is context engineering?\\n2. Why is it critical for AI agents?\\n3. Key principles \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (e.g. relevance, coherence, token limits).\\n4. 
Distinctions between context engineering and prompt \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m engineering.\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 60ae692d-a760-4254-8819-d3541d2b6456 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: b317fa1b-6f81-47df-b0ca-aa5e50bd2b70 \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - 
"\u001b[37m│\u001b[0m \"description\": \"Research structural techniques for context management in AI agents.\\nFocus on:\\n1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Retrieval-Augmented Generation (RAG) patterns for context injection.\\n2. Memory architectures (Short-term vs \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Long-term, Vector databases).\\n3. Context window management strategies (Sliding windows, Hierarchical \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context).\\n4. Dynamic context selection methods.\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"description\": \"Research context engineering approaches used to build AI agents.\\nFocus on:\\n1. Definition \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m and core concepts of context engineering.\\n2. Key techniques and strategies (e.g., dynamic context injection, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m RAG, memory management/history pruning, summarization, prompt structuring).\\n3. Architectures or frameworks \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m often used (e.g., how LangChain or LlamaIndex handle context).\\n4. Approaches for handling context window \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m limits vs. utilizing long-context models.\\n5. 
Specific patterns like \\\"Needle in a Haystack\\\" optimization or \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \\\"Context Stuffing\\\" vs \\\"Context Selection\\\".\\n\\nReturn a comprehensive summary of findings with citations.\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 8ce46bb7-c73d-47ff-a20d-ff7add14a95a \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: task \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"subagent_type\": \"research-agent\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"description\": \"Research context optimization and compression techniques for AI agents.\\nFocus on:\\n1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization techniques to reduce context size.\\n2. Prompt compression algorithms (e.g., selective context, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m LLMLingua).\\n3. Token selection/pruning strategies.\\n4. Any specific tools or libraries known for this (e.g. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m LangChain's approaches).\\n\\nReturn a summary of findings with sources.\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: ae5f9ef0-1ebb-48dc-ae07-438f9205f022 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: b682976a-b1fa-47a1-89be-153134ba28c7 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -956,423 +868,224 @@ "data": { "text/html": [ "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", - "│ Based on the research, here is a summary of the core concepts of **Context Engineering** for AI agents. │\n", + "│ Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, │\n", + "│ {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': │\n", + "│ 'in_progress'}, {'content': 'Synthesize findings and write final report to /final_report.md', 'status': │\n", + "│ 'pending'}, {'content': 'Verify report against original request', 'status': 'pending'}] │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'in_progress'}, {'content': 'Synthesize findings and write final report to /final_report.md', 'status': \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 'pending'}, {'content': 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", + "│ Context engineering is the systematic practice of designing, managing, and optimizing the information provided │\n", + "│ to AI models to ensure accurate, consistent, and context-aware behavior. It moves beyond simple prompt │\n", + "│ engineering by treating context as a dynamic resource that must be architected, managed, and validated. │\n", "│ │\n", - "│ ### 1. What is Context Engineering? │\n", - "│ **Context Engineering** is the technical practice of designing, curating, and managing the total information │\n", - "│ state (the \"context\") provided to an AI agent during its operation. Unlike simple prompting, which focuses on a │\n", - "│ single input instruction, context engineering is a **systemic and iterative process** that determines *what* │\n", - "│ information an agent perceives at every step of a workflow [1]. │\n", + "│ ### 1. Definition and Core Concepts │\n", + "│ **Context Engineering** is the \"process of designing, testing, and iterating on the contextual information │\n", + "│ provided to AI agents to shape their behavior and improve task performance\" [1]. Unlike a static prompt, │\n", + "│ context in an agentic system is a living state that changes with every user interaction, tool output, and │\n", + "│ internal reasoning step. │\n", "│ │\n", - "│ It involves configuring the optimal set of tokens—including system instructions, tool definitions, conversation │\n", - "│ history, and external data—to ensure the model behaves reliably over time. Ideally, it answers the question: │\n", - "│ *\"What configuration of context is most likely to generate the model's desired behavior?\"* [2] │\n", + "│ * **Core Concepts**: │\n", + "│ * **Context Window**: The working memory limit of the LLM (e.g., 128k tokens). Managing this finite │\n", + "│ resource is the central challenge. 
│\n", + "│ * **State Management**: Tracking the \"world state\" (user intent, task progress, tool results) across a │\n", + "│ multi-turn conversation. │\n", + "│ * **Context Pollution**: The degradation of model performance caused by irrelevant or contradictory │\n", + "│ information in the context [2]. │\n", + "│ * **Context Continuity**: Ensuring the agent retains critical information (like user constraints) │\n", + "│ throughout a long interaction without \"forgetting\" them due to truncation. │\n", "│ │\n", - "│ ### 2. Why is it Critical for AI Agents? │\n", - "│ Context engineering is essential for shifting from simple chatbots to autonomous agents that perform complex, │\n", - "│ multi-step tasks. │\n", - "│ * **Combating \"Context Rot\":** As the amount of information (tokens) in a context window increases, a model’s │\n", - "│ ability to retrieve specific details (\"needle-in-a-haystack\") often degrades. This is due to the **attention │\n", - "│ scarcity** inherent in transformer architectures (where computational complexity is $N^2$ relative to context │\n", - "│ length) [1]. │\n", - "│ * **Enabling Long-Horizon Tasks:** Agents working on tasks spanning hours or days (e.g., coding, research) │\n", - "│ accumulate massive amounts of history. Without engineering strategies to manage this, the context window fills │\n", - "│ up, leading to lost instructions, confusion, or crashed sessions [1]. │\n", - "│ * **Reliability & Cost:** Providing \"just enough\" high-signal context reduces hallucination risks and │\n", - "│ operational costs associated with processing unnecessary tokens. │\n", + "│ ### 2. Key Techniques and Strategies │\n", + "│ Effective context engineering employs a mix of retrieval, summarization, and structural strategies: │\n", "│ │\n", - "│ ### 3. Key Principles of Context Engineering │\n", - "│ Effective context engineering relies on providing the **smallest possible set of high-signal tokens**. 
│\n", + "│ * **Dynamic Context Injection**: Instead of loading all data at once, agents \"pull\" information only when │\n", + "│ needed. For example, an agent might use a `search_tool` to inject real-time web results into the context only │\n", + "│ after identifying a knowledge gap [2]. │\n", + "│ * **Retrieval-Augmented Generation (RAG)**: A standard pattern for handling large knowledge bases. The agent │\n", + "│ retrieves strictly relevant chunks (e.g., top-5 matches) from a vector database to populate the context window, │\n", + "│ rather than stuffing the entire document [2, 3]. │\n", + "│ * **Memory Management**: │\n", + "│ * **Short-term Memory**: Stores immediate conversation history (e.g., last 10 turns). │\n", + "│ * **Long-term Memory**: Persists key facts (e.g., \"User is a vegetarian\") in an external database │\n", + "│ (Postgres, Vector Store) and retrieves them only when relevant [2]. │\n", + "│ * **Pruning/Trimming**: Algorithms that remove older, less relevant tokens to free up space while │\n", + "│ preserving recent interactions [2]. │\n", + "│ * **Summarization**: Compressing verbose tool outputs. Instead of feeding raw HTML from a scraped website, an │\n", + "│ intermediate \"summarizer agent\" distills it into key points before passing it to the main agent [2]. │\n", + "│ * **Prompt Structuring**: Using structured formats (like JSON schemas or XML tags) within the context to │\n", + "│ clearly separate \"System Instructions,\" \"User Input,\" and \"Tool Outputs,\" preventing the model from confusing │\n", + "│ data with instructions. │\n", "│ │\n", - "│ * **Relevance (Signal-to-Noise Ratio):** │\n", - "│ * **Just-in-Time Retrieval:** Instead of dumping all files into the context (traditional RAG), agents │\n", - "│ should be equipped with tools (like `grep` or `ls`) to autonomously \"pull\" only the information they need when │\n", - "│ they need it [2]. 
│\n", - "│ * **The \"Right Altitude\":** System prompts should avoid being too brittle (hardcoded logic) or too vague. │\n", - "│ They should provide flexible heuristics and distinct sections (e.g., Background, Tools, Output Format). │\n", - "│ * **Coherence & State Management:** │\n", - "│ * **Compaction:** Periodically summarizing conversation history to compress the context while preserving │\n", - "│ critical decisions and unresolved issues [1]. │\n", - "│ * **Structured Note-Taking:** Forcing agents to maintain persistent \"memory files\" (e.g., `scratchpad.md` │\n", - "│ or `todo.md`) outside the context window to track progress across resets. │\n", - "│ * **Token Efficiency:** │\n", - "│ * **Tool Optimization:** Tool definitions should be concise. If a human engineer cannot determine which │\n", - "│ tool to use based on the definition, the agent won't be able to either. Return values from tools should be │\n", - "│ stripped of extraneous data to save space [2]. │\n", + "│ ### 3. Architectures and Frameworks │\n", + "│ Two primary frameworks dominate the landscape, each with distinct approaches to context handling: │\n", "│ │\n", - "│ ### 4. Context Engineering vs. Prompt Engineering │\n", - "│ While prompt engineering is about *crafting the message*, context engineering is about *architecting the │\n", - "│ environment*. │\n", - "│ │\n", - "│ | Feature | Prompt Engineering | Context Engineering | │\n", + "│ | Feature | **LangChain** [3, 4] | **LlamaIndex** [3, 4] | │\n", "│ | :--- | :--- | :--- | │\n", - "│ | **Scope** | Discrete, single-turn interactions. | Continuous, multi-turn workflows and system state. | │\n", - "│ | **Focus** | Finding the \"right words\" or instructions. | Curating the \"optimal set of tokens\" (history, │\n", - "│ tools, data). | │\n", - "│ | **Goal** | Optimizing a specific response. | Maintaining agent reliability and state over time. | │\n", - "│ | **Mechanism** | Static text input. 
| Dynamic management (retrieval, summarization, pruning). | │\n", - "│ | **Analogy** | Writing a good email to an employee. | Designing the employee's office, filing system, and │\n", - "│ handbook. | │\n", + "│ | **Primary Focus** | Flexible workflow & agent orchestration | Data indexing & advanced retrieval (RAG) | │\n", + "│ | **Memory Classes** | `ConversationBufferMemory` (raw history)<br>`ConversationSummaryMemory` (LLM-summarized │\n", + "│ history)<br>`ChatMessageHistory` (storage backend) | `ChatEngine` with modes:<br>`context` (standard │\n", + "│ RAG)<br>`condense_question` (rewrites queries based on history) | │\n", + "│ | **Context Strategy** | **Chains**: Sequential passing of context between steps.<br>**Agents**: Dynamic tool │\n", + "│ selection to populate context. | **Indices**: `VectorStoreIndex` (semantic search), `ListIndex` (sequential), │\n", + "│ `TreeIndex` (hierarchical summaries). | │\n", + "│ | **Optimization** | `Context Compression`: Removing irrelevant tokens from retrieved docs. | `Response │\n", + "│ Synthesizers`: <br>`compact` (stuffs max chunks into prompt)<br>`tree_summarize` (hierarchical reduction) | │\n", + "│ │\n", + "│ ### 4. Handling Context Limits vs. Long-Context Models │\n", + "│ A critical architectural decision is whether to use RAG or rely on massive context windows (e.g., Gemini 1.5 │\n", + "│ Pro's 2M tokens). │\n", + "│ │\n", + "│ * **RAG + Small Context**: │\n", + "│ * **Pros**: Significantly cheaper (fewer tokens processed), lower latency, and often higher accuracy for │\n", + "│ specific fact retrieval. │\n", + "│ * **Cons**: Can miss \"global\" insights that require reading the entire corpus at once. │\n", + "│ * **Long-Context Models (LCW)**: │\n", + "│ * **Pros**: Can ingest entire books or codebases, enabling \"whole-document\" reasoning. │\n", + "│ * **Cons**: \"Lost in the Middle\" phenomenon where models forget information in the middle of a massive │\n", + "│ context. 
│\n", + "│ * **Trade-off**: Benchmarks show that RAG systems often outperform LCW models in \"Needle in a Haystack\" │\n", + "│ tests for large corpora (2M+ tokens) because they filter noise before the model sees it [3]. │\n", + "│ │\n", + "│ ### 5. Specific Patterns │\n", + "│ * **Needle in a Haystack Optimization**: │\n", + "│ * **The Problem**: LLMs struggle to find a specific fact (\"needle\") buried in a large amount of │\n", + "│ irrelevant text (\"haystack\"). │\n", + "│ * **Optimization**: Place critical instructions or facts at the very **beginning** or very **end** of the │\n", + "│ context window, as models pay most attention to these areas (Serial Position Effect). │\n", + "│ * **Context Stuffing vs. Context Selection**: │\n", + "│ * **Context Stuffing**: Naively filling the window with as much data as possible. This often leads to │\n", + "│ *context distraction* and higher costs [2]. │\n", + "│ * **Context Selection**: Intentionally curating what enters the window. │\n", + "│ * *Techniques*: Using **Node Postprocessors** (LlamaIndex) to filter retrieved nodes by keywords or │\n", + "│ similarity score before they reach the LLM [4]. │\n", + "│ * *Query Transformations*: Rewriting a user's vague query into a precise search term to select better │\n", + "│ context (e.g., LangChain's `HyDE` or LlamaIndex's `Multi-step query decomposition`) [4]. │\n", "│ │\n", "│ ### Sources │\n", - "│ [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. │\n", - "│ [2] Anthropic Engineering, \"Effective context engineering for AI agents\" (referenced within [1]). 
│\n", + "│ [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering │\n", + "│ [2] Context Engineering Strategies (n8n): │\n", + "│ https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ │\n", + "│ [3] RAG vs Long Context Window Study: │\n", + "│ https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs │\n", + "│ [4] LangChain & LlamaIndex Comparison: │\n", + "│ https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- │\n", + "│ in-large-2b5f3fabc360 │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Based on the research, here is a summary of the core concepts of **Context Engineering** for AI agents. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Context engineering is the systematic practice of designing, managing, and optimizing the information provided \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m to AI models to ensure accurate, consistent, and context-aware behavior. It moves beyond simple prompt \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m engineering by treating context as a dynamic resource that must be architected, managed, and validated. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. What is Context Engineering? \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m **Context Engineering** is the technical practice of designing, curating, and managing the total information \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m state (the \"context\") provided to an AI agent during its operation. 
Unlike simple prompting, which focuses on a \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m single input instruction, context engineering is a **systemic and iterative process** that determines *what* \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m information an agent perceives at every step of a workflow [1]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 1. Definition and Core Concepts \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m **Context Engineering** is the \"process of designing, testing, and iterating on the contextual information \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m provided to AI agents to shape their behavior and improve task performance\" [1]. Unlike a static prompt, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context in an agentic system is a living state that changes with every user interaction, tool output, and \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m internal reasoning step. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m It involves configuring the optimal set of tokens—including system instructions, tool definitions, conversation \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m history, and external data—to ensure the model behaves reliably over time. Ideally, it answers the question: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m *\"What configuration of context is most likely to generate the model's desired behavior?\"* [2] \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Core Concepts**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Window**: The working memory limit of the LLM (e.g., 128k tokens). Managing this finite \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m resource is the central challenge. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **State Management**: Tracking the \"world state\" (user intent, task progress, tool results) across a \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m multi-turn conversation. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Pollution**: The degradation of model performance caused by irrelevant or contradictory \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m information in the context [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Context Continuity**: Ensuring the agent retains critical information (like user constraints) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m throughout a long interaction without \"forgetting\" them due to truncation. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Why is it Critical for AI Agents? \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Context engineering is essential for shifting from simple chatbots to autonomous agents that perform complex, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m multi-step tasks. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Combating \"Context Rot\":** As the amount of information (tokens) in a context window increases, a model’s \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ability to retrieve specific details (\"needle-in-a-haystack\") often degrades. This is due to the **attention \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m scarcity** inherent in transformer architectures (where computational complexity is $N^2$ relative to context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m length) [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Enabling Long-Horizon Tasks:** Agents working on tasks spanning hours or days (e.g., coding, research) \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m accumulate massive amounts of history. Without engineering strategies to manage this, the context window fills \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m up, leading to lost instructions, confusion, or crashed sessions [1]. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Reliability & Cost:** Providing \"just enough\" high-signal context reduces hallucination risks and \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m operational costs associated with processing unnecessary tokens. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 2. Key Techniques and Strategies \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Effective context engineering employs a mix of retrieval, summarization, and structural strategies: \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Key Principles of Context Engineering \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Effective context engineering relies on providing the **smallest possible set of high-signal tokens**. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Dynamic Context Injection**: Instead of loading all data at once, agents \"pull\" information only when \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m needed. For example, an agent might use a `search_tool` to inject real-time web results into the context only \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m after identifying a knowledge gap [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Retrieval-Augmented Generation (RAG)**: A standard pattern for handling large knowledge bases. The agent \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m retrieves strictly relevant chunks (e.g., top-5 matches) from a vector database to populate the context window, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m rather than stuffing the entire document [2, 3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Memory Management**: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Short-term Memory**: Stores immediate conversation history (e.g., last 10 turns). 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Long-term Memory**: Persists key facts (e.g., \"User is a vegetarian\") in an external database \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (Postgres, Vector Store) and retrieves them only when relevant [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Pruning/Trimming**: Algorithms that remove older, less relevant tokens to free up space while \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m preserving recent interactions [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Summarization**: Compressing verbose tool outputs. Instead of feeding raw HTML from a scraped website, an \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m intermediate \"summarizer agent\" distills it into key points before passing it to the main agent [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m * **Prompt Structuring**: Using structured formats (like JSON schemas or XML tags) within the context to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m clearly separate \"System Instructions,\" \"User Input,\" and \"Tool Outputs,\" preventing the model from confusing \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m data with instructions. \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Relevance (Signal-to-Noise Ratio):** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Just-in-Time Retrieval:** Instead of dumping all files into the context (traditional RAG), agents \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m should be equipped with tools (like `grep` or `ls`) to autonomously \"pull\" only the information they need when \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m they need it [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **The \"Right Altitude\":** System prompts should avoid being too brittle (hardcoded logic) or too vague. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m They should provide flexible heuristics and distinct sections (e.g., Background, Tools, Output Format). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Coherence & State Management:** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Compaction:** Periodically summarizing conversation history to compress the context while preserving \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m critical decisions and unresolved issues [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Structured Note-Taking:** Forcing agents to maintain persistent \"memory files\" (e.g., `scratchpad.md` \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m or `todo.md`) outside the context window to track progress across resets. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Token Efficiency:** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Tool Optimization:** Tool definitions should be concise. If a human engineer cannot determine which \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tool to use based on the definition, the agent won't be able to either. Return values from tools should be \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m stripped of extraneous data to save space [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m ### 3. Architectures and Frameworks \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Two primary frameworks dominate the landscape, each with distinct approaches to context handling: \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Context Engineering vs. Prompt Engineering \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m While prompt engineering is about *crafting the message*, context engineering is about *architecting the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m environment*. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | Feature | Prompt Engineering | Context Engineering | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | Feature | **LangChain** [3, 4] | **LlamaIndex** [3, 4] | \u001b[33m│\u001b[0m\n", "\u001b[33m│\u001b[0m | :--- | :--- | :--- | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Scope** | Discrete, single-turn interactions. | Continuous, multi-turn workflows and system state. | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Focus** | Finding the \"right words\" or instructions. | Curating the \"optimal set of tokens\" (history, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tools, data). | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Goal** | Optimizing a specific response. | Maintaining agent reliability and state over time. | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Mechanism** | Static text input. | Dynamic management (retrieval, summarization, pruning). | \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m | **Analogy** | Writing a good email to an employee. | Designing the employee's office, filing system, and \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m handbook. | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Primary Focus** | Flexible workflow & agent orchestration | Data indexing & advanced retrieval (RAG) | \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m | **Memory Classes** | `ConversationBufferMemory` (raw history)
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", - "│ Research into structural techniques for context management in AI agents reveals a shift from static retrieval │\n", - "│ to dynamic, modular architectures that mimic human cognitive processes. │\n", - "│ │\n", - "│ ### 1. Retrieval-Augmented Generation (RAG) Patterns │\n", - "│ Modern RAG has evolved beyond simple vector search into **Modular RAG** architectures [1]. │\n", - "│ * **Pre-Retrieval**: Techniques include **Query Rewriting** (transforming user input into optimal search │\n", - "│ queries) and **Query Expansion** (generating multiple sub-queries to broaden search coverage). │\n", - "│ * **Post-Retrieval**: Retrieved contexts undergo **Re-ranking** (scoring by relevance) and **Pruning** │\n", - "│ (removing redundant information) before reaching the LLM. │\n", - "│ * **GraphRAG**: A structural technique that builds a knowledge graph from the corpus. It uses community │\n", - "│ detection to create hierarchical summaries, allowing the agent to answer \"global\" questions (e.g., \"What are │\n", - "│ the main themes?\") that standard vector search misses [2]. │\n", - "│ * **Agentic RAG**: Treats retrieval as a multi-step reasoning process where an agent actively critiques │\n", - "│ retrieved data and iteratively queries until it has sufficient context [1]. │\n", - "│ │\n", - "│ ### 2. Memory Architectures │\n", - "│ Agent memory is structurally divided to balance immediate reasoning with long-term retention [3]. │\n", - "│ * **Short-Term Memory (STM)**: Resides in the LLM's **context window**. It acts as a \"working memory\" for the │\n", - "│ current session, often managed by summary buffers to prevent overflow. │\n", - "│ * **Long-Term Memory (LTM)**: Implemented via **Vector Databases** (for semantic search) and **Knowledge │\n", - "│ Graphs** (for structured relationships). 
│\n", - "│ * **Episodic Memory**: Stores specific past experiences (sequences of actions/results) to enable few-shot │\n", - "│ learning from history. │\n", - "│ * **Procedural Memory**: Stores \"skills\" or \"how-to\" knowledge, often encoded as static prompt libraries or │\n", - "│ code tools the agent can invoke. │\n", - "│ * **MemGPT Pattern**: An architecture where the LLM acts as an operating system, autonomously managing its │\n", - "│ own memory by deciding when to move information between context (STM) and storage (LTM) [3]. │\n", - "│ │\n", - "│ ### 3. Context Window Management Strategies │\n", - "│ Techniques to maximize the utility of limited context windows include: │\n", - "│ * **Sliding Windows**: A standard technique where long documents are broken into overlapping chunks (e.g., │\n", - "│ 512 tokens with 128-token overlap) to ensure no context is lost at boundaries during processing [4]. │\n", - "│ * **Hierarchical Context**: Strategies like **GraphRAG** or **Fusion-in-Decoder (FiD)** process information │\n", - "│ at multiple levels of abstraction. Documents are first summarized into high-level vectors; only relevant │\n", - "│ sections are expanded into full text [2, 4]. │\n", - "│ * **Compression**: summarizing or embedding retrieved chunks into dense vectors before they enter the main │\n", - "│ context, allowing the agent to \"see\" more documents at lower resolution. │\n", - "│ │\n", - "│ ### 4. Dynamic Context Selection Methods │\n", - "│ Selecting the *right* context is as critical as retrieving it. │\n", - "│ * **Re-ranking Algorithms**: **Cross-Encoders** (e.g., BERT-based) score the semantic similarity between a │\n", - "│ query and a document more accurately than bi-encoders but are computationally expensive. A common pattern is to │\n", - "│ retrieve 100 documents with fast vector search, then re-rank the top 10 with a cross-encoder [5]. 
│\n", - "│ * **Semantic Routing**: Using a lightweight classifier to direct queries to the most appropriate specialized │\n", - "│ index or RAG pipeline (e.g., routing technical queries to a documentation index vs. billing queries to a SQL │\n", - "│ database). │\n", - "│ * **Stochastic & Fairness Rankers**: New methods introduce \"stochastic rankers\" to prevent exposure bias │\n", - "│ (where only the most popular docs are ever seen) and ensure diversity in the retrieved context [5]. │\n", - "│ │\n", - "│ ### Sources │\n", - "│ [1] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ │\n", - "│ [2] GraphRAG Overview: https://microsoft.github.io/graphrag/ │\n", - "│ [3] Agent Memory Systems (STM/LTM/Episodic): │\n", - "│ https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge │\n", - "│ [4] LLM Input Formatting & Sliding Windows: │\n", - "│ https://milvus.io/ai-quick-reference/what-modifications-might-be-needed-to-the-llms-input-formatting-or-archite │\n", - "│ cture-to-best-take-advantage-of-retrieved-documents-for-example-adding-special-tokens-or-segments-to-separate-c │\n", - "│ ontext │\n", - "│ [5] Re-ranking & Dynamic Selection: │\n", - "│ https://www.chitika.com/re-ranking-in-retrieval-augmented-generation-how-to-use-re-rankers-in-rag/ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Research into structural techniques for context management in AI agents reveals a shift from static retrieval \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m to dynamic, modular architectures that mimic human cognitive processes. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. Retrieval-Augmented Generation (RAG) Patterns \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Modern RAG has evolved beyond simple vector search into **Modular RAG** architectures [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Pre-Retrieval**: Techniques include **Query Rewriting** (transforming user input into optimal search \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m queries) and **Query Expansion** (generating multiple sub-queries to broaden search coverage). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Post-Retrieval**: Retrieved contexts undergo **Re-ranking** (scoring by relevance) and **Pruning** \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m (removing redundant information) before reaching the LLM. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **GraphRAG**: A structural technique that builds a knowledge graph from the corpus. It uses community \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m detection to create hierarchical summaries, allowing the agent to answer \"global\" questions (e.g., \"What are \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m the main themes?\") that standard vector search misses [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Agentic RAG**: Treats retrieval as a multi-step reasoning process where an agent actively critiques \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m retrieved data and iteratively queries until it has sufficient context [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Memory Architectures \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Agent memory is structurally divided to balance immediate reasoning with long-term retention [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Short-Term Memory (STM)**: Resides in the LLM's **context window**. 
It acts as a \"working memory\" for the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m current session, often managed by summary buffers to prevent overflow. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Long-Term Memory (LTM)**: Implemented via **Vector Databases** (for semantic search) and **Knowledge \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Graphs** (for structured relationships). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Episodic Memory**: Stores specific past experiences (sequences of actions/results) to enable few-shot \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m learning from history. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Procedural Memory**: Stores \"skills\" or \"how-to\" knowledge, often encoded as static prompt libraries or \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m code tools the agent can invoke. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **MemGPT Pattern**: An architecture where the LLM acts as an operating system, autonomously managing its \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m own memory by deciding when to move information between context (STM) and storage (LTM) [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Context Window Management Strategies \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Techniques to maximize the utility of limited context windows include: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Sliding Windows**: A standard technique where long documents are broken into overlapping chunks (e.g., \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 512 tokens with 128-token overlap) to ensure no context is lost at boundaries during processing [4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Hierarchical Context**: Strategies like **GraphRAG** or **Fusion-in-Decoder (FiD)** process information \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m at multiple levels of abstraction. 
Documents are first summarized into high-level vectors; only relevant \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m sections are expanded into full text [2, 4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Compression**: summarizing or embedding retrieved chunks into dense vectors before they enter the main \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m context, allowing the agent to \"see\" more documents at lower resolution. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Dynamic Context Selection Methods \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Selecting the *right* context is as critical as retrieving it. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Re-ranking Algorithms**: **Cross-Encoders** (e.g., BERT-based) score the semantic similarity between a \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m query and a document more accurately than bi-encoders but are computationally expensive. A common pattern is to \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m retrieve 100 documents with fast vector search, then re-rank the top 10 with a cross-encoder [5]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Semantic Routing**: Using a lightweight classifier to direct queries to the most appropriate specialized \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m index or RAG pipeline (e.g., routing technical queries to a documentation index vs. billing queries to a SQL \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m database). \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Stochastic & Fairness Rankers**: New methods introduce \"stochastic rankers\" to prevent exposure bias \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m (where only the most popular docs are ever seen) and ensure diversity in the retrieved context [5]. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### Sources \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [2] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [3] Agent Memory Systems (STM/LTM/Episodic): \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [4] LLM Input Formatting & Sliding Windows: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://milvus.io/ai-quick-reference/what-modifications-might-be-needed-to-the-llms-input-formatting-or-archite \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m cture-to-best-take-advantage-of-retrieved-documents-for-example-adding-special-tokens-or-segments-to-separate-c \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ontext \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [5] Re-ranking & Dynamic Selection: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://www.chitika.com/re-ranking-in-retrieval-augmented-generation-how-to-use-re-rankers-in-rag/ \u001b[33m│\u001b[0m\n", - "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", - "│ Research into context optimization reveals a multi-layered approach ranging from high-level architectural │\n", - "│ strategies (summarization) to low-level token manipulation (pruning). Below is a summary of findings on │\n", - "│ techniques for reducing context size and optimizing AI agent performance. │\n", - "│ │\n", - "│ ### 1. Summarization Techniques │\n", - "│ Summarization is the most common high-level strategy for managing long conversation histories. │\n", - "│ │\n", - "│ * **Rolling Summaries (Incremental Compression):** Instead of retaining the entire history, the system │\n", - "│ periodically \"melts down\" previous turns into a concise summary. This maintains continuity but may lose │\n", - "│ specific nuances over time [1]. │\n", - "│ * **Hybrid Memory:** A robust approach that combines different memory types. It typically preserves \"pinned\" │\n", - "│ key messages (like the system prompt or initial user instructions) verbatim while compressing intermediate │\n", - "│ messages into summaries. This ensures critical instructions are never lost to compression artifacts [1]. │\n", - "│ * **Externalized Memory:** For agents requiring long-term recall, context is offloaded to a vector database. │\n", - "│ Relevant information is retrieved semantically (RAG) rather than keeping everything in the active context │\n", - "│ window [1]. │\n", - "│ │\n", - "│ ### 2. Prompt Compression Algorithms │\n", - "│ These algorithms aim to reduce the number of tokens sent to the model without losing semantic meaning, often by │\n", - "│ analyzing the information density of the text. │\n", - "│ │\n", - "│ * **Selective Context:** This method uses a smaller base language model (like GPT-2) to calculate the │\n", - "│ \"self-information\" (perplexity) of lexical units (tokens, phrases, or sentences). 
Content with low │\n", - "│ self-information—meaning it is predictable or redundant—is pruned. This can reduce context usage significantly │\n", - "│ while maintaining performance on tasks like summarization and QA [2]. │\n", - "│ * **LLMLingua:** A coarse-to-fine compression framework. It employs a \"budget controller\" to dynamically │\n", - "│ allocate different compression ratios to different parts of the prompt (e.g., compressing few-shot │\n", - "│ demonstrations aggressively while preserving the user's specific instruction). It also aligns the distribution │\n", - "│ between the small compression model and the target LLM to prevent performance degradation [3]. │\n", - "│ │\n", - "│ ### 3. Token Selection & Pruning Strategies │\n", - "│ Token pruning operates at a lower level, often during the inference process itself. │\n", - "│ │\n", - "│ * **Dynamic Token Pruning (e.g., LazyLLM):** Unlike static pruning which removes tokens once and for all, │\n", - "│ dynamic strategies like LazyLLM selectively compute the Key-Value (KV) cache for tokens only when they are │\n", - "│ important for the *next* token prediction. This allows the model to \"lazy load\" context, accelerating the │\n", - "│ pre-filling stage and decoding without fine-tuning [4]. │\n", - "│ * **Token Merging (ToMe):** While originally popular in Vision Transformers, similar concepts are applied to │\n", - "│ LLMs where redundant tokens are merged rather than dropped, preserving their signal while reducing the sequence │\n", - "│ length [3]. │\n", - "│ * **Dynamic Cutoff (Token Budgeting):** A simple LIFO (Last-In, First-Out) strategy where the oldest messages │\n", - "│ are dropped once a strict token limit is reached. While easy to implement, it risks abrupt context loss [1]. │\n", - "│ │\n", - "│ ### 4. Tools and Libraries │\n", - "│ Several libraries provide out-of-the-box implementations for these techniques. 
│\n", - "│ │\n", - "│ * **LangChain:** Offers a suite of tools specifically for context optimization: │\n", - "│ * **Contextual Compression Retriever:** Wraps a base retriever and compresses the retrieved documents │\n", - "│ before returning them. │\n", - "│ * **LLMChainExtractor:** Uses an LLM chain to extract *only* the relevant sentences from a retrieved │\n", - "│ document, discarding irrelevant fluff. │\n", - "│ * **EmbeddingsFilter:** A cheaper/faster alternative that filters documents based on embedding similarity │\n", - "│ thresholds rather than using an LLM call. │\n", - "│ * **DocumentCompressorPipeline:** Allows chaining multiple transformers (e.g., splitting text -> removing │\n", - "│ redundant documents -> filtering by relevance) [5]. │\n", - "│ * **Slimcontext:** A lightweight Node.js library designed for \"hybrid memory\" management. It automates the │\n", - "│ process of preserving key messages while summarizing intermediate history to fit within a specific token budget │\n", - "│ [1]. 
│\n", - "│ │\n", - "│ ### Sources │\n", - "│ [1] Don’t Let Your AI Agent Forget: Smarter Strategies for Summarizing Message History: │\n", - "│ https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his │\n", - "│ tory-a2d5284539f1 │\n", - "│ [2] Selective Context GitHub Repository: https://github.com/liyucheng09/Selective_Context │\n", - "│ [3] LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models: │\n", - "│ https://arxiv.org/html/2310.05736v2 │\n", - "│ [4] LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference: │\n", - "│ https://machinelearning.apple.com/research/dynamic-token-pruning │\n", - "│ [5] LangChain Contextual Compression Retriever: │\n", - "│ https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Research into context optimization reveals a multi-layered approach ranging from high-level architectural \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m strategies (summarization) to low-level token manipulation (pruning). Below is a summary of findings on \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m techniques for reducing context size and optimizing AI agent performance. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 1. Summarization Techniques \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Summarization is the most common high-level strategy for managing long conversation histories. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Rolling Summaries (Incremental Compression):** Instead of retaining the entire history, the system \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m periodically \"melts down\" previous turns into a concise summary. This maintains continuity but may lose \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m specific nuances over time [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Hybrid Memory:** A robust approach that combines different memory types. It typically preserves \"pinned\" \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m key messages (like the system prompt or initial user instructions) verbatim while compressing intermediate \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m messages into summaries. This ensures critical instructions are never lost to compression artifacts [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Externalized Memory:** For agents requiring long-term recall, context is offloaded to a vector database. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Relevant information is retrieved semantically (RAG) rather than keeping everything in the active context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m window [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 2. Prompt Compression Algorithms \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m These algorithms aim to reduce the number of tokens sent to the model without losing semantic meaning, often by \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m analyzing the information density of the text. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Selective Context:** This method uses a smaller base language model (like GPT-2) to calculate the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \"self-information\" (perplexity) of lexical units (tokens, phrases, or sentences). Content with low \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m self-information—meaning it is predictable or redundant—is pruned. This can reduce context usage significantly \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m while maintaining performance on tasks like summarization and QA [2]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LLMLingua:** A coarse-to-fine compression framework. It employs a \"budget controller\" to dynamically \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m allocate different compression ratios to different parts of the prompt (e.g., compressing few-shot \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m demonstrations aggressively while preserving the user's specific instruction). It also aligns the distribution \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m between the small compression model and the target LLM to prevent performance degradation [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 3. Token Selection & Pruning Strategies \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Token pruning operates at a lower level, often during the inference process itself. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Dynamic Token Pruning (e.g., LazyLLM):** Unlike static pruning which removes tokens once and for all, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m dynamic strategies like LazyLLM selectively compute the Key-Value (KV) cache for tokens only when they are \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m important for the *next* token prediction. 
This allows the model to \"lazy load\" context, accelerating the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m pre-filling stage and decoding without fine-tuning [4]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Token Merging (ToMe):** While originally popular in Vision Transformers, similar concepts are applied to \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m LLMs where redundant tokens are merged rather than dropped, preserving their signal while reducing the sequence \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m length [3]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Dynamic Cutoff (Token Budgeting):** A simple LIFO (Last-In, First-Out) strategy where the oldest messages \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m are dropped once a strict token limit is reached. While easy to implement, it risks abrupt context loss [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### 4. Tools and Libraries \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Several libraries provide out-of-the-box implementations for these techniques. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LangChain:** Offers a suite of tools specifically for context optimization: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Contextual Compression Retriever:** Wraps a base retriever and compresses the retrieved documents \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m before returning them. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **LLMChainExtractor:** Uses an LLM chain to extract *only* the relevant sentences from a retrieved \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m document, discarding irrelevant fluff. 
\u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **EmbeddingsFilter:** A cheaper/faster alternative that filters documents based on embedding similarity \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m thresholds rather than using an LLM call. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **DocumentCompressorPipeline:** Allows chaining multiple transformers (e.g., splitting text -> removing \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m redundant documents -> filtering by relevance) [5]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m * **Slimcontext:** A lightweight Node.js library designed for \"hybrid memory\" management. It automates the \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m process of preserving key messages while summarizing intermediate history to fit within a specific token budget \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1]. \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m ### Sources \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [1] Don’t Let Your AI Agent Forget: Smarter Strategies for Summarizing Message History: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m tory-a2d5284539f1 \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [2] Selective Context GitHub Repository: https://github.com/liyucheng09/Selective_Context \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [3] LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://arxiv.org/html/2310.05736v2 \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m [4] LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://machinelearning.apple.com/research/dynamic-token-pruning \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 
[5] LangChain Contextual Compression Retriever: \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [2] Context Engineering Strategies (n8n): \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [3] RAG vs Long Context Window Study: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m [4] LangChain & LlamaIndex Comparison: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m in-large-2b5f3fabc360 \u001b[33m│\u001b[0m\n", "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1384,148 +1097,217 @@ "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", "│ │\n", + "│ 🔧 Tool Call: write_todos │\n", + "│ Args: { │\n", + "│ \"todos\": [ │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Save research request to /research_request.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"in_progress\", │\n", + "│ \"content\": \"Synthesize findings and write final report to /final_report.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"pending\", │\n", + "│ \"content\": \"Verify report against original request\" │\n", + "│ } │\n", + "│ ] │\n", + "│ } │\n", + "│ ID: ea8aeaae-7e6a-4f8b-beba-1f252efabd24 │\n", + "│ │\n", "│ 🔧 Tool Call: write_file │\n", "│ Args: { │\n", - "│ \"content\": \"# Context Engineering Approaches for Building AI Agents\\n\\nContext Engineering is the systematic │\n", - "│ practice of designing, curating, and managing the information state (context) provided to an AI agent. Unlike │\n", - "│ prompt engineering, which focuses on crafting static instructions, context engineering is an architectural │\n", - "│ discipline concerned with the dynamic flow of information\\u2014history, tools, and data\\u2014to ensure agent │\n", - "│ reliability over long-horizon tasks.\\n\\n## Core Principles\\n\\nEffective context engineering aims to solve │\n", - "│ \\\"context rot\\\" and attention scarcity by optimizing the signal-to-noise ratio within the agent's limited │\n", - "│ context window.\\n\\n* **Dynamic State Management**: Moving beyond static prompts to dynamic environments where │\n", - "│ context is retrieved, pruned, and updated in real-time [1].\\n* **Signal Maximization**: Prioritizing │\n", - "│ high-value tokens. 
As context length increases, a model's ability to retrieve specific details decreases; thus, │\n", - "│ reducing noise is critical for accuracy [2].\\n* **Distinction from Prompt Engineering**:\\n * *Prompt │\n", - "│ Engineering*: \\\"How do I ask this question to get the best answer?\\\"\\n * *Context Engineering*: \\\"How do I │\n", - "│ architect the agent's memory and environment so it has the right information to answer any question?\\\" │\n", - "│ [1].\\n\\n## Structural Approaches\\n\\n### 1. Retrieval-Augmented Generation (RAG) Patterns\\nModern agents use │\n", - "│ advanced RAG patterns to inject context dynamically rather than stuffing the window.\\n* **Agentic RAG**: A │\n", - "│ multi-step process where the agent actively critiques retrieved data and iteratively queries until it has │\n", - "│ sufficient context, rather than accepting the first retrieval result [3].\\n* **GraphRAG**: Uses knowledge │\n", - "│ graphs to structure data hierarchically. This allows agents to answer \\\"global\\\" questions (e.g., \\\"What are │\n", - "│ the main themes in this dataset?\\\") by traversing community summaries, a capability often missing in standard │\n", - "│ vector search [4].\\n* **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and │\n", - "│ post-retrieval steps (re-ranking/pruning) to refine the context before it reaches the LLM [3].\\n\\n### 2. Memory │\n", - "│ Architectures\\nTo handle long-running tasks, agents employ split-memory architectures mimicking human │\n", - "│ cognition.\\n* **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory │\n", - "│ hierarchy. It explicitly moves information between \\\"active context\\\" (RAM) and \\\"archival storage\\\" │\n", - "│ (Disk/Vector DB) to manage limited context windows autonomously [5].\\n* **Short-Term vs. 
Long-Term**:\\n * │\n", - "│ *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling summaries.\\n │\n", - "│ * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for infinite │\n", - "│ storage [5].\\n* **Episodic Memory**: Storing sequences of past actions and results to enable few-shot │\n", - "│ learning from previous experiences.\\n\\n### 3. Context Management Strategies\\n* **Sliding Windows**: Breaking │\n", - "│ long documents into overlapping chunks (e.g., 512 tokens with 128-token overlap) to process streams of data │\n", - "│ without losing boundary context [6].\\n* **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at │\n", - "│ multiple levels of abstraction. Agents first scan high-level summaries and only expand relevant sections into │\n", - "│ full text [4].\\n\\n## Optimization & Compression Techniques\\n\\nAs context windows fill, performance degrades and │\n", - "│ costs rise. Optimization techniques aim to compress information without losing semantic value.\\n\\n### 1. │\n", - "│ Summarization Strategies\\n* **Rolling Summaries**: Periodically \\\"melting down\\\" the conversation history │\n", - "│ into a concise narrative. This frees up space but risks losing specific details over time [7].\\n* **Hybrid │\n", - "│ Memory**: A robust approach that keeps \\\"pinned\\\" messages (system prompts, user instructions) verbatim while │\n", - "│ compressing intermediate conversation turns. This prevents critical instructions from being distorted by │\n", - "│ summarization artifacts [7].\\n\\n### 2. Algorithmic Compression\\n* **Selective Context**: Uses a smaller │\n", - "│ language model (like GPT-2) to calculate the \\\"self-information\\\" (perplexity) of tokens. 
Predictable or │\n", - "│ redundant tokens are pruned, retaining only high-information content [8].\\n* **LLMLingua**: A coarse-to-fine │\n", - "│ compression framework that uses a budget controller to allocate compression ratios dynamically. It creates a │\n", - "│ compressed prompt that retains the original instruction's intent while reducing token count by up to 20x │\n", - "│ [9].\\n\\n### 3. Dynamic Token Pruning\\n* **LazyLLM**: A specialized inference technique that selectively │\n", - "│ computes the Key-Value (KV) cache only for tokens important for the *next* token prediction. This allows the │\n", - "│ model to \\\"lazy load\\\" context, speeding up processing for long contexts [10].\\n\\n## Implementation Tools\\n* │\n", - "│ **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter and │\n", - "│ compress documents before they enter the agent's context [11].\\n* **Slimcontext**: A lightweight library for │\n", - "│ managing hybrid memory (pinned + summarized) in Node.js agents [7].\\n\\n### Sources\\n[1] HowAIWorks.ai, │\n", - "│ \\\"Context Engineering: AI Agent Optimization Guide,\\\" Oct 2025.\\n[2] Anthropic Engineering, \\\"Effective context │\n", - "│ engineering for AI agents.\\\"\\n[3] Advanced & Modular RAG Patterns: │\n", - "│ https://ai-radar.aoe.com/architecture-pattern/rag/\\n[4] GraphRAG Overview: │\n", - "│ https://microsoft.github.io/graphrag/\\n[5] Agent Memory Systems: │\n", - "│ https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge\\n[6] LLM │\n", - "│ Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/\\n[7] Don\\u2019t Let Your AI Agent │\n", - "│ Forget: │\n", - "│ https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his │\n", - "│ tory-a2d5284539f1\\n[8] Selective Context: https://github.com/liyucheng09/Selective_Context\\n[9] LLMLingua: │\n", - "│ 
https://arxiv.org/html/2310.05736v2\\n[10] LazyLLM: │\n", - "│ https://machinelearning.apple.com/research/dynamic-token-pruning\\n[11] LangChain Contextual Compression: │\n", - "│ https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html\", │\n", - "│ \"file_path\": \"/final_report.md\" │\n", + "│ \"file_path\": \"/final_report.md\", │\n", + "│ \"content\": \"# Context Engineering Approaches for AI Agents\\n\\n## 1. Introduction\\nContext Engineering is the │\n", + "│ systematic practice of designing, managing, and optimizing the information provided to AI models to ensure │\n", + "│ accurate, consistent, and context-aware behavior. While often confused with prompt engineering (which focuses │\n", + "│ on static instructions), context engineering treats the AI's context window as a dynamic, finite resource that │\n", + "│ must be architected and managed throughout the lifecycle of an interaction. It is a critical discipline for │\n", + "│ building robust AI agents that can maintain state and relevance over long, complex tasks.\\n\\n## 2. Core │\n", + "│ Concepts\\nTo build effective agents, developers must manage several fundamental aspects of the Large Language │\n", + "│ Model (LLM) environment:\\n\\n* **Context Window**: The hard limit on the amount of text (tokens) an LLM can │\n", + "│ process at once (e.g., 8k, 128k, or 1M tokens). Effective engineering ensures this space is used │\n", + "│ efficiently.\\n* **State Management**: Tracking the \\\"world state\\\" across multi-turn conversations. 
This │\n", + "│ includes user intent, task progress, and tool outputs.\\n* **Context Pollution**: The phenomenon where │\n", + "│ irrelevant or contradictory information in the context degrades model performance, leading to hallucinations or │\n", + "│ confusion [2].\\n* **Serial Position Effect**: The tendency of LLMs to pay more attention to information at │\n", + "│ the very beginning and very end of the context window, often ignoring details buried in the middle (\\\"Lost in │\n", + "│ the Middle\\\") [3].\\n\\n## 3. Key Approaches and Techniques\\n\\n### Dynamic Context Injection & Retrieval │\n", + "│ (RAG)\\nRather than loading all available data at once, agents \\\"pull\\\" information only when necessary.\\n* │\n", + "│ **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data only │\n", + "│ after identifying a specific knowledge gap [2].\\n* **RAG (Retrieval-Augmented Generation)**: The standard for │\n", + "│ large knowledge bases. Instead of \\\"stuffing\\\" a document, the system retrieves only the top-k most relevant │\n", + "│ chunks from a vector store to populate the context [2][3].\\n\\n### Memory Management\\nAgents require │\n", + "│ sophisticated memory systems to maintain continuity without overflowing context limits:\\n* **Short-term │\n", + "│ Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages).\\n* **Long-term │\n", + "│ Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, Vector Stores) and │\n", + "│ retrieved only when relevant to the current query [2].\\n* **Pruning and Trimming**: Algorithms that │\n", + "│ selectively remove older, less relevant tokens while preserving the semantic core of the conversation │\n", + "│ history.\\n\\n### Summarization and Compression\\n* **Tool Output Summarization**: Raw outputs from tools (like │\n", + "│ HTML from a web scraper) are often too verbose. 
An intermediate step or \\\"summarizer agent\\\" distills this data │\n", + "│ into key points before passing it to the main reasoning agent [2].\\n* **Context Compression**: Removing stop │\n", + "│ words or irrelevant sections from retrieved documents to maximize information density per token.\\n\\n### Prompt │\n", + "│ Structuring\\nUsing strict formats within the context window helps the model parse information correctly:\\n* │\n", + "│ **Separation of Concerns**: Clearly formatting \\\"System Instructions,\\\" \\\"User Input,\\\" and \\\"Tool Outputs\\\" │\n", + "│ (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.\\n\\n## 4. │\n", + "│ Architectural Frameworks\\nTwo primary frameworks dominate the implementation of these │\n", + "│ strategies:\\n\\n**LangChain**\\n* **Focus**: Flexible orchestration and agent workflows.\\n* **Approach**: │\n", + "│ Uses \\\"Chains\\\" to pass context sequentially between steps and \\\"Memory\\\" classes (e.g., │\n", + "│ `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].\\n* │\n", + "│ **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its own │\n", + "│ context.\\n\\n**LlamaIndex**\\n* **Focus**: Data indexing and advanced retrieval.\\n* **Approach**: specialized │\n", + "│ in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`).\\n* **Strategy**: Uses │\n", + "│ \\\"Response Synthesizers\\\" to optimize how retrieved context is presented to the LLM (e.g., `compact` mode to │\n", + "│ stuff chunks or `tree_summarize` for hierarchical reduction) [4].\\n\\n## 5. Strategic Patterns\\n\\n### RAG vs. │\n", + "│ Long-Context Windows (LCW)\\n* **RAG**: Preferred for cost efficiency and low latency. 
It filters noise before │\n", + "│ the model sees it, often resulting in higher accuracy for \\\"Needle in a Haystack\\\" retrieval tasks [3].\\n* │\n", + "│ **Long-Context Models (e.g., Gemini 1.5)**: Best for \\\"whole-document\\\" reasoning where global context is │\n", + "│ required. However, they are more expensive and prone to the \\\"Lost in the Middle\\\" effect.\\n\\n### Context │\n", + "│ Selection vs. Stuffing\\n* **Context Stuffing**: Naively filling the window with all available data. This is │\n", + "│ generally an anti-pattern as it increases costs and distracts the model.\\n* **Context Selection**: The │\n", + "│ proactive curation of inputs. Techniques include \\\"Node Postprocessors\\\" (filtering results by similarity │\n", + "│ score) and \\\"Query Transformations\\\" (rewriting vague user queries to fetch better context) [4].\\n\\n## 6. │\n", + "│ Conclusion\\nContext engineering is the bridge between raw data and intelligent agent behavior. By moving from │\n", + "│ naive context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, │\n", + "│ developers can build agents that are both powerful and cost-effective. 
The choice between RAG architectures and │\n", + "│ long-context models remains a key trade-off, with hybrid approaches often providing the best balance of │\n", + "│ accuracy and performance.\\n\\n### Sources\\n[1] Context Engineering Guide: │\n", + "│ https://www.promptingguide.ai/agents/context-engineering\\n[2] Context Engineering Strategies (n8n): │\n", + "│ https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/\\n[3] RAG vs Long │\n", + "│ Context Window Study: https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs\\n[4] │\n", + "│ LangChain & LlamaIndex Comparison: │\n", + "│ https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- │\n", + "│ in-large-2b5f3fabc366\" │\n", "│ } │\n", - "│ ID: 75657a73-9ad8-4815-96b4-615298b4ac95 │\n", + "│ ID: 589357a0-fbce-4941-9090-8f91d12880cb │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context 
engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"in_progress\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"pending\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: ea8aeaae-7e6a-4f8b-beba-1f252efabd24 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m 🔧 Tool Call: write_file \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"# Context Engineering Approaches for Building AI Agents\\n\\nContext Engineering is the systematic \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m practice of designing, curating, and managing the information state (context) provided to an AI agent. 
Unlike \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m prompt engineering, which focuses on crafting static instructions, context engineering is an architectural \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m discipline concerned with the dynamic flow of information\\u2014history, tools, and data\\u2014to ensure agent \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m reliability over long-horizon tasks.\\n\\n## Core Principles\\n\\nEffective context engineering aims to solve \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \\\"context rot\\\" and attention scarcity by optimizing the signal-to-noise ratio within the agent's limited \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context window.\\n\\n* **Dynamic State Management**: Moving beyond static prompts to dynamic environments where \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m context is retrieved, pruned, and updated in real-time [1].\\n* **Signal Maximization**: Prioritizing \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m high-value tokens. As context length increases, a model's ability to retrieve specific details decreases; thus, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m reducing noise is critical for accuracy [2].\\n* **Distinction from Prompt Engineering**:\\n * *Prompt \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Engineering*: \\\"How do I ask this question to get the best answer?\\\"\\n * *Context Engineering*: \\\"How do I \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m architect the agent's memory and environment so it has the right information to answer any question?\\\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [1].\\n\\n## Structural Approaches\\n\\n### 1. 
Retrieval-Augmented Generation (RAG) Patterns\\nModern agents use \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m advanced RAG patterns to inject context dynamically rather than stuffing the window.\\n* **Agentic RAG**: A \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m multi-step process where the agent actively critiques retrieved data and iteratively queries until it has \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m sufficient context, rather than accepting the first retrieval result [3].\\n* **GraphRAG**: Uses knowledge \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m graphs to structure data hierarchically. This allows agents to answer \\\"global\\\" questions (e.g., \\\"What are \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m the main themes in this dataset?\\\") by traversing community summaries, a capability often missing in standard \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m vector search [4].\\n* **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m post-retrieval steps (re-ranking/pruning) to refine the context before it reaches the LLM [3].\\n\\n### 2. Memory \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Architectures\\nTo handle long-running tasks, agents employ split-memory architectures mimicking human \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m cognition.\\n* **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m hierarchy. It explicitly moves information between \\\"active context\\\" (RAM) and \\\"archival storage\\\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (Disk/Vector DB) to manage limited context windows autonomously [5].\\n* **Short-Term vs. 
Long-Term**:\\n * \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling summaries.\\n \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for infinite \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m storage [5].\\n* **Episodic Memory**: Storing sequences of past actions and results to enable few-shot \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m learning from previous experiences.\\n\\n### 3. Context Management Strategies\\n* **Sliding Windows**: Breaking \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m long documents into overlapping chunks (e.g., 512 tokens with 128-token overlap) to process streams of data \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m without losing boundary context [6].\\n* **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m multiple levels of abstraction. Agents first scan high-level summaries and only expand relevant sections into \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m full text [4].\\n\\n## Optimization & Compression Techniques\\n\\nAs context windows fill, performance degrades and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m costs rise. Optimization techniques aim to compress information without losing semantic value.\\n\\n### 1. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization Strategies\\n* **Rolling Summaries**: Periodically \\\"melting down\\\" the conversation history \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m into a concise narrative. 
This frees up space but risks losing specific details over time [7].\\n* **Hybrid \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Memory**: A robust approach that keeps \\\"pinned\\\" messages (system prompts, user instructions) verbatim while \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compressing intermediate conversation turns. This prevents critical instructions from being distorted by \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m summarization artifacts [7].\\n\\n### 2. Algorithmic Compression\\n* **Selective Context**: Uses a smaller \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m language model (like GPT-2) to calculate the \\\"self-information\\\" (perplexity) of tokens. Predictable or \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m redundant tokens are pruned, retaining only high-information content [8].\\n* **LLMLingua**: A coarse-to-fine \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compression framework that uses a budget controller to allocate compression ratios dynamically. It creates a \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compressed prompt that retains the original instruction's intent while reducing token count by up to 20x \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [9].\\n\\n### 3. Dynamic Token Pruning\\n* **LazyLLM**: A specialized inference technique that selectively \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m computes the Key-Value (KV) cache only for tokens important for the *next* token prediction. 
This allows the \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m model to \\\"lazy load\\\" context, speeding up processing for long contexts [10].\\n\\n## Implementation Tools\\n* \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter and \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m compress documents before they enter the agent's context [11].\\n* **Slimcontext**: A lightweight library for \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m managing hybrid memory (pinned + summarized) in Node.js agents [7].\\n\\n### Sources\\n[1] HowAIWorks.ai, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \\\"Context Engineering: AI Agent Optimization Guide,\\\" Oct 2025.\\n[2] Anthropic Engineering, \\\"Effective context \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m engineering for AI agents.\\\"\\n[3] Advanced & Modular RAG Patterns: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://ai-radar.aoe.com/architecture-pattern/rag/\\n[4] GraphRAG Overview: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://microsoft.github.io/graphrag/\\n[5] Agent Memory Systems: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge\\n[6] LLM \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/\\n[7] Don\\u2019t Let Your AI Agent \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Forget: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-his \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m tory-a2d5284539f1\\n[8] Selective Context: https://github.com/liyucheng09/Selective_Context\\n[9] LLMLingua: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://arxiv.org/html/2310.05736v2\\n[10] LazyLLM: 
\u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://machinelearning.apple.com/research/dynamic-token-pruning\\n[11] LangChain Contextual Compression: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"# Context Engineering Approaches for AI Agents\\n\\n## 1. Introduction\\nContext Engineering is the \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m systematic practice of designing, managing, and optimizing the information provided to AI models to ensure \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m accurate, consistent, and context-aware behavior. While often confused with prompt engineering (which focuses \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m on static instructions), context engineering treats the AI's context window as a dynamic, finite resource that \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m must be architected and managed throughout the lifecycle of an interaction. It is a critical discipline for \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m building robust AI agents that can maintain state and relevance over long, complex tasks.\\n\\n## 2. Core \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Concepts\\nTo build effective agents, developers must manage several fundamental aspects of the Large Language \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Model (LLM) environment:\\n\\n* **Context Window**: The hard limit on the amount of text (tokens) an LLM can \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m process at once (e.g., 8k, 128k, or 1M tokens). 
Effective engineering ensures this space is used \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m efficiently.\\n* **State Management**: Tracking the \\\"world state\\\" across multi-turn conversations. This \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m includes user intent, task progress, and tool outputs.\\n* **Context Pollution**: The phenomenon where \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m irrelevant or contradictory information in the context degrades model performance, leading to hallucinations or \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m confusion [2].\\n* **Serial Position Effect**: The tendency of LLMs to pay more attention to information at \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the very beginning and very end of the context window, often ignoring details buried in the middle (\\\"Lost in \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the Middle\\\") [3].\\n\\n## 3. Key Approaches and Techniques\\n\\n### Dynamic Context Injection & Retrieval \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (RAG)\\nRather than loading all available data at once, agents \\\"pull\\\" information only when necessary.\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data only \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m after identifying a specific knowledge gap [2].\\n* **RAG (Retrieval-Augmented Generation)**: The standard for \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m large knowledge bases. 
Instead of \\\"stuffing\\\" a document, the system retrieves only the top-k most relevant \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m chunks from a vector store to populate the context [2][3].\\n\\n### Memory Management\\nAgents require \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m sophisticated memory systems to maintain continuity without overflowing context limits:\\n* **Short-term \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages).\\n* **Long-term \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, Vector Stores) and \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m retrieved only when relevant to the current query [2].\\n* **Pruning and Trimming**: Algorithms that \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m selectively remove older, less relevant tokens while preserving the semantic core of the conversation \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m history.\\n\\n### Summarization and Compression\\n* **Tool Output Summarization**: Raw outputs from tools (like \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m HTML from a web scraper) are often too verbose. 
An intermediate step or \\\"summarizer agent\\\" distills this data \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m into key points before passing it to the main reasoning agent [2].\\n* **Context Compression**: Removing stop \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m words or irrelevant sections from retrieved documents to maximize information density per token.\\n\\n### Prompt \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Structuring\\nUsing strict formats within the context window helps the model parse information correctly:\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Separation of Concerns**: Clearly formatting \\\"System Instructions,\\\" \\\"User Input,\\\" and \\\"Tool Outputs\\\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (often using XML tags or JSON schemas) prevents the model from confusing data with instructions.\\n\\n## 4. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Architectural Frameworks\\nTwo primary frameworks dominate the implementation of these \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m strategies:\\n\\n**LangChain**\\n* **Focus**: Flexible orchestration and agent workflows.\\n* **Approach**: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Uses \\\"Chains\\\" to pass context sequentially between steps and \\\"Memory\\\" classes (e.g., \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4].\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its own \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m context.\\n\\n**LlamaIndex**\\n* **Focus**: Data indexing and advanced retrieval.\\n* **Approach**: specialized \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`).\\n* **Strategy**: Uses \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \\\"Response 
Synthesizers\\\" to optimize how retrieved context is presented to the LLM (e.g., `compact` mode to \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m stuff chunks or `tree_summarize` for hierarchical reduction) [4].\\n\\n## 5. Strategic Patterns\\n\\n### RAG vs. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Long-Context Windows (LCW)\\n* **RAG**: Preferred for cost efficiency and low latency. It filters noise before \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the model sees it, often resulting in higher accuracy for \\\"Needle in a Haystack\\\" retrieval tasks [3].\\n* \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Long-Context Models (e.g., Gemini 1.5)**: Best for \\\"whole-document\\\" reasoning where global context is \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m required. However, they are more expensive and prone to the \\\"Lost in the Middle\\\" effect.\\n\\n### Context \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Selection vs. Stuffing\\n* **Context Stuffing**: Naively filling the window with all available data. This is \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m generally an anti-pattern as it increases costs and distracts the model.\\n* **Context Selection**: The \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m proactive curation of inputs. Techniques include \\\"Node Postprocessors\\\" (filtering results by similarity \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m score) and \\\"Query Transformations\\\" (rewriting vague user queries to fetch better context) [4].\\n\\n## 6. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Conclusion\\nContext engineering is the bridge between raw data and intelligent agent behavior. By moving from \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m naive context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m developers can build agents that are both powerful and cost-effective. 
The choice between RAG architectures and \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m accuracy and performance.\\n\\n### Sources\\n[1] Context Engineering Guide: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://www.promptingguide.ai/agents/context-engineering\\n[2] Context Engineering Strategies (n8n): \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/\\n[3] RAG vs Long \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Context Window Study: https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs\\n[4] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m LangChain & LlamaIndex Comparison: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m in-large-2b5f3fabc366\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 75657a73-9ad8-4815-96b4-615298b4ac95 \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 589357a0-fbce-4941-9090-8f91d12880cb \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", + "│ Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, │\n", + "│ {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, │\n", + "│ {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'in_progress'}, │\n", + "│ {'content': 'Verify report against original request', 'status': 'pending'}] │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'in_progress'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Verify report against original request', 'status': 'pending'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -1548,60 +1330,22 @@ "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", "│ │\n", - "│ 🔧 Tool Call: write_todos │\n", + "│ 🔧 Tool Call: read_file │\n", "│ Args: { │\n", - "│ \"todos\": [ │\n", - "│ { │\n", - "│ \"content\": \"Research core concepts and definitions of context engineering in AI agents\", │\n", - "│ \"status\": \"completed\" │\n", - "│ }, │\n", - "│ { │\n", - "│ \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, │\n", - "│ Sliding windows)\", │\n", - "│ \"status\": \"completed\" │\n", - "│ }, │\n", - "│ { │\n", - "│ \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, │\n", - "│ Summarization, Token selection)\", │\n", - "│ \"status\": \"completed\" │\n", - "│ }, │\n", - "│ { │\n", - "│ \"content\": \"Write final report on context engineering approaches\", │\n", - "│ \"status\": \"completed\" │\n", - "│ } │\n", - "│ ] │\n", + "│ \"file_path\": \"/research_request.md\" │\n", "│ } │\n", - "│ ID: 41f5ec4f-9cc1-42e5-bfe5-578e82689f9d │\n", + "│ ID: dacd96a0-c106-462a-a710-6fc389064df5 │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: read_file \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research core concepts and definitions of context engineering in AI 
agents\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research structural techniques for context management (e.g., RAG, Long-term memory systems, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Sliding windows)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Research context optimization and compression techniques (e.g., Prompt compression, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m Summarization, Token selection)\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"content\": \"Write final report on context engineering approaches\", \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m \"status\": \"completed\" \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/research_request.md\" \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ID: 41f5ec4f-9cc1-42e5-bfe5-578e82689f9d \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: dacd96a0-c106-462a-a710-6fc389064df5 \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1612,21 +1356,13 @@ "data": { "text/html": [ "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", - "│ Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', │\n", - "│ 'status': 'completed'}, {'content': 'Research structural techniques for context management (e.g., RAG, │\n", - "│ Long-term memory systems, Sliding windows)', 'status': 'completed'}, {'content': 'Research context optimization │\n", - "│ and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'completed'}, │\n", - "│ {'content': 'Write final report on context engineering approaches', 'status': 'completed'}] │\n", + "│ 1 research context engineering approaches used to build AI agents │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", - "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Research core concepts and definitions of context engineering in AI agents', \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m 'status': 'completed'}, {'content': 'Research structural techniques for context management (e.g., RAG, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m Long-term memory systems, Sliding windows)', 'status': 'completed'}, {'content': 'Research context optimization \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m and compression techniques (e.g., Prompt compression, Summarization, Token selection)', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", - "\u001b[33m│\u001b[0m {'content': 'Write final report on context engineering approaches', 'status': 'completed'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 1 research context engineering approaches used to build 
AI agents \u001b[33m│\u001b[0m\n", "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1637,57 +1373,384 @@ "data": { "text/html": [ "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", - "│ The research on context engineering approaches for AI agents is complete. I have compiled a detailed report │\n", - "│ covering core principles, structural architectures (like RAG and Memory Systems), and optimization techniques │\n", - "│ (such as compression and pruning). │\n", "│ │\n", - "│ ### Key Findings │\n", - "│ * **Definition**: Context engineering is the architectural practice of managing the agent's information │\n", - "│ environment (history, tools, data) to ensure reliability, as opposed to just crafting static prompts. │\n", - "│ * **Structural Techniques**: │\n", - "│ * **RAG Patterns**: GraphRAG and Agentic RAG allow for dynamic and hierarchical information retrieval. │\n", - "│ * **Memory**: \"MemGPT\" style architectures that manage Short-Term vs. Long-Term memory autonomously. │\n", - "│ * **Optimization**: │\n", - "│ * **Compression**: Algorithms like **LLMLingua** and **Selective Context** to reduce token usage while │\n", - "│ preserving meaning. │\n", - "│ * **Summarization**: \"Rolling summaries\" and \"Hybrid memory\" to balance history retention with context │\n", - "│ limits. 
│\n", + "│ 🔧 Tool Call: write_todos │\n", + "│ Args: { │\n", + "│ \"todos\": [ │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Save research request to /research_request.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Synthesize findings and write final report to /final_report.md\" │\n", + "│ }, │\n", + "│ { │\n", + "│ \"status\": \"completed\", │\n", + "│ \"content\": \"Verify report against original request\" │\n", + "│ } │\n", + "│ ] │\n", + "│ } │\n", + "│ ID: 4c420284-baa7-4a5b-90ab-e156b502c709 │\n", "│ │\n", - "│ The full report has been saved to `/final_report.md`. │\n", - "│ │\n", - "│ ### Sources │\n", - "│ [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. │\n", - "│ [2] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ │\n", - "│ [3] LLMLingua: https://arxiv.org/html/2310.05736v2 │\n", - "│ [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ │\n", + "│ 🔧 Tool Call: read_file │\n", + "│ Args: { │\n", + "│ \"file_path\": \"/final_report.md\" │\n", + "│ } │\n", + "│ ID: 2d15c15f-589e-4c3d-98b0-03116707c143 │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", - "\u001b[37m│\u001b[0m The research on context engineering approaches for AI agents is complete. 
I have compiled a detailed report \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m covering core principles, structural architectures (like RAG and Memory Systems), and optimization techniques \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m (such as compression and pruning). \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ### Key Findings \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Definition**: Context engineering is the architectural practice of managing the agent's information \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m environment (history, tools, data) to ensure reliability, as opposed to just crafting static prompts. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Structural Techniques**: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **RAG Patterns**: GraphRAG and Agentic RAG allow for dynamic and hierarchical information retrieval. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Memory**: \"MemGPT\" style architectures that manage Short-Term vs. Long-Term memory autonomously. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Optimization**: \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Compression**: Algorithms like **LLMLingua** and **Selective Context** to reduce token usage while \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m preserving meaning. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m * **Summarization**: \"Rolling summaries\" and \"Hybrid memory\" to balance history retention with context \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m limits. 
\u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: write_todos \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"todos\": [ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Save research request to /research_request.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Research context engineering approaches for AI agents using a sub-agent\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Synthesize findings and write final report to /final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m }, \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"status\": \"completed\", \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"content\": \"Verify report against original request\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ] \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 4c420284-baa7-4a5b-90ab-e156b502c709 \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m The full report has been saved to `/final_report.md`. 
\u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 🔧 Tool Call: read_file \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Args: { \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \"file_path\": \"/final_report.md\" \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m } \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ID: 2d15c15f-589e-4c3d-98b0-03116707c143 \u001b[37m│\u001b[0m\n", + "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", + "│ Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, │\n", + "│ {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, │\n", + "│ {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'completed'}, │\n", + "│ {'content': 'Verify report against original request', 'status': 'completed'}] │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m Updated todo list to [{'content': 'Save research request to /research_request.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Research context engineering approaches for AI agents using a sub-agent', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Synthesize findings and write final report to /final_report.md', 'status': 'completed'}, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m {'content': 'Verify report against original request', 'status': 'completed'}] \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭──────────────────────────────────────────────── 🔧 Tool Output ─────────────────────────────────────────────────╮\n", + "│ 1 # Context Engineering Approaches for AI Agents │\n", + "│ 2 │\n", + "│ 3 ## 1. Introduction │\n", + "│ 4 Context Engineering is the systematic practice of designing, managing, and optimizing the information │\n", + "│ provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with │\n", + "│ prompt engineering (which focuses on static instructions), context engineering treats the AI's context window │\n", + "│ as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an interaction. │\n", + "│ It is a critical discipline for building robust AI agents that can maintain state and relevance over long, │\n", + "│ complex tasks. │\n", + "│ 5 │\n", + "│ 6 ## 2. Core Concepts │\n", + "│ 7 To build effective agents, developers must manage several fundamental aspects of the Large Language │\n", + "│ Model (LLM) environment: │\n", + "│ 8 │\n", + "│ 9 * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., │\n", + "│ 8k, 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. │\n", + "│ 10 * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes │\n", + "│ user intent, task progress, and tool outputs. │\n", + "│ 11 * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context │\n", + "│ degrades model performance, leading to hallucinations or confusion [2]. │\n", + "│ 12 * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very │\n", + "│ beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the │\n", + "│ Middle\") [3]. │\n", + "│ 13 │\n", + "│ 14 ## 3. 
Key Approaches and Techniques │\n", + "│ 15 │\n", + "│ 16 ### Dynamic Context Injection & Retrieval (RAG) │\n", + "│ 17 Rather than loading all available data at once, agents \"pull\" information only when necessary. │\n", + "│ 18 * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time │\n", + "│ data only after identifying a specific knowledge gap [2]. │\n", + "│ 19 * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of │\n", + "│ \"stuffing\" a document, the system retrieves only the top-k most relevant chunks from a vector store to populate │\n", + "│ the context [2][3]. │\n", + "│ 20 │\n", + "│ 21 ### Memory Management │\n", + "│ 22 Agents require sophisticated memory systems to maintain continuity without overflowing context limits: │\n", + "│ 23 * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 │\n", + "│ messages). │\n", + "│ 24 * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases │\n", + "│ (Postgres, Vector Stores) and retrieved only when relevant to the current query [2]. │\n", + "│ 25 * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while │\n", + "│ preserving the semantic core of the conversation history. │\n", + "│ 26 │\n", + "│ 27 ### Summarization and Compression │\n", + "│ 28 * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too │\n", + "│ verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to the │\n", + "│ main reasoning agent [2]. │\n", + "│ 29 * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to │\n", + "│ maximize information density per token. 
│\n", + "│ 30 │\n", + "│ 31 ### Prompt Structuring │\n", + "│ 32 Using strict formats within the context window helps the model parse information correctly: │\n", + "│ 33 * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool │\n", + "│ Outputs\" (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. │\n", + "│ 34 │\n", + "│ 35 ## 4. Architectural Frameworks │\n", + "│ 36 Two primary frameworks dominate the implementation of these strategies: │\n", + "│ 37 │\n", + "│ 38 **LangChain** │\n", + "│ 39 * **Focus**: Flexible orchestration and agent workflows. │\n", + "│ 40 * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., │\n", + "│ `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. │\n", + "│ 41 * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to │\n", + "│ its own context. │\n", + "│ 42 │\n", + "│ 43 **LlamaIndex** │\n", + "│ 44 * **Focus**: Data indexing and advanced retrieval. │\n", + "│ 45 * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, │\n", + "│ `TreeIndex`). │\n", + "│ 46 * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the │\n", + "│ LLM (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. │\n", + "│ 47 │\n", + "│ 48 ## 5. Strategic Patterns │\n", + "│ 49 │\n", + "│ 50 ### RAG vs. Long-Context Windows (LCW) │\n", + "│ 51 * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, │\n", + "│ often resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. │\n", + "│ 52 * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global │\n", + "│ context is required. 
However, they are more expensive and prone to the \"Lost in the Middle\" effect. │\n", + "│ 53 │\n", + "│ 54 ### Context Selection vs. Stuffing │\n", + "│ 55 * **Context Stuffing**: Naively filling the window with all available data. This is generally an │\n", + "│ anti-pattern as it increases costs and distracts the model. │\n", + "│ 56 * **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\" │\n", + "│ (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch │\n", + "│ better context) [4]. │\n", + "│ 57 │\n", + "│ 58 ## 6. Conclusion │\n", + "│ 59 Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive │\n", + "│ context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers │\n", + "│ can build agents that are both powerful and cost-effective. The choice between RAG architectures and │\n", + "│ long-context models remains a key trade-off, with hybrid approaches often providing the best balance of │\n", + "│ accuracy and performance. 
│\n", + "│ 60 │\n", + "│ 61 ### Sources │\n", + "│ 62 [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering │\n", + "│ 63 [2] Context Engineering Strategies (n8n): │\n", + "│ https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ │\n", + "│ 64 [3] RAG vs Long Context Window Study: │\n", + "│ https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs │\n", + "│ 65 [4] LangChain & LlamaIndex Comparison: │\n", + "│ https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- │\n", + "│ in-large-2b5f3fabc366 │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[33m╭─\u001b[0m\u001b[33m───────────────────────────────────────────────\u001b[0m\u001b[33m 🔧 Tool Output \u001b[0m\u001b[33m────────────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m\n", + "\u001b[33m│\u001b[0m 1 # Context Engineering Approaches for AI Agents \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 2 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 3 ## 1. Introduction \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 4 Context Engineering is the systematic practice of designing, managing, and optimizing the information \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m prompt engineering (which focuses on static instructions), context engineering treats the AI's context window \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an interaction. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m It is a critical discipline for building robust AI agents that can maintain state and relevance over long, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m complex tasks. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 5 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 6 ## 2. Core Concepts \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 7 To build effective agents, developers must manage several fundamental aspects of the Large Language \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Model (LLM) environment: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 8 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 9 * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 8k, 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 10 * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m user intent, task progress, and tool outputs. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 11 * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m degrades model performance, leading to hallucinations or confusion [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 12 * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Middle\") [3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 13 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 14 ## 3. 
Key Approaches and Techniques \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 15 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 16 ### Dynamic Context Injection & Retrieval (RAG) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 17 Rather than loading all available data at once, agents \"pull\" information only when necessary. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 18 * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m data only after identifying a specific knowledge gap [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 19 * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m \"stuffing\" a document, the system retrieves only the top-k most relevant chunks from a vector store to populate \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m the context [2][3]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 20 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 21 ### Memory Management \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 22 Agents require sophisticated memory systems to maintain continuity without overflowing context limits: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 23 * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m messages). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 24 * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (Postgres, Vector Stores) and retrieved only when relevant to the current query [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 25 * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m preserving the semantic core of the conversation history. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 26 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 27 ### Summarization and Compression \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 28 * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m main reasoning agent [2]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 29 * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m maximize information density per token. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 30 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 31 ### Prompt Structuring \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 32 Using strict formats within the context window helps the model parse information correctly: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 33 * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m Outputs\" (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 34 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 35 ## 4. Architectural Frameworks \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 36 Two primary frameworks dominate the implementation of these strategies: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 37 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 38 **LangChain** \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 39 * **Focus**: Flexible orchestration and agent workflows. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 40 * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 41 * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m its own context. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 42 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 43 **LlamaIndex** \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 44 * **Focus**: Data indexing and advanced retrieval. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 45 * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m `TreeIndex`). \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 46 * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m LLM (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 47 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 48 ## 5. Strategic Patterns \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 49 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 50 ### RAG vs. Long-Context Windows (LCW) \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 51 * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m often resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 52 * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context is required. However, they are more expensive and prone to the \"Lost in the Middle\" effect. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 53 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 54 ### Context Selection vs. Stuffing \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 55 * **Context Stuffing**: Naively filling the window with all available data. This is generally an \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m anti-pattern as it increases costs and distracts the model. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 56 * **Context Selection**: The proactive curation of inputs. Techniques include \"Node Postprocessors\" \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m better context) [4]. \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 57 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 58 ## 6. Conclusion \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 59 Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m can build agents that are both powerful and cost-effective. The choice between RAG architectures and \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m accuracy and performance. 
\u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 60 \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 61 ### Sources \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 62 [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 63 [2] Context Engineering Strategies (n8n): \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 64 [3] RAG vs Long Context Window Study: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m 65 [4] LangChain & LlamaIndex Comparison: \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperability- \u001b[33m│\u001b[0m\n", + "\u001b[33m│\u001b[0m in-large-2b5f3fabc366 \u001b[33m│\u001b[0m\n", + "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭───────────────────────────────────────────────────── 📝 AI ─────────────────────────────────────────────────────╮\n", + "│ I have completed the research on context engineering approaches for AI agents. The final report details │\n", + "│ definitions, core concepts, key techniques (RAG, Memory, Summarization), and architectural frameworks │\n", + "│ (LangChain vs LlamaIndex). │\n", + "│ │\n", + "│ ### **Research Findings Summary** │\n", + "│ Context engineering is the discipline of managing the finite \"context window\" of an AI model to ensure it has │\n", + "│ the right information at the right time. It is distinct from prompt engineering and focuses on dynamic state │\n", + "│ management. │\n", + "│ │\n", + "│ **Key Approaches:** │\n", + "│ 1. **Dynamic Context Injection (RAG)**: Instead of loading all data, agents \"pull\" specific information from │\n", + "│ vector databases or APIs only when needed to answer a query. │\n", + "│ 2. **Memory Management**: Using sliding windows (short-term memory) and external databases (long-term memory) │\n", + "│ to maintain conversation history without exceeding token limits. │\n", + "│ 3. **Summarization**: Compressing verbose tool outputs (like web scrape results) before passing them to the │\n", + "│ reasoning agent to save tokens. │\n", + "│ 4. **Architectural Frameworks**: │\n", + "│ * **LangChain**: Focuses on \"Chains\" and dynamic tool selection. │\n", + "│ * **LlamaIndex**: Focuses on data indexing and advanced retrieval strategies. │\n", + "│ │\n", + "│ **Strategic Trade-offs:** │\n", + "│ * **RAG vs. Long-Context**: RAG is often more cost-effective and accurate for finding specific facts │\n", + "│ (\"needles\") in large datasets, while massive context windows (e.g., 1M+ tokens) are better for holistic │\n", + "│ analysis but prone to the \"Lost in the Middle\" phenomenon. │\n", + "│ * **Selection vs. Stuffing**: \"Context Stuffing\" (filling the window blindly) is an anti-pattern. 
\"Context │\n", + "│ Selection\" (curating inputs via filtering and query transformation) leads to better performance. │\n", + "│ │\n", + "│ The full detailed report can be found in `/final_report.md`. │\n", + "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "\n" + ], + "text/plain": [ + "\u001b[37m╭─\u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m 📝 AI \u001b[0m\u001b[37m────────────────────────────────────────────────────\u001b[0m\u001b[37m─╮\u001b[0m\n", + "\u001b[37m│\u001b[0m I have completed the research on context engineering approaches for AI agents. The final report details \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m definitions, core concepts, key techniques (RAG, Memory, Summarization), and architectural frameworks \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (LangChain vs LlamaIndex). \u001b[37m│\u001b[0m\n", "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m ### Sources \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [2] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [3] LLMLingua: https://arxiv.org/html/2310.05736v2 \u001b[37m│\u001b[0m\n", - "\u001b[37m│\u001b[0m [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m ### **Research Findings Summary** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Context engineering is the discipline of managing the finite \"context window\" of an AI model to ensure it has \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m the right information at the right time. It is distinct from prompt engineering and focuses on dynamic state \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m management. 
\u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Key Approaches:** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 1. **Dynamic Context Injection (RAG)**: Instead of loading all data, agents \"pull\" specific information from \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m vector databases or APIs only when needed to answer a query. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 2. **Memory Management**: Using sliding windows (short-term memory) and external databases (long-term memory) \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m to maintain conversation history without exceeding token limits. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 3. **Summarization**: Compressing verbose tool outputs (like web scrape results) before passing them to the \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m reasoning agent to save tokens. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m 4. **Architectural Frameworks**: \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **LangChain**: Focuses on \"Chains\" and dynamic tool selection. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **LlamaIndex**: Focuses on data indexing and advanced retrieval strategies. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m **Strategic Trade-offs:** \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **RAG vs. Long-Context**: RAG is often more cost-effective and accurate for finding specific facts \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m (\"needles\") in large datasets, while massive context windows (e.g., 1M+ tokens) are better for holistic \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m analysis but prone to the \"Lost in the Middle\" phenomenon. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m * **Selection vs. Stuffing**: \"Context Stuffing\" (filling the window blindly) is an anti-pattern. 
\"Context \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m Selection\" (curating inputs via filtering and query transformation) leads to better performance. \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m \u001b[37m│\u001b[0m\n", + "\u001b[37m│\u001b[0m The full detailed report can be found in `/final_report.md`. \u001b[37m│\u001b[0m\n", "\u001b[37m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] }, @@ -1706,13 +1769,12 @@ " ],\n", " }, \n", ")\n", - "\n", "format_messages(result[\"messages\"])" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "id": "188b5ab5", "metadata": {}, "outputs": [ @@ -1721,105 +1783,107 @@ "text/html": [ "
╭──────────────────────────────────────────────────── Prompt ─────────────────────────────────────────────────────╮\n", "│ │\n", - "│ # Context Engineering Approaches for Building AI Agents │\n", + "│ # Context Engineering Approaches for AI Agents │\n", "│ │\n", - "│ Context Engineering is the systematic practice of designing, curating, and managing the information state │\n", - "│ (context) provided to an AI agent. Unlike prompt engineering, which focuses on crafting static instructions, │\n", - "│ context engineering is an architectural discipline concerned with the dynamic flow of information—history, │\n", - "│ tools, and data—to ensure agent reliability over long-horizon tasks. │\n", + "│ ## 1. Introduction │\n", + "│ Context Engineering is the systematic practice of designing, managing, and optimizing the information │\n", + "│ provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with │\n", + "│ prompt engineering (which focuses on static instructions), context engineering treats the AI's context window │\n", + "│ as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an │\n", + "│ interaction. It is a critical discipline for building robust AI agents that can maintain state and relevance │\n", + "│ over long, complex tasks. │\n", "│ │\n", - "│ ## Core Principles │\n", + "│ ## 2. Core Concepts │\n", + "│ To build effective agents, developers must manage several fundamental aspects of the Large Language Model │\n", + "│ (LLM) environment: │\n", "│ │\n", - "│ Effective context engineering aims to solve \"context rot\" and attention scarcity by optimizing the │\n", - "│ signal-to-noise ratio within the agent's limited context window. │\n", + "│ * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., 8k, │\n", + "│ 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. 
│\n", + "│ * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes user │\n", + "│ intent, task progress, and tool outputs. │\n", + "│ * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context │\n", + "│ degrades model performance, leading to hallucinations or confusion [2]. │\n", + "│ * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very │\n", + "│ beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the │\n", + "│ Middle\") [3]. │\n", "│ │\n", - "│ * **Dynamic State Management**: Moving beyond static prompts to dynamic environments where context is │\n", - "│ retrieved, pruned, and updated in real-time [1]. │\n", - "│ * **Signal Maximization**: Prioritizing high-value tokens. As context length increases, a model's ability │\n", - "│ to retrieve specific details decreases; thus, reducing noise is critical for accuracy [2]. │\n", - "│ * **Distinction from Prompt Engineering**: │\n", - "│ * *Prompt Engineering*: \"How do I ask this question to get the best answer?\" │\n", - "│ * *Context Engineering*: \"How do I architect the agent's memory and environment so it has the right │\n", - "│ information to answer any question?\" [1]. │\n", + "│ ## 3. Key Approaches and Techniques │\n", "│ │\n", - "│ ## Structural Approaches │\n", + "│ ### Dynamic Context Injection & Retrieval (RAG) │\n", + "│ Rather than loading all available data at once, agents \"pull\" information only when necessary. │\n", + "│ * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data │\n", + "│ only after identifying a specific knowledge gap [2]. │\n", + "│ * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. 
Instead of \"stuffing\" a │\n", + "│ document, the system retrieves only the top-k most relevant chunks from a vector store to populate the │\n", + "│ context [2][3]. │\n", "│ │\n", - "│ ### 1. Retrieval-Augmented Generation (RAG) Patterns │\n", - "│ Modern agents use advanced RAG patterns to inject context dynamically rather than stuffing the window. │\n", - "│ * **Agentic RAG**: A multi-step process where the agent actively critiques retrieved data and iteratively │\n", - "│ queries until it has sufficient context, rather than accepting the first retrieval result [3]. │\n", - "│ * **GraphRAG**: Uses knowledge graphs to structure data hierarchically. This allows agents to answer │\n", - "│ \"global\" questions (e.g., \"What are the main themes in this dataset?\") by traversing community summaries, a │\n", - "│ capability often missing in standard vector search [4]. │\n", - "│ * **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and post-retrieval steps │\n", - "│ (re-ranking/pruning) to refine the context before it reaches the LLM [3]. │\n", + "│ ### Memory Management │\n", + "│ Agents require sophisticated memory systems to maintain continuity without overflowing context limits: │\n", + "│ * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages). │\n", + "│ * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, │\n", + "│ Vector Stores) and retrieved only when relevant to the current query [2]. │\n", + "│ * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while preserving │\n", + "│ the semantic core of the conversation history. │\n", "│ │\n", - "│ ### 2. Memory Architectures │\n", - "│ To handle long-running tasks, agents employ split-memory architectures mimicking human cognition. │\n", - "│ * **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory hierarchy. 
It │\n", - "│ explicitly moves information between \"active context\" (RAM) and \"archival storage\" (Disk/Vector DB) to manage │\n", - "│ limited context windows autonomously [5]. │\n", - "│ * **Short-Term vs. Long-Term**: │\n", - "│ * *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling │\n", - "│ summaries. │\n", - "│ * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for │\n", - "│ infinite storage [5]. │\n", - "│ * **Episodic Memory**: Storing sequences of past actions and results to enable few-shot learning from │\n", - "│ previous experiences. │\n", + "│ ### Summarization and Compression │\n", + "│ * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too │\n", + "│ verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to │\n", + "│ the main reasoning agent [2]. │\n", + "│ * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to maximize │\n", + "│ information density per token. │\n", "│ │\n", - "│ ### 3. Context Management Strategies │\n", - "│ * **Sliding Windows**: Breaking long documents into overlapping chunks (e.g., 512 tokens with 128-token │\n", - "│ overlap) to process streams of data without losing boundary context [6]. │\n", - "│ * **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at multiple levels of abstraction. │\n", - "│ Agents first scan high-level summaries and only expand relevant sections into full text [4]. │\n", + "│ ### Prompt Structuring │\n", + "│ Using strict formats within the context window helps the model parse information correctly: │\n", + "│ * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool Outputs\" │\n", + "│ (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. 
│\n", "│ │\n", - "│ ## Optimization & Compression Techniques │\n", + "│ ## 4. Architectural Frameworks │\n", + "│ Two primary frameworks dominate the implementation of these strategies: │\n", "│ │\n", - "│ As context windows fill, performance degrades and costs rise. Optimization techniques aim to compress │\n", - "│ information without losing semantic value. │\n", + "│ **LangChain** │\n", + "│ * **Focus**: Flexible orchestration and agent workflows. │\n", + "│ * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., │\n", + "│ `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. │\n", + "│ * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its │\n", + "│ own context. │\n", "│ │\n", - "│ ### 1. Summarization Strategies │\n", - "│ * **Rolling Summaries**: Periodically \"melting down\" the conversation history into a concise narrative. │\n", - "│ This frees up space but risks losing specific details over time [7]. │\n", - "│ * **Hybrid Memory**: A robust approach that keeps \"pinned\" messages (system prompts, user instructions) │\n", - "│ verbatim while compressing intermediate conversation turns. This prevents critical instructions from being │\n", - "│ distorted by summarization artifacts [7]. │\n", + "│ **LlamaIndex** │\n", + "│ * **Focus**: Data indexing and advanced retrieval. │\n", + "│ * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`). │\n", + "│ * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the LLM │\n", + "│ (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. │\n", "│ │\n", - "│ ### 2. Algorithmic Compression │\n", - "│ * **Selective Context**: Uses a smaller language model (like GPT-2) to calculate the \"self-information\" │\n", - "│ (perplexity) of tokens. 
Predictable or redundant tokens are pruned, retaining only high-information content │\n", - "│ [8]. │\n", - "│ * **LLMLingua**: A coarse-to-fine compression framework that uses a budget controller to allocate │\n", - "│ compression ratios dynamically. It creates a compressed prompt that retains the original instruction's intent │\n", - "│ while reducing token count by up to 20x [9]. │\n", + "│ ## 5. Strategic Patterns │\n", "│ │\n", - "│ ### 3. Dynamic Token Pruning │\n", - "│ * **LazyLLM**: A specialized inference technique that selectively computes the Key-Value (KV) cache only │\n", - "│ for tokens important for the *next* token prediction. This allows the model to \"lazy load\" context, speeding │\n", - "│ up processing for long contexts [10]. │\n", + "│ ### RAG vs. Long-Context Windows (LCW) │\n", + "│ * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, often │\n", + "│ resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. │\n", + "│ * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global context is │\n", + "│ required. However, they are more expensive and prone to the \"Lost in the Middle\" effect. │\n", "│ │\n", - "│ ## Implementation Tools │\n", - "│ * **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter │\n", - "│ and compress documents before they enter the agent's context [11]. │\n", - "│ * **Slimcontext**: A lightweight library for managing hybrid memory (pinned + summarized) in Node.js agents │\n", - "│ [7]. │\n", + "│ ### Context Selection vs. Stuffing │\n", + "│ * **Context Stuffing**: Naively filling the window with all available data. This is generally an │\n", + "│ anti-pattern as it increases costs and distracts the model. │\n", + "│ * **Context Selection**: The proactive curation of inputs. 
Techniques include \"Node Postprocessors\" │\n", + "│ (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch │\n", + "│ better context) [4]. │\n", + "│ │\n", + "│ ## 6. Conclusion │\n", + "│ Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive │\n", + "│ context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers │\n", + "│ can build agents that are both powerful and cost-effective. The choice between RAG architectures and │\n", + "│ long-context models remains a key trade-off, with hybrid approaches often providing the best balance of │\n", + "│ accuracy and performance. │\n", "│ │\n", "│ ### Sources │\n", - "│ [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. │\n", - "│ [2] Anthropic Engineering, \"Effective context engineering for AI agents.\" │\n", - "│ [3] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ │\n", - "│ [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ │\n", - "│ [5] Agent Memory Systems: │\n", - "│ https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge │\n", - "│ [6] LLM Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/ │\n", - "│ [7] Don’t Let Your AI Agent Forget: │\n", - "│ https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-h │\n", - "│ istory-a2d5284539f1 │\n", - "│ [8] Selective Context: https://github.com/liyucheng09/Selective_Context │\n", - "│ [9] LLMLingua: https://arxiv.org/html/2310.05736v2 │\n", - "│ [10] LazyLLM: https://machinelearning.apple.com/research/dynamic-token-pruning │\n", - "│ [11] LangChain Contextual Compression: │\n", - "│ https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html │\n", + "│ [1] Context Engineering 
Guide: https://www.promptingguide.ai/agents/context-engineering │\n", + "│ [2] Context Engineering Strategies (n8n): │\n", + "│ https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ │\n", + "│ [3] RAG vs Long Context Window Study: │\n", + "│ https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs │\n", + "│ [4] LangChain & LlamaIndex Comparison: │\n", + "│ https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperabilit │\n", + "│ y-in-large-2b5f3fabc366 │\n", "│ │\n", "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "\n" @@ -1827,105 +1891,107 @@ "text/plain": [ "\u001b[34m╭─\u001b[0m\u001b[34m───────────────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;32mPrompt\u001b[0m\u001b[34m \u001b[0m\u001b[34m────────────────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m # Context Engineering Approaches for Building AI Agents \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m # Context Engineering Approaches for AI Agents \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Context Engineering is the systematic practice of designing, curating, and managing the information state \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (context) provided to an AI agent. Unlike prompt engineering, which focuses on crafting static instructions, \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m context engineering is an architectural discipline concerned with the dynamic flow of information—history, \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m tools, and data—to ensure agent reliability over long-horizon tasks. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 1. 
Introduction\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Context Engineering is the systematic practice of designing, managing, and optimizing the information \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m provided to AI models to ensure accurate, consistent, and context-aware behavior. While often confused with \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m prompt engineering (which focuses on static instructions), context engineering treats the AI's context window \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m as a dynamic, finite resource that must be architected and managed throughout the lifecycle of an \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m interaction. It is a critical discipline for building robust AI agents that can maintain state and relevance \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m over long, complex tasks. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Core Principles\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 2. Core Concepts\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m To build effective agents, developers must manage several fundamental aspects of the Large Language Model \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (LLM) environment: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Effective context engineering aims to solve \"context rot\" and attention scarcity by optimizing the \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m signal-to-noise ratio within the agent's limited context window. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Window**: The hard limit on the amount of text (tokens) an LLM can process at once (e.g., 8k, \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 128k, or 1M tokens). Effective engineering ensures this space is used efficiently. 
\u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **State Management**: Tracking the \"world state\" across multi-turn conversations. This includes user \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m intent, task progress, and tool outputs. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Pollution**: The phenomenon where irrelevant or contradictory information in the context \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m degrades model performance, leading to hallucinations or confusion [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Serial Position Effect**: The tendency of LLMs to pay more attention to information at the very \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m beginning and very end of the context window, often ignoring details buried in the middle (\"Lost in the \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Middle\") [3]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Dynamic State Management**: Moving beyond static prompts to dynamic environments where context is \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m retrieved, pruned, and updated in real-time [1]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Signal Maximization**: Prioritizing high-value tokens. As context length increases, a model's ability \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m to retrieve specific details decreases; thus, reducing noise is critical for accuracy [2]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Distinction from Prompt Engineering**: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Prompt Engineering*: \"How do I ask this question to get the best answer?\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Context Engineering*: \"How do I architect the agent's memory and environment so it has the right \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m information to answer any question?\" [1]. 
\u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 3. Key Approaches and Techniques\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Structural Approaches\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Dynamic Context Injection & Retrieval (RAG)\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Rather than loading all available data at once, agents \"pull\" information only when necessary. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Dynamic Injection**: Agents use tools (like search APIs or database queries) to fetch real-time data \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m only after identifying a specific knowledge gap [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **RAG (Retrieval-Augmented Generation)**: The standard for large knowledge bases. Instead of \"stuffing\" a \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m document, the system retrieves only the top-k most relevant chunks from a vector store to populate the \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m context [2][3]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 1. Retrieval-Augmented Generation (RAG) Patterns\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Modern agents use advanced RAG patterns to inject context dynamically rather than stuffing the window. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Agentic RAG**: A multi-step process where the agent actively critiques retrieved data and iteratively \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m queries until it has sufficient context, rather than accepting the first retrieval result [3]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **GraphRAG**: Uses knowledge graphs to structure data hierarchically. 
This allows agents to answer \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \"global\" questions (e.g., \"What are the main themes in this dataset?\") by traversing community summaries, a \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m capability often missing in standard vector search [4]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Modular RAG**: Incorporates pre-retrieval steps (query rewriting/expansion) and post-retrieval steps \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (re-ranking/pruning) to refine the context before it reaches the LLM [3]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Memory Management\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Agents require sophisticated memory systems to maintain continuity without overflowing context limits: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Short-term Memory**: A sliding window of the most recent interaction turns (e.g., last 10 messages). \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Long-term Memory**: Key facts (e.g., user preferences) are persisted in external databases (Postgres, \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Vector Stores) and retrieved only when relevant to the current query [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Pruning and Trimming**: Algorithms that selectively remove older, less relevant tokens while preserving \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m the semantic core of the conversation history. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 2. Memory Architectures\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m To handle long-running tasks, agents employ split-memory architectures mimicking human cognition. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **MemGPT Pattern**: Treats the LLM as an operating system that manages its own memory hierarchy. 
It \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m explicitly moves information between \"active context\" (RAM) and \"archival storage\" (Disk/Vector DB) to manage \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m limited context windows autonomously [5]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Short-Term vs. Long-Term**: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Short-Term (STM)*: The immediate context window, often managed with sliding windows or rolling \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m summaries. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * *Long-Term (LTM)*: Vector databases (semantic recall) and SQL databases (structured recall) for \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m infinite storage [5]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Episodic Memory**: Storing sequences of past actions and results to enable few-shot learning from \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m previous experiences. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Summarization and Compression\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Tool Output Summarization**: Raw outputs from tools (like HTML from a web scraper) are often too \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m verbose. An intermediate step or \"summarizer agent\" distills this data into key points before passing it to \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m the main reasoning agent [2]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Compression**: Removing stop words or irrelevant sections from retrieved documents to maximize \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m information density per token. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 3. 
Context Management Strategies\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Sliding Windows**: Breaking long documents into overlapping chunks (e.g., 512 tokens with 128-token \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m overlap) to process streams of data without losing boundary context [6]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Hierarchical Context (Fusion-in-Decoder)**: Processing documents at multiple levels of abstraction. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m Agents first scan high-level summaries and only expand relevant sections into full text [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Prompt Structuring\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Using strict formats within the context window helps the model parse information correctly: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Separation of Concerns**: Clearly formatting \"System Instructions,\" \"User Input,\" and \"Tool Outputs\" \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (often using XML tags or JSON schemas) prevents the model from confusing data with instructions. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Optimization & Compression Techniques\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 4. Architectural Frameworks\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Two primary frameworks dominate the implementation of these strategies: \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m As context windows fill, performance degrades and costs rise. Optimization techniques aim to compress \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m information without losing semantic value. 
\u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **LangChain** \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Focus**: Flexible orchestration and agent workflows. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Approach**: Uses \"Chains\" to pass context sequentially between steps and \"Memory\" classes (e.g., \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m `ConversationSummaryMemory`) to automatically manage history compression and persistence [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Strategy**: Emphasizes dynamic tool selection where the agent decides what information to add to its \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m own context. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 1. Summarization Strategies\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Rolling Summaries**: Periodically \"melting down\" the conversation history into a concise narrative. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m This frees up space but risks losing specific details over time [7]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Hybrid Memory**: A robust approach that keeps \"pinned\" messages (system prompts, user instructions) \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m verbatim while compressing intermediate conversation turns. This prevents critical instructions from being \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m distorted by summarization artifacts [7]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m **LlamaIndex** \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Focus**: Data indexing and advanced retrieval. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Approach**: specialized in structuring data for LLMs using indices (`VectorStoreIndex`, `TreeIndex`). 
\u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Strategy**: Uses \"Response Synthesizers\" to optimize how retrieved context is presented to the LLM \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (e.g., `compact` mode to stuff chunks or `tree_summarize` for hierarchical reduction) [4]. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 2. Algorithmic Compression\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Selective Context**: Uses a smaller language model (like GPT-2) to calculate the \"self-information\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m (perplexity) of tokens. Predictable or redundant tokens are pruned, retaining only high-information content \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [8]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LLMLingua**: A coarse-to-fine compression framework that uses a budget controller to allocate \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m compression ratios dynamically. It creates a compressed prompt that retains the original instruction's intent \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m while reducing token count by up to 20x [9]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 5. Strategic Patterns\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## 3. Dynamic Token Pruning\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LazyLLM**: A specialized inference technique that selectively computes the Key-Value (KV) cache only \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m for tokens important for the *next* token prediction. This allows the model to \"lazy load\" context, speeding \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m up processing for long contexts [10]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## RAG vs. 
Long-Context Windows (LCW)\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **RAG**: Preferred for cost efficiency and low latency. It filters noise before the model sees it, often \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m resulting in higher accuracy for \"Needle in a Haystack\" retrieval tasks [3]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Long-Context Models (e.g., Gemini 1.5)**: Best for \"whole-document\" reasoning where global context is \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m required. However, they are more expensive and prone to the \"Lost in the Middle\" effect. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m## Implementation Tools\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **LangChain**: Provides `ContextualCompressionRetriever` and `LLMChainExtractor` to automatically filter \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m and compress documents before they enter the agent's context [11]. \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m * **Slimcontext**: A lightweight library for managing hybrid memory (pinned + summarized) in Node.js agents \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [7]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Context Selection vs. Stuffing\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Stuffing**: Naively filling the window with all available data. This is generally an \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m anti-pattern as it increases costs and distracts the model. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m * **Context Selection**: The proactive curation of inputs. 
Techniques include \"Node Postprocessors\" \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m (filtering results by similarity score) and \"Query Transformations\" (rewriting vague user queries to fetch \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m better context) [4]. \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m \u001b[1;35m## 6. Conclusion\u001b[0m \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m Context engineering is the bridge between raw data and intelligent agent behavior. By moving from naive \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m context stuffing to sophisticated dynamic injection, memory management, and structured retrieval, developers \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m can build agents that are both powerful and cost-effective. The choice between RAG architectures and \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m long-context models remains a key trade-off, with hybrid approaches often providing the best balance of \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m accuracy and performance. \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[1;36m#\u001b[0m\u001b[1;36m## Sources\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [1] HowAIWorks.ai, \"Context Engineering: AI Agent Optimization Guide,\" Oct 2025. 
\u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [2] Anthropic Engineering, \"Effective context engineering for AI agents.\" \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [3] Advanced & Modular RAG Patterns: https://ai-radar.aoe.com/architecture-pattern/rag/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [4] GraphRAG Overview: https://microsoft.github.io/graphrag/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [5] Agent Memory Systems: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://www.linkedin.com/pulse/day-4-agent-memory-systems-short-term-long-term-episodic-marques-rp3ge \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [6] LLM Input Formatting & Sliding Windows: https://milvus.io/ai-quick-reference/ \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [7] Don’t Let Your AI Agent Forget: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://techwithibrahim.medium.com/dont-let-your-ai-agent-forget-smarter-strategies-for-summarizing-message-h \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m istory-a2d5284539f1 \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [8] Selective Context: https://github.com/liyucheng09/Selective_Context \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [9] LLMLingua: https://arxiv.org/html/2310.05736v2 \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [10] LazyLLM: https://machinelearning.apple.com/research/dynamic-token-pruning \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m [11] LangChain Contextual Compression: \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m https://lagnchain.readthedocs.io/en/latest/modules/indexes/retrievers/examples/contextual-compression.html \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [1] Context Engineering Guide: https://www.promptingguide.ai/agents/context-engineering \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [2] Context Engineering Strategies (n8n): \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m 
https://www.theaiautomators.com/context-engineering-strategies-to-build-better-ai-agents/ \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [3] RAG vs Long Context Window Study: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m https://www.legionintel.com/blog/rag-systems-vs-lcw-performance-and-cost-trade-offs \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m [4] LangChain & LlamaIndex Comparison: \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m https://medium.com/badal-io/exploring-langchain-and-llamaindex-to-achieve-standardization-and-interoperabilit \u001b[34m│\u001b[0m\n", + "\u001b[34m│\u001b[0m y-in-large-2b5f3fabc366 \u001b[34m│\u001b[0m\n", "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" ] @@ -1949,7 +2015,7 @@ "source": [ "Trace: \n", "\n", - "https://smith.langchain.com/public/34012df8-ffe1-424b-aa17-efb21e87c53e/r" + "https://smith.langchain.com/public/72d23852-4616-4bcc-8d8a-b0d1905c945b/r" ] }, { diff --git a/deep_research/research_agent/prompts.py b/deep_research/research_agent/prompts.py index cdcbf33..588bec0 100644 --- a/deep_research/research_agent/prompts.py +++ b/deep_research/research_agent/prompts.py @@ -4,11 +4,12 @@ RESEARCH_WORKFLOW_INSTRUCTIONS = """# Research Workflow Follow this workflow for all research requests: -1. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` -2. **Plan**: Create a todo list with write_todos to break down the research into focused tasks -3. **Research**: Delegate research tasks to sub-agents using the task() tool -4. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below) -5. **Verify**: Read `/research_request.md` to confirm you've addressed all aspects of the original question +1. 
**Plan**: Create a todo list with write_todos to break down the research into focused tasks +2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md` +3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for research, never conduct research yourself +4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number across all findings) +5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below) +6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and structure ## Research Planning Guidelines - Batch similar research tasks into a single TODO to minimize overhead @@ -48,10 +49,9 @@ Simply list items with details - no introduction needed: - Each section should be comprehensive and detailed - Use bullet points only when listing is more appropriate than prose -