Mirror of https://github.com/BillyOutlast/posthog.git, synced 2026-02-04 03:01:23 +01:00
chore(ph-ai): increase cache ttl (#41458)
@@ -42,9 +42,9 @@ from ee.hogai.utils.types.base import NodePath
 from .compaction_manager import AnthropicConversationCompactionManager
 from .prompts import (
+    AGENT_CORE_MEMORY_PROMPT,
     AGENT_PROMPT,
     BASIC_FUNCTIONALITY_PROMPT,
-    CORE_MEMORY_INSTRUCTIONS_PROMPT,
     DOING_TASKS_PROMPT,
     PROACTIVENESS_PROMPT,
     ROLE_PROMPT,
@@ -212,6 +212,7 @@ class AgentExecutable(BaseAgentExecutable):
         system_prompts = ChatPromptTemplate.from_messages(
             [
                 ("system", self._get_system_prompt(state, config)),
+                ("system", AGENT_CORE_MEMORY_PROMPT),
             ],
             template_format="mustache",
         ).format_messages(
@@ -221,7 +222,7 @@ class AgentExecutable(BaseAgentExecutable):
         )

         # Mark the longest default prefix as cacheable
-        add_cache_control(system_prompts[-1])
+        add_cache_control(system_prompts[0], ttl="1h")

         message = await model.ainvoke(system_prompts + langchain_messages, config)
         assistant_message = self._process_output_message(message)
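
This hunk is the heart of the commit: instead of marking the last system message as cacheable under the default 5-minute TTL, the node now marks the first, static system prompt and opts it into Anthropic's 1-hour cache tier, so the long default prefix survives slower-paced conversations. A minimal sketch of the message shape this produces, with a placeholder string standing in for the formatted AGENT_PROMPT:

    from langchain_core.messages import SystemMessage

    prompt = SystemMessage(content="<static agent prompt>")  # placeholder text

    # Equivalent of add_cache_control(prompt, ttl="1h") for string content,
    # per the helper updated later in this commit:
    prompt.content = [
        {
            "type": "text",
            "text": "<static agent prompt>",
            # The 1-hour ephemeral tier keeps the prefix cached across turns
            # that arrive slower than the default 5-minute window.
            "cache_control": {"type": "ephemeral", "ttl": "1h"},
        }
    ]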
@@ -263,7 +264,6 @@ class AgentExecutable(BaseAgentExecutable):
        - `{{{task_management}}}`
        - `{{{doing_tasks}}}`
        - `{{{tool_usage_policy}}}`
-       - `{{{core_memory_instructions}}}`

        The variables from above can have the following nested variables that will be injected:
        - `{{{groups}}}` – a prompt containing the description of the groups.
@@ -291,7 +291,6 @@ class AgentExecutable(BaseAgentExecutable):
             task_management=TASK_MANAGEMENT_PROMPT,
             doing_tasks=DOING_TASKS_PROMPT,
             tool_usage_policy=TOOL_USAGE_POLICY_PROMPT,
-            core_memory_instructions=CORE_MEMORY_INSTRUCTIONS_PROMPT,
         )

     async def _get_billing_prompt(self) -> str:
@@ -132,11 +132,6 @@ TOOL_USAGE_POLICY_PROMPT = """
 </tool_usage_policy>
 """.strip()

-CORE_MEMORY_INSTRUCTIONS_PROMPT = """
-{{{core_memory}}}
-New memories will automatically be added to the core memory as the conversation progresses. If users ask to save, update, or delete the core memory, say you have done it. If the '/remember [information]' command is used, the information gets appended verbatim to core memory.
-""".strip()
-
 AGENT_PROMPT = """
 {{{role}}}
@@ -155,8 +150,11 @@ AGENT_PROMPT = """
 {{{tool_usage_policy}}}

-{{{core_memory_instructions}}}
-
 {{{billing_context}}}
 """.strip()

+AGENT_CORE_MEMORY_PROMPT = """
+{{{core_memory}}}
+New memories will automatically be added to the core memory as the conversation progresses. If users ask to save, update, or delete the core memory, say you have done it. If the '/remember [information]' command is used, the information gets appended verbatim to core memory.
+""".strip()
+
 # Conditional prompts
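
With the split above, core memory, which changes as the conversation progresses, now travels in its own trailing system message instead of being interpolated into AGENT_PROMPT, so memory churn no longer invalidates the cached static prefix. A simplified sketch of the resulting layout, with the prompt bodies trimmed to bare variables (mustache renders any variable left unbound as an empty string):

    from langchain_core.prompts import ChatPromptTemplate

    # Trimmed stand-ins for the real prompt constants.
    AGENT_PROMPT = "{{{role}}}\n{{{tool_usage_policy}}}\n{{{billing_context}}}"
    AGENT_CORE_MEMORY_PROMPT = "{{{core_memory}}}"

    system_prompts = ChatPromptTemplate.from_messages(
        [
            ("system", AGENT_PROMPT),              # static prefix, cached for 1h
            ("system", AGENT_CORE_MEMORY_PROMPT),  # churn-prone suffix
        ],
        template_format="mustache",
    ).format_messages(core_memory="User prefers concise responses")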
@@ -445,6 +445,46 @@ class TestAgentNode(ClickhouseTestMixin, BaseTest):
         self.assertIn("You are currently in project ", system_content)
         self.assertIn("The user's name appears to be ", system_content)

+    async def test_node_includes_core_memory_in_system_prompt(self):
+        """Test that core memory content is appended to the conversation in system prompts"""
+        with (
+            patch("os.environ", {"ANTHROPIC_API_KEY": "foo"}),
+            patch("langchain_anthropic.chat_models.ChatAnthropic._agenerate") as mock_generate,
+            patch("ee.hogai.graph.agent_modes.nodes.AgentExecutable._aget_core_memory_text") as mock_core_memory,
+        ):
+            mock_core_memory.return_value = "User prefers concise responses and technical details"
+            mock_generate.return_value = ChatResult(
+                generations=[ChatGeneration(message=AIMessage(content="Response"))],
+                llm_output={},
+            )
+
+            node = _create_agent_node(self.team, self.user)
+            config = RunnableConfig(configurable={})
+            node._config = config
+
+            await node.arun(AssistantState(messages=[HumanMessage(content="Test")]), config)
+
+            # Verify _agenerate was called
+            mock_generate.assert_called_once()
+
+            # Get the messages passed to _agenerate
+            call_args = mock_generate.call_args
+            messages = call_args[0][0]
+
+            # Check system messages contain core memory
+            system_messages = [msg for msg in messages if isinstance(msg, SystemMessage)]
+            self.assertGreater(len(system_messages), 0)
+
+            content_parts = []
+            for msg in system_messages:
+                if isinstance(msg.content, str):
+                    content_parts.append(msg.content)
+                else:
+                    content_parts.append(str(msg.content))
+            system_content = "\n\n".join(content_parts)
+
+            self.assertIn("User prefers concise responses and technical details", system_content)
+
     @parameterized.expand(
         [
             # (membership_level, add_context, expected_prompt)
@@ -1,5 +1,5 @@
 from collections.abc import Mapping, Sequence
-from typing import Any, cast
+from typing import Any, Literal, cast

 from langchain_core import messages
 from langchain_core.messages import BaseMessage
@@ -15,17 +15,22 @@ def get_anthropic_thinking_from_assistant_message(message: AssistantMessage) ->
     return []


-def add_cache_control(message: BaseMessage) -> BaseMessage:
+def add_cache_control(message: BaseMessage, ttl: Literal["5m", "1h"] | None = None) -> BaseMessage:
+    ttl = ttl or "5m"
     if isinstance(message.content, str):
         message.content = [
-            {"type": "text", "text": message.content, "cache_control": {"type": "ephemeral"}},
+            {"type": "text", "text": message.content, "cache_control": {"type": "ephemeral", "ttl": ttl}},
         ]
     if message.content:
         last_content = message.content[-1]
         if isinstance(last_content, str):
-            message.content[-1] = {"type": "text", "text": last_content, "cache_control": {"type": "ephemeral"}}
+            message.content[-1] = {
+                "type": "text",
+                "text": last_content,
+                "cache_control": {"type": "ephemeral", "ttl": ttl},
+            }
         else:
-            last_content["cache_control"] = {"type": "ephemeral"}
+            last_content["cache_control"] = {"type": "ephemeral", "ttl": ttl}
     return message
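
A quick usage sketch of the updated helper, with placeholder message contents (add_cache_control is the function from the hunk above; its import path is not shown in this excerpt). Omitting ttl preserves the old 5-minute behavior, while the agent node opts its static prefix into the 1-hour tier:

    from langchain_core.messages import SystemMessage

    short_lived = add_cache_control(SystemMessage(content="volatile suffix"))
    long_lived = add_cache_control(SystemMessage(content="static prefix"), ttl="1h")

    assert short_lived.content[0]["cache_control"] == {"type": "ephemeral", "ttl": "5m"}
    assert long_lived.content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}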
@@ -53,7 +53,7 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[0], dict)
         self.assertEqual(message.content[0]["type"], "text")
         self.assertEqual(message.content[0]["text"], "Test message")
-        self.assertEqual(message.content[0]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[0]["cache_control"], {"type": "ephemeral", "ttl": "5m"})

     def test_add_cache_control_list_content_with_string_last(self):
         """Test adding cache control to message with list content ending in string"""
@@ -79,7 +79,7 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[1], dict)
         self.assertEqual(message.content[1]["type"], "text")
         self.assertEqual(message.content[1]["text"], "Second part as string")
-        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral", "ttl": "5m"})

     def test_add_cache_control_list_content_with_dict_last(self):
         """Test adding cache control to message with list content ending in dict"""
@@ -103,4 +103,4 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[1], dict)
         self.assertEqual(message.content[1]["type"], "image")
         self.assertEqual(message.content[1]["url"], "http://example.com/image.jpg")
-        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral", "ttl": "5m"})