Mirror of https://github.com/BillyOutlast/posthog.git, synced 2026-02-04 03:01:23 +01:00
chore(ph-ai): increase cache ttl (#41458)
@@ -42,9 +42,9 @@ from ee.hogai.utils.types.base import NodePath
 from .compaction_manager import AnthropicConversationCompactionManager
 from .prompts import (
+    AGENT_CORE_MEMORY_PROMPT,
     AGENT_PROMPT,
     BASIC_FUNCTIONALITY_PROMPT,
-    CORE_MEMORY_INSTRUCTIONS_PROMPT,
     DOING_TASKS_PROMPT,
     PROACTIVENESS_PROMPT,
     ROLE_PROMPT,
@@ -212,6 +212,7 @@ class AgentExecutable(BaseAgentExecutable):
         system_prompts = ChatPromptTemplate.from_messages(
             [
                 ("system", self._get_system_prompt(state, config)),
+                ("system", AGENT_CORE_MEMORY_PROMPT),
             ],
             template_format="mustache",
         ).format_messages(
@@ -221,7 +222,7 @@ class AgentExecutable(BaseAgentExecutable):
         )

         # Mark the longest default prefix as cacheable
-        add_cache_control(system_prompts[-1])
+        add_cache_control(system_prompts[0], ttl="1h")

         message = await model.ainvoke(system_prompts + langchain_messages, config)
         assistant_message = self._process_output_message(message)
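
This hunk is the heart of the commit: instead of marking the last system message as cacheable under the default 5-minute TTL, the node now marks the first, static system prompt and opts it into Anthropic's 1-hour cache tier, so the long default prefix survives slower-paced conversations. A minimal sketch of the message shape this produces, with a placeholder string standing in for the formatted AGENT_PROMPT:

    from langchain_core.messages import SystemMessage

    prompt = SystemMessage(content="<static agent prompt>")  # placeholder text

    # Equivalent of add_cache_control(prompt, ttl="1h") for string content,
    # per the helper updated later in this commit:
    prompt.content = [
        {
            "type": "text",
            "text": "<static agent prompt>",
            # The 1-hour ephemeral tier keeps the prefix cached across turns
            # that arrive slower than the default 5-minute window.
            "cache_control": {"type": "ephemeral", "ttl": "1h"},
        }
    ]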
@@ -263,7 +264,6 @@ class AgentExecutable(BaseAgentExecutable):
        - `{{{task_management}}}`
        - `{{{doing_tasks}}}`
        - `{{{tool_usage_policy}}}`
-       - `{{{core_memory_instructions}}}`

        The variables from above can have the following nested variables that will be injected:
        - `{{{groups}}}` – a prompt containing the description of the groups.
@@ -291,7 +291,6 @@ class AgentExecutable(BaseAgentExecutable):
             task_management=TASK_MANAGEMENT_PROMPT,
             doing_tasks=DOING_TASKS_PROMPT,
             tool_usage_policy=TOOL_USAGE_POLICY_PROMPT,
-            core_memory_instructions=CORE_MEMORY_INSTRUCTIONS_PROMPT,
         )

     async def _get_billing_prompt(self) -> str:
@@ -132,11 +132,6 @@ TOOL_USAGE_POLICY_PROMPT = """
 </tool_usage_policy>
 """.strip()

-CORE_MEMORY_INSTRUCTIONS_PROMPT = """
-{{{core_memory}}}
-New memories will automatically be added to the core memory as the conversation progresses. If users ask to save, update, or delete the core memory, say you have done it. If the '/remember [information]' command is used, the information gets appended verbatim to core memory.
-""".strip()
-
 AGENT_PROMPT = """
 {{{role}}}
@@ -155,8 +150,11 @@ AGENT_PROMPT = """
 {{{tool_usage_policy}}}

-{{{core_memory_instructions}}}
-
 {{{billing_context}}}
 """.strip()

+AGENT_CORE_MEMORY_PROMPT = """
+{{{core_memory}}}
+New memories will automatically be added to the core memory as the conversation progresses. If users ask to save, update, or delete the core memory, say you have done it. If the '/remember [information]' command is used, the information gets appended verbatim to core memory.
+""".strip()
+
 # Conditional prompts
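
With the split above, core memory, which changes as the conversation progresses, now travels in its own trailing system message instead of being interpolated into AGENT_PROMPT, so memory churn no longer invalidates the cached static prefix. A simplified sketch of the resulting layout, with the prompt bodies trimmed to bare variables (mustache renders any variable left unbound as an empty string):

    from langchain_core.prompts import ChatPromptTemplate

    # Trimmed stand-ins for the real prompt constants.
    AGENT_PROMPT = "{{{role}}}\n{{{tool_usage_policy}}}\n{{{billing_context}}}"
    AGENT_CORE_MEMORY_PROMPT = "{{{core_memory}}}"

    system_prompts = ChatPromptTemplate.from_messages(
        [
            ("system", AGENT_PROMPT),              # static prefix, cached for 1h
            ("system", AGENT_CORE_MEMORY_PROMPT),  # churn-prone suffix
        ],
        template_format="mustache",
    ).format_messages(core_memory="User prefers concise responses")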
@@ -445,6 +445,46 @@ class TestAgentNode(ClickhouseTestMixin, BaseTest):
         self.assertIn("You are currently in project ", system_content)
         self.assertIn("The user's name appears to be ", system_content)

+    async def test_node_includes_core_memory_in_system_prompt(self):
+        """Test that core memory content is appended to the conversation in system prompts"""
+        with (
+            patch("os.environ", {"ANTHROPIC_API_KEY": "foo"}),
+            patch("langchain_anthropic.chat_models.ChatAnthropic._agenerate") as mock_generate,
+            patch("ee.hogai.graph.agent_modes.nodes.AgentExecutable._aget_core_memory_text") as mock_core_memory,
+        ):
+            mock_core_memory.return_value = "User prefers concise responses and technical details"
+            mock_generate.return_value = ChatResult(
+                generations=[ChatGeneration(message=AIMessage(content="Response"))],
+                llm_output={},
+            )
+
+            node = _create_agent_node(self.team, self.user)
+            config = RunnableConfig(configurable={})
+            node._config = config
+
+            await node.arun(AssistantState(messages=[HumanMessage(content="Test")]), config)
+
+            # Verify _agenerate was called
+            mock_generate.assert_called_once()
+
+            # Get the messages passed to _agenerate
+            call_args = mock_generate.call_args
+            messages = call_args[0][0]
+
+            # Check system messages contain core memory
+            system_messages = [msg for msg in messages if isinstance(msg, SystemMessage)]
+            self.assertGreater(len(system_messages), 0)
+
+            content_parts = []
+            for msg in system_messages:
+                if isinstance(msg.content, str):
+                    content_parts.append(msg.content)
+                else:
+                    content_parts.append(str(msg.content))
+            system_content = "\n\n".join(content_parts)
+
+            self.assertIn("User prefers concise responses and technical details", system_content)
+
     @parameterized.expand(
         [
             # (membership_level, add_context, expected_prompt)
@@ -1,5 +1,5 @@
 from collections.abc import Mapping, Sequence
-from typing import Any, cast
+from typing import Any, Literal, cast

 from langchain_core import messages
 from langchain_core.messages import BaseMessage
@@ -15,17 +15,22 @@ def get_anthropic_thinking_from_assistant_message(message: AssistantMessage) ->
     return []


-def add_cache_control(message: BaseMessage) -> BaseMessage:
+def add_cache_control(message: BaseMessage, ttl: Literal["5m", "1h"] | None = None) -> BaseMessage:
+    ttl = ttl or "5m"
     if isinstance(message.content, str):
         message.content = [
-            {"type": "text", "text": message.content, "cache_control": {"type": "ephemeral"}},
+            {"type": "text", "text": message.content, "cache_control": {"type": "ephemeral", "ttl": ttl}},
         ]
     if message.content:
         last_content = message.content[-1]
         if isinstance(last_content, str):
-            message.content[-1] = {"type": "text", "text": last_content, "cache_control": {"type": "ephemeral"}}
+            message.content[-1] = {
+                "type": "text",
+                "text": last_content,
+                "cache_control": {"type": "ephemeral", "ttl": ttl},
+            }
         else:
-            last_content["cache_control"] = {"type": "ephemeral"}
+            last_content["cache_control"] = {"type": "ephemeral", "ttl": ttl}
     return message
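
A quick usage sketch of the updated helper, with placeholder message contents (add_cache_control is the function from the hunk above; its import path is not shown in this excerpt). Omitting ttl preserves the old 5-minute behavior, while the agent node opts its static prefix into the 1-hour tier:

    from langchain_core.messages import SystemMessage

    short_lived = add_cache_control(SystemMessage(content="volatile suffix"))
    long_lived = add_cache_control(SystemMessage(content="static prefix"), ttl="1h")

    assert short_lived.content[0]["cache_control"] == {"type": "ephemeral", "ttl": "5m"}
    assert long_lived.content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}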
@@ -53,7 +53,7 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[0], dict)
         self.assertEqual(message.content[0]["type"], "text")
         self.assertEqual(message.content[0]["text"], "Test message")
-        self.assertEqual(message.content[0]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[0]["cache_control"], {"type": "ephemeral", "ttl": "5m"})

     def test_add_cache_control_list_content_with_string_last(self):
         """Test adding cache control to message with list content ending in string"""
@@ -79,7 +79,7 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[1], dict)
         self.assertEqual(message.content[1]["type"], "text")
         self.assertEqual(message.content[1]["text"], "Second part as string")
-        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral", "ttl": "5m"})

     def test_add_cache_control_list_content_with_dict_last(self):
         """Test adding cache control to message with list content ending in dict"""
@@ -103,4 +103,4 @@ class TestAnthropicUtils(BaseTest):
         assert isinstance(message.content[1], dict)
         self.assertEqual(message.content[1]["type"], "image")
         self.assertEqual(message.content[1]["url"], "http://example.com/image.jpg")
-        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral"})
+        self.assertEqual(message.content[1]["cache_control"], {"type": "ephemeral", "ttl": "5m"})