feat(llma): add errors tab with materialized $ai_is_error (#41196)

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Authored by Andrew Maguire on 2025-11-13 11:59:09 +00:00, committed by GitHub
parent e86bdeaf95
commit d5f71facc5
17 changed files with 820 additions and 13 deletions

View File

@@ -206,6 +206,7 @@ export const commonConfig = {
'.woff2': 'file',
'.mp3': 'file',
'.lottie': 'file',
'.sql': 'text',
},
metafile: true,
}

View File

@@ -162,6 +162,11 @@ function createEntry(entry) {
test: /monaco-editor\/.*\.m?js/,
loader: 'babel-loader',
},
{
// Apply rule for .sql files
test: /\.sql$/,
type: 'asset/source',
},
],
},
// add devServer config only to 'main' entry

View File

@@ -105,6 +105,7 @@ const config: Config = {
// A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
moduleNameMapper: {
'^.+\\.(css|less|scss|svg|png|lottie)$': '<rootDir>/src/test/mocks/styleMock.js',
'^.+\\.sql\\?raw$': '<rootDir>/src/test/mocks/rawFileMock.js',
'^~/(.*)$': '<rootDir>/src/$1',
'^@posthog/lemon-ui(|/.*)$': '<rootDir>/@posthog/lemon-ui/src/$1',
'^@posthog/ee/exports': ['<rootDir>/../ee/frontend/exports', '<rootDir>/@posthog/ee/exports'],

View File

@@ -40,6 +40,12 @@ declare module '*.json?url' {
export default content
}
// This fixes TS errors when importing an .sql file with ?raw suffix
declare module '*.sql?raw' {
const content: string
export default content
}
// This fixes a TS error where @tiptap/react/menus cannot be found because of our moduleResolution
declare module '@tiptap/react/menus' {
export * from '@tiptap/react/dist/menus/index.d.ts'

View File

@@ -303,6 +303,7 @@ export const FEATURE_FLAGS = {
SWITCH_SUBSCRIPTION_PLAN: 'switch-subscription-plan', // owner: @a-lider #team-platform-features
LLM_ANALYTICS_DATASETS: 'llm-analytics-datasets', // owner: #team-llm-analytics #team-posthog-ai
LLM_ANALYTICS_SESSIONS_VIEW: 'llm-analytics-sessions-view', // owner: #team-llm-analytics
LLM_ANALYTICS_ERRORS_TAB: 'llm-analytics-errors-tab', // owner: #team-llm-analytics
LLM_ANALYTICS_TEXT_VIEW: 'llm-analytics-text-view', // owner: #team-llm-analytics
POSTHOG_AI_BILLING_DISPLAY: 'posthog-ai-billing-display', // owner: #team-posthog-ai
AMPLITUDE_BATCH_IMPORT_OPTIONS: 'amplitude-batch-import-options', // owner: #team-ingestion

View File

@@ -0,0 +1 @@
module.exports = ''

View File

@@ -0,0 +1,39 @@
from posthog.clickhouse.client.connection import NodeRole
from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
ADD_COLUMN_SHARDED_EVENTS = """
ALTER TABLE sharded_events
ADD COLUMN IF NOT EXISTS `mat_$ai_is_error` Nullable(String)
MATERIALIZED JSONExtract(properties, '$ai_is_error', 'Nullable(String)')
"""
ADD_COLUMN_EVENTS = """
ALTER TABLE events
ADD COLUMN IF NOT EXISTS `mat_$ai_is_error` Nullable(String)
COMMENT 'column_materializer::properties::$ai_is_error'
"""
ADD_SET_INDEX_SHARDED_EVENTS = """
ALTER TABLE sharded_events
ADD INDEX IF NOT EXISTS `set_$ai_is_error` `mat_$ai_is_error`
TYPE set(7)
GRANULARITY 1
"""
operations = [
run_sql_with_exceptions(
ADD_COLUMN_SHARDED_EVENTS, node_roles=[NodeRole.DATA], sharded=True, is_alter_on_replicated_table=True
),
run_sql_with_exceptions(
ADD_COLUMN_EVENTS,
node_roles=[NodeRole.DATA, NodeRole.COORDINATOR],
sharded=False,
is_alter_on_replicated_table=False,
),
run_sql_with_exceptions(
ADD_SET_INDEX_SHARDED_EVENTS,
node_roles=[NodeRole.DATA],
sharded=True,
is_alter_on_replicated_table=True,
),
]
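A quick post-migration sanity check (a sketch; it assumes this codebase's posthog.clickhouse.client.sync_execute helper and reads ClickHouse system tables directly):

from posthog.clickhouse.client import sync_execute

# The materialized column should now exist on every shard
columns = sync_execute(
    "SELECT name, type FROM system.columns "
    "WHERE table = 'sharded_events' AND name = 'mat_$ai_is_error'"
)

# ...along with the set(7) data-skipping index added above
indexes = sync_execute(
    "SELECT name, type, expr FROM system.data_skipping_indexes "
    "WHERE table = 'sharded_events' AND name = 'set_$ai_is_error'"
)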

View File

@@ -1 +1 @@
0175_precalculated_events
0176_materialize_ai_is_error

View File

@@ -881,17 +881,21 @@ class _Printer(Visitor[str]):
if left in hack_sessions_timestamp or right in hack_sessions_timestamp:
not_nullable = True
# :HACK: Prevent ifNull() wrapping for $ai_trace_id and $ai_session_id to allow bloom filter index usage
# The materialized columns mat_$ai_trace_id and mat_$ai_session_id have bloom filter indexes for performance
# :HACK: Prevent ifNull() wrapping for $ai_trace_id, $ai_session_id, and $ai_is_error to allow index usage
# The materialized columns mat_$ai_trace_id, mat_$ai_session_id, and mat_$ai_is_error have bloom filter indexes for performance
if (
"mat_$ai_trace_id" in left
or "mat_$ai_trace_id" in right
or "mat_$ai_session_id" in left
or "mat_$ai_session_id" in right
or "mat_$ai_is_error" in left
or "mat_$ai_is_error" in right
or "$ai_trace_id" in left
or "$ai_trace_id" in right
or "$ai_session_id" in left
or "$ai_session_id" in right
or "$ai_is_error" in left
or "$ai_is_error" in right
):
not_nullable = True
@@ -1668,10 +1672,10 @@ class _Printer(Visitor[str]):
materialized_property_source = self.__get_materialized_property_source_for_property_type(type)
if materialized_property_source is not None:
# Special handling for $ai_trace_id and $ai_session_id to avoid nullIf wrapping for bloom filter index optimization
# Special handling for $ai_trace_id, $ai_session_id, and $ai_is_error to avoid nullIf wrapping for index optimization
if (
len(type.chain) == 1
and type.chain[0] in ("$ai_trace_id", "$ai_session_id")
and type.chain[0] in ("$ai_trace_id", "$ai_session_id", "$ai_is_error")
and isinstance(materialized_property_source, PrintableMaterializedColumn)
):
materialized_property_sql = str(materialized_property_source)
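The effect of forcing not_nullable, sketched with illustrative strings (assumed shape of the printed SQL, not captured output):

# Nullable comparisons normally get NULL-safe wrapping, which stops ClickHouse
# from serving the predicate straight off the data-skipping index:
wrapped = "ifNull(equals(events.`mat_$ai_is_error`, 'true'), 0)"

# With the hack above, the printer emits the bare comparison instead, so the
# set/bloom-filter indexes on the materialized columns can prune granules:
bare = "equals(events.`mat_$ai_is_error`, 'true')"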

View File

@@ -0,0 +1,21 @@
from pathlib import Path
def get_errors_query(filters: str = "true", order_by: str = "last_seen", order_direction: str = "DESC") -> str:
"""
Load and parameterize the errors normalization query from errors.sql.
Args:
filters: HogQL WHERE clause conditions (default: "true")
order_by: Column to sort by (default: "last_seen")
order_direction: Sort direction "ASC" or "DESC" (default: "DESC")
Returns:
Complete HogQL query string with parameters substituted
"""
query_path = Path(__file__).parent / "errors.sql"
with open(query_path) as f:
query_template = f.read()
# Replace template placeholders with actual values
return query_template.format(filters=filters, orderBy=order_by, orderDirection=order_direction)
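A minimal usage sketch (values illustrative). Note that .format() also collapses the {{ }} escapes in errors.sql to literal braces, so regex quantifiers such as [0-9a-f]{{8}} come out as valid RE2 syntax:

from products.llm_analytics.backend.queries import get_errors_query

query = get_errors_query(filters="team_id = 42", order_by="users", order_direction="ASC")
assert "{filters}" not in query  # placeholder substituted
assert "[0-9a-f]{8}" in query    # '{{8}}' unescaped by .format()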

View File

@@ -0,0 +1,159 @@
/*
-- Error normalization pipeline:
extract
--> normalize UUIDs
--> normalize timestamps
--> normalize paths
--> normalize response IDs
--> normalize tool call IDs
--> normalize generic IDs
--> normalize token counts
--> normalize large numeric IDs
--> normalize all remaining numbers
-- This multi-step CTE approach makes it easy to understand and maintain each normalization step
-- Ordered from most specific to least specific to prevent pattern interference
*/
WITH extracted_errors AS (
-- Step 1: Extract error messages from various JSON structures in $ai_error
SELECT
distinct_id,
timestamp,
event,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$ai_trace_id'), ''), 'null'), '^"|"$', '') as ai_trace_id,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$ai_session_id'), ''), 'null'), '^"|"$', '') as ai_session_id,
CASE
-- For common Anthropic format: extract the actual error message text
-- This gives us: "Your credit balance is too low..." instead of JSON structure
WHEN notEmpty(JSONExtractString(JSONExtractString(JSONExtractString(properties, '$ai_error'), 'error'), 'error'))
THEN JSONExtractString(JSONExtractString(JSONExtractString(JSONExtractString(properties, '$ai_error'), 'error'), 'error'), 'message')
-- Try nested error.message pattern
WHEN notEmpty(JSONExtractString(JSONExtractString(JSONExtractString(properties, '$ai_error'), 'error'), 'message'))
THEN JSONExtractString(JSONExtractString(JSONExtractString(properties, '$ai_error'), 'error'), 'message')
-- Try direct message field
WHEN notEmpty(JSONExtractString(JSONExtractString(properties, '$ai_error'), 'message'))
THEN JSONExtractString(JSONExtractString(properties, '$ai_error'), 'message')
-- Otherwise keep the raw string as-is to preserve format for matching
ELSE JSONExtractString(properties, '$ai_error')
END as raw_error
FROM events
WHERE event IN ('$ai_generation', '$ai_span', '$ai_trace', '$ai_embedding')
AND properties.$ai_is_error = 'true'
AND {filters}
),
uuids_normalized AS (
-- Step 2: Normalize UUIDs and request IDs
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(raw_error, '(req_[a-zA-Z0-9]+|[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}})', '<ID>') as error_text
FROM extracted_errors
),
timestamps_normalized AS (
-- Step 3: Normalize ISO timestamps
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '[0-9]{{4}}-[0-9]{{2}}-[0-9]{{2}}T[0-9]{{2}}:[0-9]{{2}}:[0-9]{{2}}.[0-9]+Z?', '<TIMESTAMP>') as error_text
FROM uuids_normalized
),
paths_normalized AS (
-- Step 4: Normalize cloud resource paths
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, 'projects/[0-9a-z-]+(/[a-z]+/[0-9a-z-]+)+', 'projects/<PATH>') as error_text
FROM timestamps_normalized
),
response_ids_normalized AS (
-- Step 5: Normalize responseId fields in error payloads
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '"responseId":"[a-zA-Z0-9_-]+"', '"responseId":"<RESPONSE_ID>"') as error_text
FROM paths_normalized
),
tool_call_ids_normalized AS (
-- Step 6: Normalize tool_call_id values
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, 'tool_call_id=[''"][a-zA-Z0-9_-]+[''"]', 'tool_call_id=''<TOOL_CALL_ID>''') as error_text
FROM response_ids_normalized
),
generic_ids_normalized AS (
-- Step 7: Normalize generic ID patterns - catches any id='...' or id="..." pattern
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '(?i)id=[''"][a-zA-Z0-9_-]+[''"]', 'id=''<ID>''') as error_text
FROM tool_call_ids_normalized
),
token_counts_normalized AS (
-- Step 8: Normalize token count values
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '"tokenCount":[0-9]+', '"tokenCount":<TOKEN_COUNT>') as error_text
FROM generic_ids_normalized
),
ids_normalized AS (
-- Step 9: Normalize large numeric IDs (9+ digits)
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '[0-9]{{9,}}', '<ID>') as error_text
FROM token_counts_normalized
),
all_numbers_normalized AS (
-- Step 10: Normalize all remaining numbers as final fallback
SELECT
distinct_id,
timestamp,
event,
ai_trace_id,
ai_session_id,
replaceRegexpAll(error_text, '[0-9]+', '<N>') as normalized_error
FROM ids_normalized
)
SELECT
normalized_error as error,
countDistinctIf(ai_trace_id, isNotNull(ai_trace_id) AND ai_trace_id != '') as traces,
countIf(event = '$ai_generation') as generations,
countIf(event = '$ai_span') as spans,
countIf(event = '$ai_embedding') as embeddings,
countDistinctIf(ai_session_id, isNotNull(ai_session_id) AND ai_session_id != '') as sessions,
uniq(distinct_id) as users,
uniq(toDate(timestamp)) as days_seen,
min(timestamp) as first_seen,
max(timestamp) as last_seen
FROM all_numbers_normalized
GROUP BY normalized_error
ORDER BY {orderBy} {orderDirection}
LIMIT 50
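For intuition, the same cascade in plain Python (a sketch: re only approximates ClickHouse's RE2, and the patterns are shown after .format() has collapsed {{ }} to single braces):

import re

# (pattern, replacement) pairs in pipeline order, most specific first
STEPS = [
    (r"(req_[a-zA-Z0-9]+|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})", "<ID>"),
    (r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+Z?", "<TIMESTAMP>"),
    (r"projects/[0-9a-z-]+(/[a-z]+/[0-9a-z-]+)+", "projects/<PATH>"),
    (r'"responseId":"[a-zA-Z0-9_-]+"', '"responseId":"<RESPONSE_ID>"'),
    (r"tool_call_id=['\"][a-zA-Z0-9_-]+['\"]", "tool_call_id='<TOOL_CALL_ID>'"),
    (r"(?i)id=['\"][a-zA-Z0-9_-]+['\"]", "id='<ID>'"),
    (r'"tokenCount":[0-9]+', '"tokenCount":<TOKEN_COUNT>'),
    (r"[0-9]{9,}", "<ID>"),
    (r"[0-9]+", "<N>"),
]

def normalize(error: str) -> str:
    for pattern, replacement in STEPS:
        error = re.sub(pattern, replacement, error)
    return error

print(normalize('Timeout at 2025-11-08T14:25:51.767Z for req_abc123, "tokenCount":7125 (status 429)'))
# Timeout at <TIMESTAMP> for <ID>, "tokenCount":<TOKEN_COUNT> (status <N>)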

View File

@@ -0,0 +1,291 @@
"""Tests for LLM Analytics error normalization logic.
These tests verify that the error normalization pipeline correctly groups errors
that differ only in dynamic values like IDs, timestamps, token counts, etc.
"""
import uuid
from datetime import UTC, datetime
from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events
from parameterized import parameterized
from posthog.hogql.query import execute_hogql_query
from products.llm_analytics.backend.queries import get_errors_query
class TestErrorNormalization(ClickhouseTestMixin, APIBaseTest):
"""Test the 10-step error normalization pipeline."""
def _create_ai_event_with_error(self, error_message: str, distinct_id: str | None = None):
"""Helper to create an AI event with a specific error message."""
if distinct_id is None:
distinct_id = f"user_{uuid.uuid4().hex[:8]}"
return _create_event(
team=self.team,
event="$ai_generation",
distinct_id=distinct_id,
properties={
"$ai_error": error_message,
"$ai_is_error": "true",
"$ai_model": "test-model",
"$ai_provider": "test-provider",
},
timestamp=datetime.now(tz=UTC),
)
def _execute_normalization_query(self) -> list:
"""Execute the error normalization query and return normalized errors."""
# Flush events to ClickHouse
flush_persons_and_events()
# Load query from shared errors.sql file and customize for testing
base_query = get_errors_query(
filters=f"team_id = {self.team.pk}",
order_by="generations",
order_direction="DESC",
)
# Modify the query to count generations (which our test events are) instead of all metrics
# Replace the final SELECT with a simpler version for testing
query = base_query.replace(
"""SELECT
normalized_error as error,
countDistinctIf(ai_trace_id, isNotNull(ai_trace_id) AND ai_trace_id != '') as traces,
countIf(event = '$ai_generation') as generations,
countIf(event = '$ai_span') as spans,
countIf(event = '$ai_embedding') as embeddings,
countDistinctIf(ai_session_id, isNotNull(ai_session_id) AND ai_session_id != '') as sessions,
uniq(distinct_id) as users,
uniq(toDate(timestamp)) as days_seen,
min(timestamp) as first_seen,
max(timestamp) as last_seen
FROM all_numbers_normalized
GROUP BY normalized_error
ORDER BY {orderBy} {orderDirection}
LIMIT 50""",
"""SELECT
normalized_error as error,
countIf(event = '$ai_generation') as occurrences
FROM all_numbers_normalized
GROUP BY normalized_error
ORDER BY occurrences DESC""",
)
result = execute_hogql_query(
query=query,
team=self.team,
)
# Return error and generations count (index 2, not 1 which is traces)
# Query returns: (error, traces, generations, spans, embeddings, sessions, users, days_seen, first_seen, last_seen)
return [(row[0], row[2]) for row in result.results]
@parameterized.expand(
[
# Test Step 9: Large numeric IDs (9+ digits)
(
"ID normalization",
[
"Error in project 1234567890",
"Error in project 9876543210",
],
"Error in project <ID>",
),
# Test Step 2: UUIDs and request IDs
(
"UUID normalization",
[
"Request req_abc123def456 failed",
"Request req_xyz789ghi012 failed",
],
"Request <ID> failed",
),
(
"UUID format normalization",
[
"Error 550e8400-e29b-41d4-a716-446655440000 occurred",
"Error 123e4567-e89b-12d3-a456-426614174000 occurred",
],
"Error <ID> occurred",
),
# Test Step 3: ISO timestamps
(
"Timestamp normalization",
[
"Timeout at 2025-11-08T14:25:51.767Z",
"Timeout at 2025-11-09T10:30:22.123Z",
],
"Timeout at <TIMESTAMP>",
),
# Test Step 4: Cloud resource paths
(
"GCP path normalization",
[
"Model projects/123/locations/us-west2/publishers/google/models/gemini-pro not found",
"Model projects/456/locations/europe-west1/publishers/google/models/claude-2 not found",
],
"Model projects/<PATH> not found",
),
# Test Step 5: Response IDs
(
"Response ID normalization",
[
'API error: "responseId":"h2sPacmZI4OWvPEPvIS16Ac"',
'API error: "responseId":"abcXYZ123def456GHI789"',
],
'API error: "responseId":"<RESPONSE_ID>"',
),
# Test Step 6: Tool call IDs
(
"Tool call ID normalization",
[
"tool_call_id='toolu_01LCbNr67BxhgUH6gndPCELW' failed",
"tool_call_id='toolu_99XYZabcDEF123ghiJKL456' failed",
],
"tool_call_id='<TOOL_CALL_ID>' failed",
),
# Test Step 7: Generic IDs (any alphanumeric pattern in id='...')
(
"Generic ID normalization",
[
"Error with id='e8631f8c4650120cd5848570185bbcd7' occurred",
"Error with id='a1b2c3d4e5f6a0b1c2d3e4f5abcdef01' occurred",
"Error with id='s1' occurred",
"Error with id='user_abc123' occurred",
],
"Error with id='<ID>' occurred",
),
# Test Step 8: Token counts
(
"Token count normalization",
[
'Limit exceeded: "tokenCount":7125',
'Limit exceeded: "tokenCount":15000',
],
'Limit exceeded: "tokenCount":<TOKEN_COUNT>',
),
# Test Step 10: All remaining numbers
(
"General number normalization",
[
"Expected 2 arguments but got 5",
"Expected 10 arguments but got 15",
],
"Expected <N> arguments but got <N>",
),
(
"Port number normalization",
[
"Connection refused on port 8080",
"Connection refused on port 3000",
],
"Connection refused on port <N>",
),
(
"HTTP status code normalization",
[
"Request failed with status 429",
"Request failed with status 500",
],
"Request failed with status <N>",
),
]
)
def test_error_normalization_step(self, test_name, error_variants, expected_normalized):
"""Test that error variants are normalized to the same canonical form."""
# Create events with different error variants
for error in error_variants:
self._create_ai_event_with_error(error)
# Execute normalization query
results = self._execute_normalization_query()
# Should have exactly one normalized error
assert len(results) == 1, f"{test_name}: Expected 1 normalized error, got {len(results)}: {results}"
normalized_error, occurrence_count = results[0]
# Check it matches expected pattern
assert (
normalized_error == expected_normalized
), f"{test_name}: Expected '{expected_normalized}', got '{normalized_error}'"
# Check all variants were grouped together
assert occurrence_count == len(
error_variants
), f"{test_name}: Expected {len(error_variants)} occurrences, got {occurrence_count}"
def test_complex_error_with_multiple_normalizations(self):
"""Test that errors requiring multiple normalization steps are handled correctly."""
error_variants = [
# Use single quotes in test data to match normalization regex
'Error at 2025-11-08T14:25:51.767Z in project 1234567890: "responseId":"abc123", "tokenCount":5000, tool_call_id=\'toolu_XYZ\' (status 429)',
'Error at 2025-11-09T10:30:22.123Z in project 9876543210: "responseId":"def456", "tokenCount":7500, tool_call_id=\'toolu_ABC\' (status 500)',
]
expected = 'Error at <TIMESTAMP> in project <ID>: "responseId":"<RESPONSE_ID>", "tokenCount":<TOKEN_COUNT>, tool_call_id=\'<TOOL_CALL_ID>\' (status <N>)'
for error in error_variants:
self._create_ai_event_with_error(error)
results = self._execute_normalization_query()
assert len(results) == 1, f"Expected 1 normalized error, got {len(results)}"
assert results[0][0] == expected
assert results[0][1] == len(error_variants)
def test_normalization_preserves_error_identity(self):
"""Test that different errors don't get incorrectly grouped together."""
errors = [
"Connection timeout", # Different base error
"Connection refused", # Different base error
"Authentication failed", # Different base error
]
for error in errors:
self._create_ai_event_with_error(error)
results = self._execute_normalization_query()
# Should have 3 distinct normalized errors
assert len(results) == 3, f"Expected 3 distinct errors, got {len(results)}: {results}"
# Each should appear once
for _, count in results:
assert count == 1
def test_empty_or_null_errors_handled(self):
"""Test that empty or null errors are handled gracefully."""
# Create events with various empty/null error values
_create_event(
team=self.team,
event="$ai_generation",
distinct_id="user_1",
properties={
"$ai_error": "",
"$ai_is_error": "true",
"$ai_model": "test",
},
)
_create_event(
team=self.team,
event="$ai_generation",
distinct_id="user_2",
properties={
"$ai_error": "null",
"$ai_is_error": "true",
"$ai_model": "test",
},
)
# Query should not crash
results = self._execute_normalization_query()
# Should filter out empty/null errors or group them
# Either way, query should complete successfully
assert isinstance(results, list)

View File

@@ -0,0 +1,175 @@
import { useActions, useValues } from 'kea'
import { combineUrl, router } from 'kea-router'
import { IconCopy } from '@posthog/icons'
import { LemonButton } from 'lib/lemon-ui/LemonButton'
import { Link } from 'lib/lemon-ui/Link'
import { Tooltip } from 'lib/lemon-ui/Tooltip'
import { copyToClipboard } from 'lib/utils/copyToClipboard'
import { urls } from 'scenes/urls'
import { DataTable } from '~/queries/nodes/DataTable/DataTable'
import { isHogQLQuery } from '~/queries/utils'
import { PropertyFilterType, PropertyOperator } from '~/types'
import { useSortableColumns } from './hooks/useSortableColumns'
import { llmAnalyticsLogic } from './llmAnalyticsLogic'
export function LLMAnalyticsErrors(): JSX.Element {
const { setDates, setShouldFilterTestAccounts, setPropertyFilters, setErrorsSort } = useActions(llmAnalyticsLogic)
const { errorsQuery, errorsSort } = useValues(llmAnalyticsLogic)
const { searchParams } = useValues(router)
const { renderSortableColumnTitle } = useSortableColumns(errorsSort, setErrorsSort)
return (
<DataTable
query={{
...errorsQuery,
showSavedFilters: true,
}}
setQuery={(query) => {
if (!isHogQLQuery(query.source)) {
console.warn('LLMAnalyticsErrors received a non-HogQL query:', query.source)
return
}
const { filters = {} } = query.source
const { dateRange = {} } = filters
setDates(dateRange.date_from || null, dateRange.date_to || null)
setShouldFilterTestAccounts(filters.filterTestAccounts || false)
setPropertyFilters(filters.properties || [])
}}
context={{
columns: {
error: {
renderTitle: () => (
<Tooltip title="Normalized error message with IDs, timestamps, and numbers replaced by placeholders for grouping">
<span>Error</span>
</Tooltip>
),
render: function RenderError(x) {
const errorValue = x.value
if (!errorValue || errorValue === 'null' || errorValue === '') {
return <span className="text-muted">No error</span>
}
const errorString = String(errorValue)
const displayValue =
errorString.length > 80 ? errorString.slice(0, 77) + '...' : errorString
// Extract the first 3 chunks of text between placeholders for filtering
// These chunks are the stable parts of the error message
const tokens = errorString
.split(/<ID>|<TIMESTAMP>|<PATH>|<RESPONSE_ID>|<TOOL_CALL_ID>|<TOKEN_COUNT>|<N>/)
.map((token) => token.trim())
.filter((token) => token.length >= 3) // Only keep meaningful chunks
.slice(0, 3) // Take first 3 chunks
return (
<div className="flex items-center gap-1">
<Tooltip title={errorString}>
<Link
to={
combineUrl(urls.llmAnalyticsTraces(), {
...searchParams,
filters: [
// First filter: only show traces with errors
{
type: PropertyFilterType.Event,
key: '$ai_is_error',
operator: PropertyOperator.Exact,
value: 'true',
},
// Then filter by key words from the error
...tokens.map((token) => ({
type: PropertyFilterType.Event,
key: '$ai_error',
operator: PropertyOperator.IContains,
value: token,
})),
],
}).url
}
className="font-mono text-sm"
>
{displayValue}
</Link>
</Tooltip>
<LemonButton
size="xsmall"
noPadding
icon={<IconCopy />}
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
copyToClipboard(errorString, 'error')
}}
tooltip="Copy error to clipboard"
className="opacity-50 hover:opacity-100"
/>
</div>
)
},
},
first_seen: {
renderTitle: () => renderSortableColumnTitle('first_seen', 'First Seen'),
},
last_seen: {
renderTitle: () => renderSortableColumnTitle('last_seen', 'Last Seen'),
},
traces: {
renderTitle: () => (
<Tooltip title="Number of unique traces with this error">
{renderSortableColumnTitle('traces', 'Traces')}
</Tooltip>
),
},
generations: {
renderTitle: () => (
<Tooltip title="Number of generations with this error">
{renderSortableColumnTitle('generations', 'Generations')}
</Tooltip>
),
},
spans: {
renderTitle: () => (
<Tooltip title="Number of spans with this error">
{renderSortableColumnTitle('spans', 'Spans')}
</Tooltip>
),
},
embeddings: {
renderTitle: () => (
<Tooltip title="Number of embeddings with this error">
{renderSortableColumnTitle('embeddings', 'Embeddings')}
</Tooltip>
),
},
sessions: {
renderTitle: () => (
<Tooltip title="Number of unique sessions with this error">
{renderSortableColumnTitle('sessions', 'Sessions')}
</Tooltip>
),
},
users: {
renderTitle: () => (
<Tooltip title="Number of unique users who encountered this error">
{renderSortableColumnTitle('users', 'Users')}
</Tooltip>
),
},
days_seen: {
renderTitle: () => (
<Tooltip title="Number of distinct days this error occurred">
{renderSortableColumnTitle('days_seen', 'Days Seen')}
</Tooltip>
),
},
},
}}
uniqueKey="llm-analytics-errors"
/>
)
}
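The chunk extraction above is the linchpin of the drill-down link: the table stores normalized errors while traces carry the raw $ai_error, so the link has to filter traces on the stable text between placeholders via IContains. The same idea in Python (a sketch mirroring the TSX, not shared code):

import re

PLACEHOLDER = re.compile(r"<ID>|<TIMESTAMP>|<PATH>|<RESPONSE_ID>|<TOOL_CALL_ID>|<TOKEN_COUNT>|<N>")

def filter_tokens(normalized_error: str) -> list[str]:
    # Split on placeholders, keep fragments of >= 3 chars, use the first three
    chunks = (c.strip() for c in PLACEHOLDER.split(normalized_error))
    return [c for c in chunks if len(c) >= 3][:3]

print(filter_tokens('Timeout at <TIMESTAMP> for <ID>, "tokenCount":<TOKEN_COUNT>'))
# ['Timeout at', 'for', ', "tokenCount":']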

View File

@@ -45,6 +45,7 @@ import { InsightVizNode, NodeKind } from '~/queries/schema/schema-general'
import { isEventsQuery } from '~/queries/utils'
import { DashboardPlacement, EventType } from '~/types'
import { LLMAnalyticsErrors } from './LLMAnalyticsErrors'
import { LLMAnalyticsPlaygroundScene } from './LLMAnalyticsPlaygroundScene'
import { LLMAnalyticsReloadAction } from './LLMAnalyticsReloadAction'
import { LLMAnalyticsSessionsScene } from './LLMAnalyticsSessionsScene'
@@ -648,6 +649,27 @@ export function LLMAnalyticsScene(): JSX.Element {
},
]
if (featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_ERRORS_TAB]) {
tabs.push({
key: 'errors',
label: (
<>
Errors{' '}
<LemonTag className="ml-1" type="warning">
Beta
</LemonTag>
</>
),
content: (
<LLMAnalyticsSetupPrompt>
<LLMAnalyticsErrors />
</LLMAnalyticsSetupPrompt>
),
link: combineUrl(urls.llmAnalyticsErrors(), searchParams).url,
'data-attr': 'errors-tab',
})
}
if (featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_SESSIONS_VIEW]) {
tabs.push({
key: 'sessions',

View File

@@ -35,6 +35,7 @@ import {
PropertyOperator,
} from '~/types'
import errorsQueryTemplate from '../backend/queries/errors.sql?raw'
import type { llmAnalyticsLogicType } from './llmAnalyticsLogicType'
export const LLM_ANALYTICS_DATA_COLLECTION_NODE_ID = 'llm-analytics-data'
@@ -50,7 +51,7 @@ export function getDefaultGenerationsColumns(showInputOutput: boolean): string[]
...(showInputOutput ? ['properties.$ai_input[-1]', 'properties.$ai_output_choices'] : []),
'person',
"f'{properties.$ai_model}' -- Model",
"if(notEmpty(properties.$ai_error) OR properties.$ai_is_error = 'true', '❌', '') -- Error",
"if(properties.$ai_is_error = 'true', '❌', '') -- Error",
"f'{round(toFloat(properties.$ai_latency), 2)} s' -- Latency",
"f'{properties.$ai_input_tokens} → {properties.$ai_output_tokens} (∑ {toInt(properties.$ai_input_tokens) + toInt(properties.$ai_output_tokens)})' -- Token usage",
"f'${round(toFloat(properties.$ai_total_cost_usd), 6)}' -- Cost",
@@ -109,6 +110,7 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
setTracesQuery: (query: DataTableNode) => ({ query }),
setSessionsSort: (column: string, direction: 'ASC' | 'DESC') => ({ column, direction }),
setUsersSort: (column: string, direction: 'ASC' | 'DESC') => ({ column, direction }),
setErrorsSort: (column: string, direction: 'ASC' | 'DESC') => ({ column, direction }),
setGenerationsSort: (column: string, direction: 'ASC' | 'DESC') => ({ column, direction }),
refreshAllDashboardItems: true,
setRefreshStatus: (tileId: string, loading?: boolean) => ({ tileId, loading }),
@@ -204,6 +206,13 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
},
],
errorsSort: [
{ column: 'traces', direction: 'DESC' } as { column: string; direction: 'ASC' | 'DESC' },
{
setErrorsSort: (_, { column, direction }) => ({ column, direction }),
},
],
refreshStatus: [
{} as Record<string, { loading?: boolean; timer?: Date }>,
{
@@ -541,6 +550,8 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
return 'traces'
} else if (sceneKey === 'llmAnalyticsUsers') {
return 'users'
} else if (sceneKey === 'llmAnalyticsErrors') {
return 'errors'
} else if (sceneKey === 'llmAnalyticsSessions') {
return 'sessions'
} else if (sceneKey === 'llmAnalyticsPlayground') {
@@ -1141,6 +1152,72 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
allowSorting: true,
}),
],
errorsQuery: [
(s) => [
s.dateFilter,
s.shouldFilterTestAccounts,
s.propertyFilters,
s.errorsSort,
groupsModel.selectors.groupsTaxonomicTypes,
],
(
dateFilter: { dateFrom: string | null; dateTo: string | null },
shouldFilterTestAccounts: boolean,
propertyFilters: AnyPropertyFilter[],
errorsSort: { column: string; direction: 'ASC' | 'DESC' },
groupsTaxonomicTypes: TaxonomicFilterGroupType[]
): DataTableNode => {
// Use the shared query template
// The SQL template uses Python's .format() escaping ({{ for literal {), so normalize those for HogQL
const query = errorsQueryTemplate
.replace(/\{\{/g, '{')
.replace(/\}\}/g, '}')
.replace('{orderBy}', errorsSort.column)
.replace('{orderDirection}', errorsSort.direction)
return {
kind: NodeKind.DataTableNode,
source: {
kind: NodeKind.HogQLQuery,
query,
filters: {
dateRange: {
date_from: dateFilter.dateFrom || null,
date_to: dateFilter.dateTo || null,
},
filterTestAccounts: shouldFilterTestAccounts,
properties: propertyFilters,
},
},
columns: [
'error',
'traces',
'generations',
'spans',
'embeddings',
'sessions',
'users',
'days_seen',
'first_seen',
'last_seen',
],
showDateRange: true,
showReload: true,
showSearch: true,
showPropertyFilter: [
TaxonomicFilterGroupType.EventProperties,
TaxonomicFilterGroupType.PersonProperties,
...groupsTaxonomicTypes,
TaxonomicFilterGroupType.Cohorts,
TaxonomicFilterGroupType.HogQLExpression,
],
showTestAccountFilters: true,
showExport: true,
showColumnConfigurator: true,
allowSorting: true,
}
},
],
sessionsQuery: [
(s) => [
s.dateFilter,
@@ -1150,11 +1227,11 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
groupsModel.selectors.groupsTaxonomicTypes,
],
(
dateFilter,
shouldFilterTestAccounts,
propertyFilters,
sessionsSort,
groupsTaxonomicTypes
dateFilter: { dateFrom: string | null; dateTo: string | null },
shouldFilterTestAccounts: boolean,
propertyFilters: AnyPropertyFilter[],
sessionsSort: { column: string; direction: 'ASC' | 'DESC' },
groupsTaxonomicTypes: TaxonomicFilterGroupType[]
): DataTableNode => ({
kind: NodeKind.DataTableNode,
source: {
@@ -1166,7 +1243,7 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
countIf(event = '$ai_span') as spans,
countIf(event = '$ai_generation') as generations,
countIf(event = '$ai_embedding') as embeddings,
countIf(isNotNull(properties.$ai_error) OR properties.$ai_is_error = 'true') as errors,
countIf(properties.$ai_is_error = 'true') as errors,
round(sum(toFloat(properties.$ai_total_cost_usd)), 4) as total_cost,
round(sum(toFloat(properties.$ai_latency)), 2) as total_latency,
min(timestamp) as first_seen,
@@ -1219,7 +1296,8 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
],
isRefreshing: [
(s) => [s.refreshStatus],
(refreshStatus) => Object.values(refreshStatus).some((status) => status.loading),
(refreshStatus: Record<string, { loading?: boolean; timer?: Date }>) =>
Object.values(refreshStatus).some((status) => status.loading),
],
breadcrumbs: [
() => [],
@@ -1266,6 +1344,7 @@ export const llmAnalyticsLogic = kea<llmAnalyticsLogicType>([
[urls.llmAnalyticsGenerations()]: (_, searchParams) => applySearchParams(searchParams),
[urls.llmAnalyticsTraces()]: (_, searchParams) => applySearchParams(searchParams),
[urls.llmAnalyticsUsers()]: (_, searchParams) => applySearchParams(searchParams),
[urls.llmAnalyticsErrors()]: (_, searchParams) => applySearchParams(searchParams),
[urls.llmAnalyticsSessions()]: (_, searchParams) => applySearchParams(searchParams),
[urls.llmAnalyticsPlayground()]: (_, searchParams) => applySearchParams(searchParams),
}
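One subtlety worth spelling out: errors.sql is consumed on two paths. The backend's get_errors_query() fills every placeholder via .format(), while the selector above only unescapes the braces and fills the sort placeholders, deliberately leaving {filters} in the query text so HogQL expands it from the source node's filters property at run time. A sketch of both paths (template trimmed for illustration):

TEMPLATE = "SELECT ... WHERE match(e, '[0-9a-f]{{8}}') AND {filters} ORDER BY {orderBy} {orderDirection}"

# Backend path: .format() fills all placeholders and unescapes {{ }} in one pass
backend = TEMPLATE.format(filters="team_id = 42", orderBy="last_seen", orderDirection="DESC")

# Frontend path (the TSX logic, shown here in Python): unescape braces, fill the
# sort params, and keep '{filters}' for HogQL to expand at query time
frontend = (
    TEMPLATE.replace("{{", "{").replace("}}", "}")
    .replace("{orderBy}", "last_seen")
    .replace("{orderDirection}", "DESC")
)
assert "{filters}" in frontend and "{filters}" not in backend
assert "[0-9a-f]{8}" in frontend and "[0-9a-f]{8}" in backend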

View File

@@ -84,6 +84,7 @@ export const manifest: ProductManifest = {
'/llm-analytics/traces': ['LLMAnalytics', 'llmAnalyticsTraces'],
'/llm-analytics/traces/:id': ['LLMAnalyticsTrace', 'llmAnalytics'],
'/llm-analytics/users': ['LLMAnalytics', 'llmAnalyticsUsers'],
'/llm-analytics/errors': ['LLMAnalytics', 'llmAnalyticsErrors'],
'/llm-analytics/sessions': ['LLMAnalytics', 'llmAnalyticsSessions'],
'/llm-analytics/sessions/:id': ['LLMAnalyticsSession', 'llmAnalytics'],
'/llm-analytics/playground': ['LLMAnalytics', 'llmAnalyticsPlayground'],
@@ -126,6 +127,7 @@ export const manifest: ProductManifest = {
return `/llm-analytics/traces/${id}${stringifiedParams ? `?${stringifiedParams}` : ''}`
},
llmAnalyticsUsers: (): string => '/llm-analytics/users',
llmAnalyticsErrors: (): string => '/llm-analytics/errors',
llmAnalyticsSessions: (): string => '/llm-analytics/sessions',
llmAnalyticsSession: (
id: string,