posthog/dags/llma/daily_metrics/main.py
"""
Daily aggregation of LLMA (LLM Analytics) metrics.
Aggregates AI event counts from the events table into a daily metrics table
for efficient querying and cost analysis.
"""

from datetime import UTC, datetime, timedelta

import dagster
import pandas as pd
from dagster import BackfillPolicy, DailyPartitionsDefinition

from posthog.clickhouse import query_tagging
from posthog.clickhouse.client import sync_execute
from posthog.clickhouse.cluster import ClickhouseCluster

from dags.common import JobOwners, dagster_tags
from dags.llma.daily_metrics.config import config
from dags.llma.daily_metrics.utils import get_delete_query, get_insert_query

# Partition definition for daily aggregations
partition_def = DailyPartitionsDefinition(start_date=config.partition_start_date, end_offset=1)
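# end_offset=1 extends the partition set through the current (still-filling) day,
# so the newest partition can be materialized without waiting for the day to close.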

# Backfill policy: process N days per run
backfill_policy_def = BackfillPolicy.multi_run(max_partitions_per_run=config.max_partitions_per_run)

# ClickHouse settings for aggregation queries
LLMA_CLICKHOUSE_SETTINGS = {
    "max_execution_time": str(config.clickhouse_max_execution_time),
}
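# NOTE: ClickHouse's max_execution_time is in seconds; queries that exceed it are aborted.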


@dagster.asset(
    name="llma_metrics_daily",
    group_name="llma",
    partitions_def=partition_def,
    backfill_policy=backfill_policy_def,
    metadata={"table": config.table_name},
    tags={"owner": JobOwners.TEAM_LLMA.value},
)
def llma_metrics_daily(
    context: dagster.AssetExecutionContext,
    cluster: dagster.ResourceParam[ClickhouseCluster],
) -> None:
"""
Daily aggregation of LLMA metrics.
Aggregates AI event counts ($ai_trace, $ai_generation, $ai_span, $ai_embedding)
by team and date into a long-format metrics table for efficient querying.
Long format allows adding new metrics without schema changes.
"""
    query_tagging.get_query_tags().with_dagster(dagster_tags(context))

    if not context.partition_time_window:
        raise dagster.Failure("This asset should only be run with a partition_time_window")

    start_datetime, end_datetime = context.partition_time_window
    date_start = start_datetime.strftime("%Y-%m-%d")
    date_end = end_datetime.strftime("%Y-%m-%d")
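    # partition_time_window is a [start, end) interval, so date_end falls on the day
    # after the window and matches the exclusive `date < date_end` bounds used below.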
context.log.info(f"Aggregating LLMA metrics for {date_start} to {date_end}")

    try:
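        # Delete-then-insert makes partition runs idempotent: re-running a day first
        # clears any rows previously aggregated for the same window.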
        delete_query = get_delete_query(date_start, date_end)
        sync_execute(delete_query, settings=LLMA_CLICKHOUSE_SETTINGS)

        insert_query = get_insert_query(date_start, date_end)
        context.log.info(f"Metrics query:\n{insert_query}")
        sync_execute(insert_query, settings=LLMA_CLICKHOUSE_SETTINGS)

        # Query and log the metrics that were just aggregated
        metrics_query = f"""
            SELECT
                metric_name,
                count(DISTINCT team_id) AS teams,
                sum(metric_value) AS total_value
            FROM {config.table_name}
            WHERE date >= '{date_start}' AND date < '{date_end}'
            GROUP BY metric_name
            ORDER BY metric_name
        """
        metrics_results = sync_execute(metrics_query)

        if metrics_results:
            df = pd.DataFrame(metrics_results, columns=["metric_name", "teams", "total_value"])
            context.log.info(f"Aggregated {len(df)} metric types for {date_start}:\n{df.to_string(index=False)}")
        else:
            context.log.info(f"No AI events found for {date_start}")

        context.log.info(f"Successfully aggregated LLMA metrics for {date_start}")
    except Exception as e:
        raise dagster.Failure(f"Failed to aggregate LLMA metrics: {e}") from e


# Define the job that runs the asset
llma_metrics_daily_job = dagster.define_asset_job(
    name="llma_metrics_daily_job",
    selection=["llma_metrics_daily"],
    tags={
        "owner": JobOwners.TEAM_LLMA.value,
        "dagster/max_runtime": str(config.job_timeout),
    },
)
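# The dagster/max_runtime tag is enforced by Dagster's run monitoring, which terminates
# runs that exceed the configured number of seconds.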


@dagster.schedule(
    cron_schedule=config.cron_schedule,
    job=llma_metrics_daily_job,
    execution_timezone="UTC",
    tags={"owner": JobOwners.TEAM_LLMA.value},
)
def llma_metrics_daily_schedule(context: dagster.ScheduleEvaluationContext):
"""
Runs daily for the previous day's partition.
Schedule configured in dags.llma.config.
This aggregates AI event metrics from the events table into the
llma_metrics_daily table for efficient querying.
"""
    # Calculate yesterday's partition
    yesterday = (datetime.now(UTC) - timedelta(days=1)).strftime("%Y-%m-%d")
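    # The "%Y-%m-%d" key matches DailyPartitionsDefinition's default partition naming.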
context.log.info(f"Scheduling LLMA metrics aggregation for {yesterday}")
return dagster.RunRequest(
partition_key=yesterday,
run_config={
"ops": {
"llma_metrics_daily": {"config": {}},
}
},
)