diff --git a/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr index 47e8009776..3d0af20bef 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr @@ -171,8 +171,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -180,6 +178,25 @@ # --- # name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -188,7 +205,7 @@ AND version = 0 ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.2 +# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.3 ''' (SELECT cohort_people.person_id AS id @@ -269,7 +286,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.3 +# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.4 ''' SELECT if(funnel_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, funnel_query.person_id) AS id @@ -341,8 +358,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -350,12 +365,21 @@ # --- # name: TestCohortQuery.test_cohort_filter_with_extra.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC ''' # --- # name: TestCohortQuery.test_cohort_filter_with_extra.10 @@ -638,6 +662,16 @@ ''' # --- # name: TestCohortQuery.test_cohort_filter_with_extra.2 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestCohortQuery.test_cohort_filter_with_extra.3 ''' (SELECT cohort_people.person_id AS id @@ -684,7 +718,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_extra.3 +# name: TestCohortQuery.test_cohort_filter_with_extra.4 ''' SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id @@ -726,7 +760,7 @@ join_algorithm = 'auto' ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_extra.4 +# name: TestCohortQuery.test_cohort_filter_with_extra.5 ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, @@ -740,14 +774,31 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_extra.5 +# name: TestCohortQuery.test_cohort_filter_with_extra.6 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestCohortQuery.test_cohort_filter_with_extra.7 ''' SELECT count(DISTINCT person_id) @@ -757,99 +808,6 @@ AND version = 0 ''' # --- -# name: TestCohortQuery.test_cohort_filter_with_extra.6 - ''' - ( - (SELECT persons.id AS id - FROM - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name, - person.id AS id - FROM person - WHERE and(equals(person.team_id, 99999), in(id, - (SELECT where_optimization.id AS id - FROM person AS where_optimization - WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test'), 0))))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons - WHERE ifNull(equals(persons.properties___name, 'test'), 0) - ORDER BY persons.id ASC - LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) - UNION DISTINCT ( - (SELECT source.id AS id - FROM - (SELECT actor_id AS actor_id, - count() AS event_count, - groupUniqArray(distinct_id) AS event_distinct_ids, - actor_id AS id - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id, - toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.uuid AS uuid, - e.distinct_id AS distinct_id - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('today', 6, 'UTC')), equals(e.event, '$pageview'))) - GROUP BY actor_id) AS source - ORDER BY source.id ASC - LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto')) SETTINGS readonly=2, - max_execution_time=600, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - transform_null_in=1, - optimize_min_equality_disjunction_chain_length=4294967295, - allow_experimental_join_condition=1 - ''' -# --- -# name: TestCohortQuery.test_cohort_filter_with_extra.7 - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT if(not(empty(pdi.distinct_id)), pdi.person_id, e.person_id) AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(person_props, 'name'), '^"|"$', '')))) - OR ((coalesce(performed_event_condition_None_level_level_0_level_1_level_0_0, false))))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- # name: TestCohortQuery.test_cohort_filter_with_extra.8 ''' ( @@ -1972,8 +1930,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -1981,6 +1937,25 @@ # --- # name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -1989,45 +1964,6 @@ AND version = 0 ''' # --- -# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.2 - ''' - - (SELECT cohort_people.person_id AS id - FROM - (SELECT DISTINCT cohortpeople.person_id AS person_id, - cohortpeople.cohort_id AS cohort_id, - cohortpeople.team_id AS team_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 99999), in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(99999, 0)]))) AS cohort_people - WHERE and(ifNull(equals(cohort_people.cohort_id, 99999), 0), ifNull(equals(cohort_people.team_id, 99999), 0)) - LIMIT 1000000000) - UNION DISTINCT - (SELECT persons.id AS id - FROM - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name, - person.id AS id - FROM person - WHERE and(equals(person.team_id, 99999), in(id, - (SELECT where_optimization.id AS id - FROM person AS where_optimization - WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test2'), 0))))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons - WHERE ifNull(equals(persons.properties___name, 'test2'), 0) - ORDER BY persons.id ASC - LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, - join_algorithm='auto') SETTINGS readonly=2, - max_execution_time=600, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - transform_null_in=1, - optimize_min_equality_disjunction_chain_length=4294967295, - allow_experimental_join_condition=1 - ''' -# --- # name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.3 ''' @@ -2068,6 +2004,45 @@ ''' # --- # name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.4 + ''' + + (SELECT cohort_people.person_id AS id + FROM + (SELECT DISTINCT cohortpeople.person_id AS person_id, + cohortpeople.cohort_id AS cohort_id, + cohortpeople.team_id AS team_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(99999, 0)]))) AS cohort_people + WHERE and(ifNull(equals(cohort_people.cohort_id, 99999), 0), ifNull(equals(cohort_people.team_id, 99999), 0)) + LIMIT 1000000000) + UNION DISTINCT + (SELECT persons.id AS id + FROM + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name, + person.id AS id + FROM person + WHERE and(equals(person.team_id, 99999), in(id, + (SELECT where_optimization.id AS id + FROM person AS where_optimization + WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test2'), 0))))) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons + WHERE ifNull(equals(persons.properties___name, 'test2'), 0) + ORDER BY persons.id ASC + LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1, + join_algorithm='auto') SETTINGS readonly=2, + max_execution_time=600, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.5 ''' SELECT person.person_id AS id @@ -2314,8 +2289,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -2323,6 +2296,25 @@ # --- # name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -2331,7 +2323,7 @@ AND version = 0 ''' # --- -# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.2 +# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.3 ''' (SELECT cohort_people.person_id AS id @@ -2379,7 +2371,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.3 +# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.4 ''' SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id diff --git a/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr index 50e9f67eae..62f92dc9a6 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr @@ -13,8 +13,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-01-21' AND event_time >= '2021-01-21 00:00:00' ORDER BY event_time DESC @@ -22,6 +20,25 @@ # --- # name: TestEventQuery.test_account_filters.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-01-21' + AND event_time >= '2021-01-21 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestEventQuery.test_account_filters.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -30,7 +47,7 @@ AND version = 0 ''' # --- -# name: TestEventQuery.test_account_filters.2 +# name: TestEventQuery.test_account_filters.3 ''' SELECT e.timestamp as timestamp, if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id diff --git a/posthog/api/test/__snapshots__/test_feature_flag.ambr b/posthog/api/test/__snapshots__/test_feature_flag.ambr index ff86f659bd..3c61d56f9b 100644 --- a/posthog/api/test/__snapshots__/test_feature_flag.ambr +++ b/posthog/api/test/__snapshots__/test_feature_flag.ambr @@ -131,8 +131,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -140,16 +138,6 @@ # --- # name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -162,14 +150,12 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC ''' # --- -# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.3 +# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.2 ''' SELECT count(DISTINCT person_id) @@ -179,7 +165,55 @@ AND version = 0 ''' # --- +# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.3 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- # name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.5 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.6 ''' /* user_id:0 request:_snapshot_ */ SELECT count(1) @@ -203,7 +237,7 @@ HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) ''' # --- -# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.5 +# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.7 ''' /* user_id:0 request:_snapshot_ */ SELECT count(1) @@ -267,8 +301,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -288,8 +320,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -297,6 +327,44 @@ # --- # name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.4 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.5 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.6 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -305,7 +373,7 @@ AND version = 0 ''' # --- -# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.5 +# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.7 ''' /* user_id:0 request:_snapshot_ */ SELECT count(1) @@ -374,8 +442,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -383,6 +449,25 @@ # --- # name: TestBlastRadius.test_user_blast_radius_with_single_cohort.3 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.4 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -391,7 +476,7 @@ AND version = 0 ''' # --- -# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.4 +# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.5 ''' /* user_id:0 request:_snapshot_ */ SELECT count(1) diff --git a/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_base.ambr b/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_base.ambr index 2f0c7eb20f..9ed5f6f637 100644 --- a/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_base.ambr +++ b/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_base.ambr @@ -2528,8 +2528,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -2537,6 +2535,25 @@ # --- # name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -2545,7 +2562,7 @@ AND version = 0 ''' # --- -# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.2 +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.3 ''' SELECT metric_events.variant AS variant, count(metric_events.entity_id) AS num_users, @@ -2610,7 +2627,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.3 +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.4 ''' SELECT metric_events.variant AS variant, count(metric_events.entity_id) AS num_users, diff --git a/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_data_warehouse_metric.ambr b/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_data_warehouse_metric.ambr index 4cb304b984..1e24e199ea 100644 --- a/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_data_warehouse_metric.ambr +++ b/posthog/hogql_queries/experiments/test/experiment_query_runner/__snapshots__/test_data_warehouse_metric.ambr @@ -973,8 +973,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -982,6 +980,25 @@ # --- # name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -990,7 +1007,7 @@ AND version = 0 ''' # --- -# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.2 +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.3 ''' SELECT metric_events.variant AS variant, count(metric_events.entity_id) AS num_users, @@ -1046,7 +1063,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.3 +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.4 ''' SELECT metric_events.variant AS variant, count(metric_events.entity_id) AS num_users, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 6a5aaa970e..0393ca3b09 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -456,8 +456,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -465,6 +463,25 @@ # --- # name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -473,7 +490,7 @@ AND version = 0 ''' # --- -# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.3 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr index a531c03dab..6bd32ce96b 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr @@ -355,8 +355,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -364,6 +362,25 @@ # --- # name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -372,7 +389,7 @@ AND version = 0 ''' # --- -# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.2 +# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.3 ''' SELECT sum(step_1) AS step_1, sum(step_2) AS step_2, diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index 6f605a6670..35ad293b6c 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -13,8 +13,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -22,6 +20,25 @@ # --- # name: TestLifecycleQueryRunner.test_cohort_filter.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestLifecycleQueryRunner.test_cohort_filter.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -30,7 +47,7 @@ AND version = 0 ''' # --- -# name: TestLifecycleQueryRunner.test_cohort_filter.2 +# name: TestLifecycleQueryRunner.test_cohort_filter.3 ''' SELECT groupArray(start_of_period) AS date, groupArray(counts) AS total, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr index 27cbb7a16d..0b800ded96 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr @@ -209,8 +209,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -218,6 +216,25 @@ # --- # name: TestFormula.test_breakdown_cohort.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestFormula.test_breakdown_cohort.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -226,7 +243,7 @@ AND version = 0 ''' # --- -# name: TestFormula.test_breakdown_cohort.2 +# name: TestFormula.test_breakdown_cohort.3 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, @@ -269,7 +286,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestFormula.test_breakdown_cohort.3 +# name: TestFormula.test_breakdown_cohort.4 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, @@ -312,7 +329,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestFormula.test_breakdown_cohort.4 +# name: TestFormula.test_breakdown_cohort.5 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, @@ -365,7 +382,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestFormula.test_breakdown_cohort.5 +# name: TestFormula.test_breakdown_cohort.6 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 9db7d76627..01aac8d9d6 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -13,8 +13,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -22,16 +20,6 @@ # --- # name: TestTrends.test_action_filtering_with_cohort.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestTrends.test_action_filtering_with_cohort.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -44,15 +32,61 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC ''' # --- +# name: TestTrends.test_action_filtering_with_cohort.2 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- # name: TestTrends.test_action_filtering_with_cohort.3 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort.5 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -61,7 +95,7 @@ AND version = 2 ''' # --- -# name: TestTrends.test_action_filtering_with_cohort.4 +# name: TestTrends.test_action_filtering_with_cohort.6 ''' SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-07 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -124,8 +158,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -133,16 +165,6 @@ # --- # name: TestTrends.test_action_filtering_with_cohort_poe_v2.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -155,15 +177,61 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC ''' # --- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- # name: TestTrends.test_action_filtering_with_cohort_poe_v2.3 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.5 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -172,7 +240,7 @@ AND version = 2 ''' # --- -# name: TestTrends.test_action_filtering_with_cohort_poe_v2.4 +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.6 ''' SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-07 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -708,8 +776,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -717,6 +783,25 @@ # --- # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -725,7 +810,7 @@ AND version = 0 ''' # --- -# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2 +# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, @@ -1136,8 +1221,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC @@ -1145,16 +1228,6 @@ # --- # name: TestTrends.test_filter_events_by_precalculated_cohort.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestTrends.test_filter_events_by_precalculated_cohort.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -1167,14 +1240,12 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC ''' # --- -# name: TestTrends.test_filter_events_by_precalculated_cohort.3 +# name: TestTrends.test_filter_events_by_precalculated_cohort.2 ''' SELECT count(DISTINCT person_id) @@ -1184,7 +1255,55 @@ AND version = 0 ''' # --- +# name: TestTrends.test_filter_events_by_precalculated_cohort.3 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- # name: TestTrends.test_filter_events_by_precalculated_cohort.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort.5 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort.6 ''' SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-02 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1244,8 +1363,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC @@ -1253,16 +1370,6 @@ # --- # name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -1275,14 +1382,12 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC ''' # --- -# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3 +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2 ''' SELECT count(DISTINCT person_id) @@ -1292,7 +1397,55 @@ AND version = 0 ''' # --- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- # name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.5 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.6 ''' SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-02 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1828,8 +1981,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -1837,6 +1988,25 @@ # --- # name: TestTrends.test_person_filtering_in_cohort_in_action.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_person_filtering_in_cohort_in_action.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -1845,7 +2015,7 @@ AND version = 0 ''' # --- -# name: TestTrends.test_person_filtering_in_cohort_in_action.2 +# name: TestTrends.test_person_filtering_in_cohort_in_action.3 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, @@ -1912,8 +2082,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -1921,6 +2089,25 @@ # --- # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -1929,7 +2116,7 @@ AND version = 0 ''' # --- -# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2 +# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, diff --git a/posthog/hogql_queries/web_analytics/test/__snapshots__/test_web_stats_table.ambr b/posthog/hogql_queries/web_analytics/test/__snapshots__/test_web_stats_table.ambr index cb70702738..103c1df465 100644 --- a/posthog/hogql_queries/web_analytics/test/__snapshots__/test_web_stats_table.ambr +++ b/posthog/hogql_queries/web_analytics/test/__snapshots__/test_web_stats_table.ambr @@ -575,8 +575,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -584,6 +582,25 @@ # --- # name: TestWebStatsTableQueryRunner.test_cohort_test_filters.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -592,7 +609,7 @@ AND version = 0 ''' # --- -# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.2 +# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.3 ''' SELECT timestamp @@ -603,7 +620,7 @@ limit 1 ''' # --- -# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.3 +# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.4 ''' SELECT breakdown_value AS `context.columns.breakdown_value`, tuple(uniq(filtered_person_id), NULL) AS `context.columns.visitors`, diff --git a/posthog/models/cohort/util.py b/posthog/models/cohort/util.py index 0bf15b631f..87bf1a5230 100644 --- a/posthog/models/cohort/util.py +++ b/posthog/models/cohort/util.py @@ -45,6 +45,10 @@ from posthog.queries.util import PersonPropertiesMode # temporary marker to denote when cohortpeople table started being populated TEMP_PRECALCULATED_MARKER = parser.parse("2021-06-07T15:00:00+00:00") +# Cohort query timeout settings +COHORT_QUERY_TIMEOUT_SECONDS = 600 # Max execution time for ClickHouse cohort calculation queries +COHORT_STATS_COLLECTION_DELAY_SECONDS = 60 # Short delay to allow query_log to flush before collecting stats + logger = structlog.get_logger(__name__) @@ -73,22 +77,41 @@ def run_cohort_query( # Tag the query for tracking tag_queries(kind="cohort_calculation", id=cohort_tag) + delayed_task = None + # Use tags_context to protect tags during import (circular import resolution can corrupt context) + with tags_context(): + from posthog.tasks.calculate_cohort import COHORT_CALCULATION_STARTED_COUNTER, collect_cohort_query_stats + + # Track that a calculation is starting (before it runs, so we catch OOMs) + COHORT_CALCULATION_STARTED_COUNTER.inc() + + # Schedule delayed task to collect stats after query_log_archive is synced + # Only if we have a history record to update + if history and query: + delayed_task = collect_cohort_query_stats.apply_async( + args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query], + countdown=COHORT_QUERY_TIMEOUT_SECONDS, + ) + try: result = fn(*args, **kwargs) end_time = timezone.now() # Capture when query actually finished + # If calculation succeeded and we scheduled a delayed task, cancel it and run immediately + # This avoids waiting the full timeout when the query completed quickly + if delayed_task and history and query: + if delayed_task.state in ["PENDING", "RECEIVED"]: + delayed_task.revoke() # Cancel the delayed task + + # Run immediately since the query already completed + collect_cohort_query_stats.apply_async( + args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query], + countdown=COHORT_STATS_COLLECTION_DELAY_SECONDS, + ) + return result, end_time finally: - # Schedule delayed task to collect stats after query_log_archive is synced - # Only if we have a history record to update - if history and query: - from posthog.tasks.calculate_cohort import collect_cohort_query_stats - - collect_cohort_query_stats.apply_async( - args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query], - countdown=60, - ) # Reset query tags to avoid affecting other queries from posthog.clickhouse.query_tagging import reset_query_tags @@ -119,7 +142,6 @@ def get_clickhouse_query_stats(tag_matcher: str, cohort_id: int, start_time: dat lc_cohort_id = %(cohort_id)s AND team_id = %(team_id)s AND query LIKE %(matcher)s - AND type IN ('QueryFinish', 'ExceptionWhileProcessing') AND event_date >= %(start_date)s AND event_time >= %(start_time)s ORDER BY event_time DESC @@ -516,9 +538,9 @@ def _recalculate_cohortpeople_for_team_hogql( "new_version": pending_version, }, settings={ - "max_execution_time": 600, - "send_timeout": 600, - "receive_timeout": 600, + "max_execution_time": COHORT_QUERY_TIMEOUT_SECONDS, + "send_timeout": COHORT_QUERY_TIMEOUT_SECONDS, + "receive_timeout": COHORT_QUERY_TIMEOUT_SECONDS, "optimize_on_insert": 0, "max_ast_elements": hogql_global_settings.max_ast_elements, "max_expanded_ast_elements": hogql_global_settings.max_expanded_ast_elements, diff --git a/posthog/queries/funnels/test/__snapshots__/test_funnel.ambr b/posthog/queries/funnels/test/__snapshots__/test_funnel.ambr index 966a71065d..80c7d052f7 100644 --- a/posthog/queries/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/queries/funnels/test/__snapshots__/test_funnel.ambr @@ -389,8 +389,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -398,6 +396,25 @@ # --- # name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -406,7 +423,7 @@ AND version = 0 ''' # --- -# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.3 ''' SELECT countIf(steps = 1) step_1, diff --git a/posthog/queries/test/__snapshots__/test_trends.ambr b/posthog/queries/test/__snapshots__/test_trends.ambr index bd3ff82ba8..ef3f095091 100644 --- a/posthog/queries/test/__snapshots__/test_trends.ambr +++ b/posthog/queries/test/__snapshots__/test_trends.ambr @@ -13,8 +13,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -22,6 +20,25 @@ # --- # name: TestTrends.test_action_filtering_with_cohort.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -30,7 +47,7 @@ AND version = 2 ''' # --- -# name: TestTrends.test_action_filtering_with_cohort.2 +# name: TestTrends.test_action_filtering_with_cohort.3 ''' SELECT groupArray(day_start) as date, @@ -102,8 +119,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -111,6 +126,25 @@ # --- # name: TestTrends.test_action_filtering_with_cohort_poe_v2.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -119,7 +153,7 @@ AND version = 2 ''' # --- -# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2 +# name: TestTrends.test_action_filtering_with_cohort_poe_v2.3 ''' SELECT groupArray(day_start) as date, @@ -1003,8 +1037,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC @@ -1012,6 +1044,25 @@ # --- # name: TestTrends.test_filter_events_by_precalculated_cohort.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -1020,7 +1071,7 @@ AND version = 0 ''' # --- -# name: TestTrends.test_filter_events_by_precalculated_cohort.2 +# name: TestTrends.test_filter_events_by_precalculated_cohort.3 ''' SELECT groupArray(day_start) as date, @@ -1086,8 +1137,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2020-01-02' AND event_time >= '2020-01-02 00:00:00' ORDER BY event_time DESC @@ -1095,6 +1144,25 @@ # --- # name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2020-01-02' + AND event_time >= '2020-01-02 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -1103,7 +1171,7 @@ AND version = 0 ''' # --- -# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2 +# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3 ''' SELECT groupArray(day_start) as date, diff --git a/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recording_list_from_query.ambr b/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recording_list_from_query.ambr index 6cdfb4355d..aabfca43c3 100644 --- a/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recording_list_from_query.ambr +++ b/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recording_list_from_query.ambr @@ -5971,8 +5971,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -5980,6 +5978,25 @@ # --- # name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -5988,7 +6005,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.2 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.3 ''' SELECT DISTINCT p.id @@ -6036,7 +6053,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.3 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6076,7 +6093,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.4 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6141,8 +6158,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -6150,6 +6165,25 @@ # --- # name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -6158,7 +6192,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.2 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.3 ''' SELECT DISTINCT p.id @@ -6206,7 +6240,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.3 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6246,7 +6280,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.4 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6311,8 +6345,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -6320,6 +6352,25 @@ # --- # name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -6328,7 +6379,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.2 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.3 ''' SELECT DISTINCT p.id @@ -6376,7 +6427,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.3 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6416,7 +6467,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.4 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6481,8 +6532,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -6490,6 +6539,25 @@ # --- # name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -6498,7 +6566,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.2 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.3 ''' SELECT DISTINCT p.id @@ -6546,7 +6614,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.3 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -6586,7 +6654,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.4 +# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13298,8 +13366,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -13307,6 +13373,25 @@ # --- # name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -13315,7 +13400,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.2 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.3 ''' SELECT DISTINCT p.id @@ -13363,7 +13448,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.3 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13403,7 +13488,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.4 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13468,8 +13553,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -13477,6 +13560,25 @@ # --- # name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -13485,7 +13587,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.2 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.3 ''' SELECT DISTINCT p.id @@ -13533,7 +13635,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.3 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13573,7 +13675,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.4 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13638,8 +13740,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -13647,6 +13747,25 @@ # --- # name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -13655,7 +13774,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.2 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.3 ''' SELECT DISTINCT p.id @@ -13703,7 +13822,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.3 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13743,7 +13862,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.4 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13808,8 +13927,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= 'today' AND event_time >= 'today 00:00:00' ORDER BY event_time DESC @@ -13817,6 +13934,25 @@ # --- # name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= 'today' + AND event_time >= 'today 00:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -13825,7 +13961,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.2 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.3 ''' SELECT DISTINCT p.id @@ -13873,7 +14009,7 @@ AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) )) ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.3 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -13913,7 +14049,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.4 +# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, diff --git a/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recordings_list_by_cohort.ambr b/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recordings_list_by_cohort.ambr index a961268d43..b341b496a9 100644 --- a/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recordings_list_by_cohort.ambr +++ b/posthog/session_recordings/queries/test/listing_recordings/__snapshots__/test_session_recordings_list_by_cohort.ambr @@ -13,8 +13,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-08-21' AND event_time >= '2021-08-21 20:00:00' ORDER BY event_time DESC @@ -22,6 +20,25 @@ # --- # name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-08-21' + AND event_time >= '2021-08-21 20:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -30,7 +47,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.2 +# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.3 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -81,7 +98,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.3 +# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -146,8 +163,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-08-21' AND event_time >= '2021-08-21 20:00:00' ORDER BY event_time DESC @@ -155,6 +170,25 @@ # --- # name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-08-21' + AND event_time >= '2021-08-21 20:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -163,7 +197,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.2 +# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.3 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -221,7 +255,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.3 +# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.4 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -293,8 +327,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-08-21' AND event_time >= '2021-08-21 20:00:00' ORDER BY event_time DESC @@ -302,16 +334,6 @@ # --- # name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.1 ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.2 - ''' /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ SELECT query_id, query_duration_ms, @@ -324,14 +346,114 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-08-21' AND event_time >= '2021-08-21 20:00:00' ORDER BY event_time DESC ''' # --- +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.10 + ''' + SELECT s.session_id AS session_id, + any(s.team_id) AS `any(s.team_id)`, + any(s.distinct_id) AS `any(s.distinct_id)`, + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + max(s.retention_period_days) AS retention_period_days, + plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time, + dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl, + greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing, + round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id, + (SELECT person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id, + (SELECT person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999))))))))) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)))) + GROUP BY s.session_id + HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0) + ORDER BY start_time DESC, + s.session_id DESC + LIMIT 51 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.2 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- # name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.3 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-08-21' + AND event_time >= '2021-08-21 20:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.4 + ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-08-21' + AND event_time >= '2021-08-21 20:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.5 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -382,7 +504,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.4 +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.6 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -433,7 +555,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.5 +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.7 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -484,7 +606,7 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.6 +# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.8 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, @@ -535,114 +657,6 @@ allow_experimental_join_condition=1 ''' # --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.7 - ''' - SELECT s.session_id AS session_id, - any(s.team_id) AS `any(s.team_id)`, - any(s.distinct_id) AS `any(s.distinct_id)`, - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - max(s.retention_period_days) AS retention_period_days, - plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time, - dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl, - greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id, - (SELECT person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id, - (SELECT person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id, - (SELECT person_static_cohort.person_id AS person_id - FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999))))))))) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)))) - GROUP BY s.session_id - HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0) - ORDER BY start_time DESC, - s.session_id DESC - LIMIT 51 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - transform_null_in=1, - optimize_min_equality_disjunction_chain_length=4294967295, - allow_experimental_join_condition=1 - ''' -# --- -# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.8 - ''' - SELECT s.session_id AS session_id, - any(s.team_id) AS `any(s.team_id)`, - any(s.distinct_id) AS `any(s.distinct_id)`, - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - max(s.retention_period_days) AS retention_period_days, - plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time, - dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl, - greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id, - (SELECT person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id, - (SELECT person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id, - (SELECT person_static_cohort.person_id AS person_id - FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999))))))))) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)))) - GROUP BY s.session_id - HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0) - ORDER BY start_time DESC, - s.session_id DESC - LIMIT 51 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - transform_null_in=1, - optimize_min_equality_disjunction_chain_length=4294967295, - allow_experimental_join_condition=1 - ''' -# --- # name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.9 ''' SELECT s.session_id AS session_id, @@ -683,9 +697,9 @@ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)))) GROUP BY s.session_id HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0) - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, + ORDER BY start_time DESC, + s.session_id DESC + LIMIT 51 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, @@ -711,8 +725,6 @@ WHERE lc_cohort_id = 99999 AND team_id = 99999 AND query LIKE '%cohort_calc:00000000%' - AND type IN ('QueryFinish', - 'ExceptionWhileProcessing') AND event_date >= '2021-08-21' AND event_time >= '2021-08-21 20:00:00' ORDER BY event_time DESC @@ -720,6 +732,25 @@ # --- # name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.1 ''' + /* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */ + SELECT query_id, + query_duration_ms, + read_rows, + read_bytes, + written_rows, + memory_usage, + exception + FROM query_log_archive + WHERE lc_cohort_id = 99999 + AND team_id = 99999 + AND query LIKE '%cohort_calc:00000000%' + AND event_date >= '2021-08-21' + AND event_time >= '2021-08-21 20:00:00' + ORDER BY event_time DESC + ''' +# --- +# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.2 + ''' SELECT count(DISTINCT person_id) FROM cohortpeople @@ -728,7 +759,7 @@ AND version = 0 ''' # --- -# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.2 +# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.3 ''' SELECT s.session_id AS session_id, any(s.team_id) AS `any(s.team_id)`, diff --git a/posthog/tasks/calculate_cohort.py b/posthog/tasks/calculate_cohort.py index 617b12a884..91243e13cf 100644 --- a/posthog/tasks/calculate_cohort.py +++ b/posthog/tasks/calculate_cohort.py @@ -1,5 +1,5 @@ import time -from datetime import timedelta +from datetime import datetime, timedelta from typing import Any, Optional from django.conf import settings @@ -10,7 +10,7 @@ import structlog import posthoganalytics from celery import chain, current_task, shared_task from dateutil.relativedelta import relativedelta -from prometheus_client import Counter, Gauge +from prometheus_client import Counter, Gauge, Histogram from posthog.api.monitoring import Feature from posthog.clickhouse import query_tagging @@ -18,6 +18,7 @@ from posthog.clickhouse.query_tagging import QueryTags, update_tags from posthog.exceptions_capture import capture_exception from posthog.models import Cohort from posthog.models.cohort import CohortOrEmpty +from posthog.models.cohort.calculation_history import CohortCalculationHistory from posthog.models.cohort.util import ( get_all_cohort_dependencies, get_all_cohort_dependents, @@ -58,6 +59,30 @@ COHORT_MAXED_ERRORS_GAUGE = Gauge( "cohort_maxed_errors", "Number of cohorts that have reached the maximum number of errors" ) +COHORT_CALCULATION_STARTED_COUNTER = Counter( + "cohort_calculation_started_total", + "Cohort calculations started (tracks all attempts including those that may OOM)", +) + +COHORT_CALCULATION_COMPLETED_COUNTER = Counter( + "cohort_calculation_completed_total", + "Cohort calculations that completed (either success or caught error)", + ["status"], # labels: "success", "error" +) + +COHORT_CALCULATION_FAILURES_COUNTER = Counter( + "cohort_calculation_failures_total", + "Cohort calculation failures by type", + ["failure_type"], # labels: "exception", "clickhouse_error", etc. +) + +COHORT_CALCULATION_DURATION_SECONDS = Histogram( + "cohort_calculation_duration_seconds", + "Duration of cohort calculations in seconds", + ["status"], # labels: "success", "error" + buckets=[1, 5, 10, 30, 60, 120, 300, 600, 1800, 3600], +) + logger = structlog.get_logger(__name__) MAX_AGE_MINUTES = 15 @@ -178,11 +203,12 @@ def enqueue_cohorts_to_calculate(parallel_count: int) -> None: capture_exception(error=e, additional_properties={"cohort_id": cohort.pk, "team_id": cohort.team_id}) # Skip this cohort and continue with others continue - logger.warning("enqueued_cohort_calculation", cohort_ids=cohort_ids) backlog = get_cohort_calculation_candidates_queryset().count() COHORT_RECALCULATIONS_BACKLOG_GAUGE.set(backlog) + logger.warning("enqueued_cohort_calculation", cohort_ids=cohort_ids, COHORT_RECALCULATIONS_BACKLOG_GAUGE=backlog) + try: update_cohort_metrics() except Exception as e: @@ -362,12 +388,80 @@ def insert_cohort_from_feature_flag(cohort_id: int, flag_key: str, team_id: int) get_cohort_actors_for_feature_flag(cohort_id, flag_key, team_id, batchsize=10_000) +def _collect_cohort_calculation_metrics(history: CohortCalculationHistory, start_time: datetime) -> None: + """ + Collect Prometheus metrics for cohort calculation based on the history record. + This is called from collect_cohort_query_stats to ensure metrics are captured even if OOM occurred. + + Args: + history: CohortCalculationHistory instance + start_time: datetime when the calculation started (used as fallback) + """ + # Determine if calculation completed + if not history.finished_at: + # Calculation never finished - likely OOM or worker crash + COHORT_CALCULATION_FAILURES_COUNTER.labels(failure_type="stuck_inferred_oom").inc() + COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="error").inc() + logger.warning( + "cohort_calculation_stuck_detected", + cohort_id=history.cohort_id, + history_id=str(history.id), + ) + return + + # Get actual query duration from ClickHouse query_log (most accurate) + # Fallback to wall-clock time if query stats aren't available + duration_seconds = None + if history.total_query_ms: + duration_seconds = history.total_query_ms / 1000.0 + else: + # Fallback: use wall-clock time + duration_seconds = (history.finished_at - start_time).total_seconds() + + if history.error: + # Calculation finished with error + COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="error").inc() + if duration_seconds is not None: + COHORT_CALCULATION_DURATION_SECONDS.labels(status="error").observe(duration_seconds) + + # Categorize failure type + failure_type = "exception" + error_lower = history.error.lower() + if "clickhouse" in error_lower or "code:" in error_lower: + failure_type = "clickhouse_error" + elif "memory" in error_lower or "oom" in error_lower: + failure_type = "memory_error" + elif "timeout" in error_lower or "timed out" in error_lower: + failure_type = "timeout_error" + + COHORT_CALCULATION_FAILURES_COUNTER.labels(failure_type=failure_type).inc() + logger.warning( + "cohort_calculation_failed_with_error", + cohort_id=history.cohort_id, + history_id=str(history.id), + failure_type=failure_type, + duration_seconds=duration_seconds, + ) + else: + # Calculation succeeded + COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="success").inc() + if duration_seconds is not None: + COHORT_CALCULATION_DURATION_SECONDS.labels(status="success").observe(duration_seconds) + logger.info( + "cohort_calculation_completed_successfully", + cohort_id=history.cohort_id, + history_id=str(history.id), + duration_seconds=duration_seconds, + count=history.count, + ) + + @shared_task(ignore_result=True, max_retries=2) def collect_cohort_query_stats( tag_matcher: str, cohort_id: int, start_time_iso: str, history_id: str, query: str ) -> None: """ - Delayed task to collect cohort query statistics + Delayed task to collect cohort query statistics and observability metrics Args: tag_matcher: Query tag to match in query_log_archive @@ -379,8 +473,6 @@ def collect_cohort_query_stats( try: from dateutil import parser - from posthog.models.cohort.calculation_history import CohortCalculationHistory - try: history = CohortCalculationHistory.objects.get(id=history_id) except CohortCalculationHistory.DoesNotExist: @@ -390,7 +482,15 @@ def collect_cohort_query_stats( start_time = parser.parse(start_time_iso) query_stats = get_clickhouse_query_stats(tag_matcher, cohort_id, start_time, history.team.id) - if query_stats: + if query_stats: # Skip if stats already collected (check if queries field is non-empty) + if history.queries: + logger.warning( + "Query stats already collected, skipping duplicate collection", + history_id=history_id, + cohort_id=cohort_id, + ) + return + update_fields = [] # Only update history if it's still in progress (no finished_at) @@ -418,6 +518,10 @@ def collect_cohort_query_stats( history_id=history_id, ) + # Collect observability metrics based on the calculation result + # This runs even if the worker OOM'd, since this task was scheduled before the calculation + _collect_cohort_calculation_metrics(history, start_time) + except Exception as e: logger.exception( "Failed to collect delayed cohort query stats", diff --git a/posthog/urls.py b/posthog/urls.py index 605ffbb4e5..619857c40f 100644 --- a/posthog/urls.py +++ b/posthog/urls.py @@ -9,8 +9,8 @@ from django.utils.http import url_has_allowed_host_and_scheme from django.views.decorators.csrf import csrf_exempt, ensure_csrf_cookie, requires_csrf_token import structlog -from django_prometheus.exports import ExportToDjangoView from drf_spectacular.views import SpectacularAPIView, SpectacularRedocView, SpectacularSwaggerView +from prometheus_client import CollectorRegistry, generate_latest, multiprocess from two_factor.urls import urlpatterns as tf_urls from posthog.api import ( @@ -250,7 +250,26 @@ if settings.DEBUG: # that in production we expose these metrics on a separate port (8001), to ensure # external clients cannot see them. See bin/granian_metrics.py and bin/unit_metrics.py # for details on the production metrics setup. - urlpatterns.append(path("_metrics", ExportToDjangoView)) + + # Use multiprocess mode to collect metrics from all processes (Django + Celery workers) + import os + + def metrics_view(request): + """Metrics endpoint that aggregates from all processes using multiprocess mode.""" + registry = CollectorRegistry() + # If prometheus_multiproc_dir is set, collect from all processes + if "prometheus_multiproc_dir" in os.environ or "PROMETHEUS_MULTIPROC_DIR" in os.environ: + multiprocess.MultiProcessCollector(registry) + else: + # Fallback to default registry if multiprocess not configured + from prometheus_client import REGISTRY + + registry = REGISTRY + + metrics_output = generate_latest(registry) + return HttpResponse(metrics_output, content_type="text/plain; charset=utf-8; version=0.0.4") + + urlpatterns.append(path("_metrics", metrics_view)) # Temporal codec server endpoint for UI decryption - locally only for now urlpatterns.append(path("decode", decode_payloads, name="temporal_decode"))