chore: improved cohorts calculation observability (#41465)

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Gustavo H. Strassburger
2025-11-13 21:19:00 -03:00
committed by GitHub
parent f2a93583d4
commit 8425c2bf82
18 changed files with 1291 additions and 494 deletions

View File

@@ -171,8 +171,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -180,6 +178,25 @@
# ---
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -188,7 +205,7 @@
AND version = 0
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.2
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.3
'''
(SELECT cohort_people.person_id AS id
@@ -269,7 +286,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.3
# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence.4
'''
SELECT if(funnel_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, funnel_query.person_id) AS id
@@ -341,8 +358,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -350,12 +365,21 @@
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.10
@@ -638,6 +662,16 @@
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.3
'''
(SELECT cohort_people.person_id AS id
@@ -684,7 +718,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.3
# name: TestCohortQuery.test_cohort_filter_with_extra.4
'''
SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id
@@ -726,7 +760,7 @@
join_algorithm = 'auto'
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.4
# name: TestCohortQuery.test_cohort_filter_with_extra.5
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
@@ -740,14 +774,31 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.5
# name: TestCohortQuery.test_cohort_filter_with_extra.6
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.7
'''
SELECT count(DISTINCT person_id)
@@ -757,99 +808,6 @@
AND version = 0
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.6
'''
(
(SELECT persons.id AS id
FROM
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name,
person.id AS id
FROM person
WHERE and(equals(person.team_id, 99999), in(id,
(SELECT where_optimization.id AS id
FROM person AS where_optimization
WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test'), 0)))))
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons
WHERE ifNull(equals(persons.properties___name, 'test'), 0)
ORDER BY persons.id ASC
LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto'))
UNION DISTINCT (
(SELECT source.id AS id
FROM
(SELECT actor_id AS actor_id,
count() AS event_count,
groupUniqArray(distinct_id) AS event_distinct_ids,
actor_id AS id
FROM
(SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id,
toTimeZone(e.timestamp, 'UTC') AS timestamp,
e.uuid AS uuid,
e.distinct_id AS distinct_id
FROM events AS e
LEFT OUTER JOIN
(SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('today', 6, 'UTC')), equals(e.event, '$pageview')))
GROUP BY actor_id) AS source
ORDER BY source.id ASC
LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto')) SETTINGS readonly=2,
max_execution_time=600,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.7
'''
SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id
FROM
(SELECT if(not(empty(pdi.distinct_id)), pdi.person_id, e.person_id) AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND event IN ['$pageview']
AND timestamp <= now()
AND timestamp >= now() - INTERVAL 1 week
GROUP BY person_id) behavior_query
FULL OUTER JOIN
(SELECT *,
id AS person_id
FROM
(SELECT id,
argMax(properties, version) as person_props
FROM person
WHERE team_id = 99999
GROUP BY id
HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id
WHERE 1 = 1
AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(person_props, 'name'), '^"|"$', ''))))
OR ((coalesce(performed_event_condition_None_level_level_0_level_1_level_0_0, false))))) SETTINGS optimize_aggregation_in_order = 1,
join_algorithm = 'auto'
'''
# ---
# name: TestCohortQuery.test_cohort_filter_with_extra.8
'''
(
@@ -1972,8 +1930,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -1981,6 +1937,25 @@
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -1989,45 +1964,6 @@
AND version = 0
'''
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.2
'''
(SELECT cohort_people.person_id AS id
FROM
(SELECT DISTINCT cohortpeople.person_id AS person_id,
cohortpeople.cohort_id AS cohort_id,
cohortpeople.team_id AS team_id
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 99999), in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(99999, 0)]))) AS cohort_people
WHERE and(ifNull(equals(cohort_people.cohort_id, 99999), 0), ifNull(equals(cohort_people.team_id, 99999), 0))
LIMIT 1000000000)
UNION DISTINCT
(SELECT persons.id AS id
FROM
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name,
person.id AS id
FROM person
WHERE and(equals(person.team_id, 99999), in(id,
(SELECT where_optimization.id AS id
FROM person AS where_optimization
WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test2'), 0)))))
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons
WHERE ifNull(equals(persons.properties___name, 'test2'), 0)
ORDER BY persons.id ASC
LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto') SETTINGS readonly=2,
max_execution_time=600,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.3
'''
@@ -2068,6 +2004,45 @@
'''
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.4
'''
(SELECT cohort_people.person_id AS id
FROM
(SELECT DISTINCT cohortpeople.person_id AS person_id,
cohortpeople.cohort_id AS cohort_id,
cohortpeople.team_id AS team_id
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 99999), in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(99999, 0)]))) AS cohort_people
WHERE and(ifNull(equals(cohort_people.cohort_id, 99999), 0), ifNull(equals(cohort_people.team_id, 99999), 0))
LIMIT 1000000000)
UNION DISTINCT
(SELECT persons.id AS id
FROM
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name,
person.id AS id
FROM person
WHERE and(equals(person.team_id, 99999), in(id,
(SELECT where_optimization.id AS id
FROM person AS where_optimization
WHERE and(equals(where_optimization.team_id, 99999), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, 'name'), ''), 'null'), '^"|"$', ''), 'test2'), 0)))))
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons
WHERE ifNull(equals(persons.properties___name, 'test2'), 0)
ORDER BY persons.id ASC
LIMIT 1000000000 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto') SETTINGS readonly=2,
max_execution_time=600,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.5
'''
SELECT person.person_id AS id
@@ -2314,8 +2289,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -2323,6 +2296,25 @@
# ---
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -2331,7 +2323,7 @@
AND version = 0
'''
# ---
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.2
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.3
'''
(SELECT cohort_people.person_id AS id
@@ -2379,7 +2371,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.3
# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.4
'''
SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id

View File

@@ -13,8 +13,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-01-21'
AND event_time >= '2021-01-21 00:00:00'
ORDER BY event_time DESC
@@ -22,6 +20,25 @@
# ---
# name: TestEventQuery.test_account_filters.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-01-21'
AND event_time >= '2021-01-21 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestEventQuery.test_account_filters.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -30,7 +47,7 @@
AND version = 0
'''
# ---
# name: TestEventQuery.test_account_filters.2
# name: TestEventQuery.test_account_filters.3
'''
SELECT e.timestamp as timestamp,
if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id

View File

@@ -131,8 +131,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -140,16 +138,6 @@
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -162,14 +150,12 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.3
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.2
'''
SELECT count(DISTINCT person_id)
@@ -179,7 +165,55 @@
AND version = 0
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.5
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.6
'''
/* user_id:0 request:_snapshot_ */
SELECT count(1)
@@ -203,7 +237,7 @@
HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.5
# name: TestBlastRadius.test_user_blast_radius_with_multiple_precalculated_cohorts.7
'''
/* user_id:0 request:_snapshot_ */
SELECT count(1)
@@ -267,8 +301,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -288,8 +320,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -297,6 +327,44 @@
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.5
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.6
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -305,7 +373,7 @@
AND version = 0
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.5
# name: TestBlastRadius.test_user_blast_radius_with_multiple_static_cohorts.7
'''
/* user_id:0 request:_snapshot_ */
SELECT count(1)
@@ -374,8 +442,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -383,6 +449,25 @@
# ---
# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.4
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -391,7 +476,7 @@
AND version = 0
'''
# ---
# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.4
# name: TestBlastRadius.test_user_blast_radius_with_single_cohort.5
'''
/* user_id:0 request:_snapshot_ */
SELECT count(1)

View File

@@ -2528,8 +2528,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -2537,6 +2535,25 @@
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -2545,7 +2562,7 @@
AND version = 0
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.2
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.3
'''
SELECT metric_events.variant AS variant,
count(metric_events.entity_id) AS num_users,
@@ -2610,7 +2627,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.3
# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.4
'''
SELECT metric_events.variant AS variant,
count(metric_events.entity_id) AS num_users,

View File

@@ -973,8 +973,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -982,6 +980,25 @@
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -990,7 +1007,7 @@
AND version = 0
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.2
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.3
'''
SELECT metric_events.variant AS variant,
count(metric_events.entity_id) AS num_users,
@@ -1046,7 +1063,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.3
# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.4
'''
SELECT metric_events.variant AS variant,
count(metric_events.entity_id) AS num_users,

View File

@@ -456,8 +456,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -465,6 +463,25 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -473,7 +490,7 @@
AND version = 0
'''
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.3
'''
SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1,
countIf(ifNull(equals(steps, 2), 0)) AS step_2,

View File

@@ -355,8 +355,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -364,6 +362,25 @@
# ---
# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -372,7 +389,7 @@
AND version = 0
'''
# ---
# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.2
# name: TestFOSSFunnelUDF.test_funnel_with_precalculated_cohort_step_filter.3
'''
SELECT sum(step_1) AS step_1,
sum(step_2) AS step_2,

View File

@@ -13,8 +13,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -22,6 +20,25 @@
# ---
# name: TestLifecycleQueryRunner.test_cohort_filter.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestLifecycleQueryRunner.test_cohort_filter.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -30,7 +47,7 @@
AND version = 0
'''
# ---
# name: TestLifecycleQueryRunner.test_cohort_filter.2
# name: TestLifecycleQueryRunner.test_cohort_filter.3
'''
SELECT groupArray(start_of_period) AS date,
groupArray(counts) AS total,

View File

@@ -209,8 +209,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -218,6 +216,25 @@
# ---
# name: TestFormula.test_breakdown_cohort.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestFormula.test_breakdown_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -226,7 +243,7 @@
AND version = 0
'''
# ---
# name: TestFormula.test_breakdown_cohort.2
# name: TestFormula.test_breakdown_cohort.3
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
@@ -269,7 +286,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestFormula.test_breakdown_cohort.3
# name: TestFormula.test_breakdown_cohort.4
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
@@ -312,7 +329,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestFormula.test_breakdown_cohort.4
# name: TestFormula.test_breakdown_cohort.5
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
@@ -365,7 +382,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestFormula.test_breakdown_cohort.5
# name: TestFormula.test_breakdown_cohort.6
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,

View File

@@ -13,8 +13,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -22,16 +20,6 @@
# ---
# name: TestTrends.test_action_filtering_with_cohort.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -44,15 +32,61 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.5
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -61,7 +95,7 @@
AND version = 2
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.4
# name: TestTrends.test_action_filtering_with_cohort.6
'''
SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-07 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
@@ -124,8 +158,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -133,16 +165,6 @@
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -155,15 +177,61 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.5
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -172,7 +240,7 @@
AND version = 2
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.4
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.6
'''
SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2020-01-01 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-07 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
@@ -708,8 +776,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -717,6 +783,25 @@
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -725,7 +810,7 @@
AND version = 0
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
@@ -1136,8 +1221,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
@@ -1145,16 +1228,6 @@
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -1167,14 +1240,12 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.3
# name: TestTrends.test_filter_events_by_precalculated_cohort.2
'''
SELECT count(DISTINCT person_id)
@@ -1184,7 +1255,55 @@
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.5
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.6
'''
SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-02 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
@@ -1244,8 +1363,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
@@ -1253,16 +1370,6 @@
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -1275,14 +1382,12 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2
'''
SELECT count(DISTINCT person_id)
@@ -1292,7 +1397,55 @@
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.5
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.6
'''
SELECT arrayMap(number -> plus(toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfInterval(assumeNotNull(toDateTime('2019-12-26 00:00:00', 'UTC')), toIntervalDay(1)), toStartOfInterval(assumeNotNull(toDateTime('2020-01-02 23:59:59', 'UTC')), toIntervalDay(1)))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
@@ -1828,8 +1981,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -1837,6 +1988,25 @@
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -1845,7 +2015,7 @@
AND version = 0
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action.2
# name: TestTrends.test_person_filtering_in_cohort_in_action.3
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
@@ -1912,8 +2082,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -1921,6 +2089,25 @@
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -1929,7 +2116,7 @@
AND version = 0
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,

View File

@@ -575,8 +575,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -584,6 +582,25 @@
# ---
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -592,7 +609,7 @@
AND version = 0
'''
# ---
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.2
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.3
'''
SELECT timestamp
@@ -603,7 +620,7 @@
limit 1
'''
# ---
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.3
# name: TestWebStatsTableQueryRunner.test_cohort_test_filters.4
'''
SELECT breakdown_value AS `context.columns.breakdown_value`,
tuple(uniq(filtered_person_id), NULL) AS `context.columns.visitors`,

View File

@@ -45,6 +45,10 @@ from posthog.queries.util import PersonPropertiesMode
# temporary marker to denote when cohortpeople table started being populated
TEMP_PRECALCULATED_MARKER = parser.parse("2021-06-07T15:00:00+00:00")
# Cohort query timeout settings
COHORT_QUERY_TIMEOUT_SECONDS = 600 # Max execution time for ClickHouse cohort calculation queries
COHORT_STATS_COLLECTION_DELAY_SECONDS = 60 # Short delay to allow query_log to flush before collecting stats
logger = structlog.get_logger(__name__)
@@ -73,22 +77,41 @@ def run_cohort_query(
# Tag the query for tracking
tag_queries(kind="cohort_calculation", id=cohort_tag)
delayed_task = None
# Use tags_context to protect tags during import (circular import resolution can corrupt context)
with tags_context():
from posthog.tasks.calculate_cohort import COHORT_CALCULATION_STARTED_COUNTER, collect_cohort_query_stats
# Track that a calculation is starting (before it runs, so we catch OOMs)
COHORT_CALCULATION_STARTED_COUNTER.inc()
# Schedule delayed task to collect stats after query_log_archive is synced
# Only if we have a history record to update
if history and query:
delayed_task = collect_cohort_query_stats.apply_async(
args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query],
countdown=COHORT_QUERY_TIMEOUT_SECONDS,
)
try:
result = fn(*args, **kwargs)
end_time = timezone.now() # Capture when query actually finished
# If calculation succeeded and we scheduled a delayed task, cancel it and run immediately
# This avoids waiting the full timeout when the query completed quickly
if delayed_task and history and query:
if delayed_task.state in ["PENDING", "RECEIVED"]:
delayed_task.revoke() # Cancel the delayed task
# Run immediately since the query already completed
collect_cohort_query_stats.apply_async(
args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query],
countdown=COHORT_STATS_COLLECTION_DELAY_SECONDS,
)
return result, end_time
finally:
# Schedule delayed task to collect stats after query_log_archive is synced
# Only if we have a history record to update
if history and query:
from posthog.tasks.calculate_cohort import collect_cohort_query_stats
collect_cohort_query_stats.apply_async(
args=[cohort_tag, cohort_id, start_time.isoformat(), history.id, query],
countdown=60,
)
# Reset query tags to avoid affecting other queries
from posthog.clickhouse.query_tagging import reset_query_tags
@@ -119,7 +142,6 @@ def get_clickhouse_query_stats(tag_matcher: str, cohort_id: int, start_time: dat
lc_cohort_id = %(cohort_id)s
AND team_id = %(team_id)s
AND query LIKE %(matcher)s
AND type IN ('QueryFinish', 'ExceptionWhileProcessing')
AND event_date >= %(start_date)s
AND event_time >= %(start_time)s
ORDER BY event_time DESC
@@ -516,9 +538,9 @@ def _recalculate_cohortpeople_for_team_hogql(
"new_version": pending_version,
},
settings={
"max_execution_time": 600,
"send_timeout": 600,
"receive_timeout": 600,
"max_execution_time": COHORT_QUERY_TIMEOUT_SECONDS,
"send_timeout": COHORT_QUERY_TIMEOUT_SECONDS,
"receive_timeout": COHORT_QUERY_TIMEOUT_SECONDS,
"optimize_on_insert": 0,
"max_ast_elements": hogql_global_settings.max_ast_elements,
"max_expanded_ast_elements": hogql_global_settings.max_expanded_ast_elements,

View File

@@ -389,8 +389,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -398,6 +396,25 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -406,7 +423,7 @@
AND version = 0
'''
# ---
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2
# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.3
'''
SELECT countIf(steps = 1) step_1,

View File

@@ -13,8 +13,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -22,6 +20,25 @@
# ---
# name: TestTrends.test_action_filtering_with_cohort.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -30,7 +47,7 @@
AND version = 2
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort.2
# name: TestTrends.test_action_filtering_with_cohort.3
'''
SELECT groupArray(day_start) as date,
@@ -102,8 +119,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -111,6 +126,25 @@
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -119,7 +153,7 @@
AND version = 2
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.2
# name: TestTrends.test_action_filtering_with_cohort_poe_v2.3
'''
SELECT groupArray(day_start) as date,
@@ -1003,8 +1037,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
@@ -1012,6 +1044,25 @@
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -1020,7 +1071,7 @@
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort.2
# name: TestTrends.test_filter_events_by_precalculated_cohort.3
'''
SELECT groupArray(day_start) as date,
@@ -1086,8 +1137,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
@@ -1095,6 +1144,25 @@
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2020-01-02'
AND event_time >= '2020-01-02 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -1103,7 +1171,7 @@
AND version = 0
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.2
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2.3
'''
SELECT groupArray(day_start) as date,

View File

@@ -5971,8 +5971,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -5980,6 +5978,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -5988,7 +6005,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.2
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.3
'''
SELECT DISTINCT p.id
@@ -6036,7 +6053,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.3
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6076,7 +6093,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.4
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6141,8 +6158,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -6150,6 +6165,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -6158,7 +6192,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.2
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.3
'''
SELECT DISTINCT p.id
@@ -6206,7 +6240,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.3
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6246,7 +6280,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.4
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_materialized.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6311,8 +6345,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -6320,6 +6352,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -6328,7 +6379,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.2
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.3
'''
SELECT DISTINCT p.id
@@ -6376,7 +6427,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.3
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6416,7 +6467,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.4
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6481,8 +6532,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -6490,6 +6539,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -6498,7 +6566,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.2
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.3
'''
SELECT DISTINCT p.id
@@ -6546,7 +6614,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.3
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -6586,7 +6654,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.4
# name: TestSessionRecordingsListFromQuery_0.test_filter_users_from_excluded_cohort_no_events_materialized.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13298,8 +13366,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -13307,6 +13373,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -13315,7 +13400,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.2
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.3
'''
SELECT DISTINCT p.id
@@ -13363,7 +13448,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.3
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13403,7 +13488,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.4
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13468,8 +13553,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -13477,6 +13560,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -13485,7 +13587,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.2
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.3
'''
SELECT DISTINCT p.id
@@ -13533,7 +13635,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.3
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13573,7 +13675,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.4
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_materialized.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13638,8 +13740,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -13647,6 +13747,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -13655,7 +13774,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.2
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.3
'''
SELECT DISTINCT p.id
@@ -13703,7 +13822,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.3
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13743,7 +13862,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.4
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13808,8 +13927,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
@@ -13817,6 +13934,25 @@
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= 'today'
AND event_time >= 'today 00:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -13825,7 +13961,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.2
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.3
'''
SELECT DISTINCT p.id
@@ -13873,7 +14009,7 @@
AND ((has(['yes'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$is_internal'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.3
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -13913,7 +14049,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.4
# name: TestSessionRecordingsListFromQuery_1.test_filter_users_from_excluded_cohort_no_events_materialized.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,

View File

@@ -13,8 +13,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
@@ -22,6 +20,25 @@
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -30,7 +47,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.2
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.3
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -81,7 +98,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.3
# name: TestSessionRecordingsListByCohort.test_filter_with_cohort_properties.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -146,8 +163,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
@@ -155,6 +170,25 @@
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -163,7 +197,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.2
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.3
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -221,7 +255,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.3
# name: TestSessionRecordingsListByCohort.test_filter_with_events_and_cohorts.4
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -293,8 +327,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
@@ -302,16 +334,6 @@
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.1
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.2
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
@@ -324,14 +346,114 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.10
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
any(s.distinct_id) AS `any(s.distinct_id)`,
min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time,
max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time,
dateDiff('SECOND', start_time, end_time) AS duration,
argMinMerge(s.first_url) AS first_url,
sum(s.click_count) AS click_count,
sum(s.keypress_count) AS keypress_count,
sum(s.mouse_activity_count) AS mouse_activity_count,
divide(sum(s.active_milliseconds), 1000) AS active_seconds,
minus(duration, active_seconds) AS inactive_seconds,
sum(s.console_log_count) AS console_log_count,
sum(s.console_warn_count) AS console_warn_count,
sum(s.console_error_count) AS console_error_count,
max(s.retention_period_days) AS retention_period_days,
plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time,
dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl,
greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing,
round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score
FROM session_replay_events AS s
WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id,
(SELECT cohortpeople.person_id AS person_id
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id,
(SELECT person_static_cohort.person_id AS person_id
FROM person_static_cohort
WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))))))))
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0))))
GROUP BY s.session_id
HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0)
ORDER BY start_time DESC,
s.session_id DESC
LIMIT 51 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
WHERE team_id = 99999
AND cohort_id = 99999
AND version = 0
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.3
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.4
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.5
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -382,7 +504,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.4
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.6
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -433,7 +555,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.5
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.7
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -484,7 +606,7 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.6
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.8
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
@@ -535,114 +657,6 @@
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.7
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
any(s.distinct_id) AS `any(s.distinct_id)`,
min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time,
max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time,
dateDiff('SECOND', start_time, end_time) AS duration,
argMinMerge(s.first_url) AS first_url,
sum(s.click_count) AS click_count,
sum(s.keypress_count) AS keypress_count,
sum(s.mouse_activity_count) AS mouse_activity_count,
divide(sum(s.active_milliseconds), 1000) AS active_seconds,
minus(duration, active_seconds) AS inactive_seconds,
sum(s.console_log_count) AS console_log_count,
sum(s.console_warn_count) AS console_warn_count,
sum(s.console_error_count) AS console_error_count,
max(s.retention_period_days) AS retention_period_days,
plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time,
dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl,
greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing,
round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score
FROM session_replay_events AS s
WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id,
(SELECT cohortpeople.person_id AS person_id
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id,
(SELECT person_static_cohort.person_id AS person_id
FROM person_static_cohort
WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))))))))
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0))))
GROUP BY s.session_id
HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0)
ORDER BY start_time DESC,
s.session_id DESC
LIMIT 51 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.8
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,
any(s.distinct_id) AS `any(s.distinct_id)`,
min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time,
max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time,
dateDiff('SECOND', start_time, end_time) AS duration,
argMinMerge(s.first_url) AS first_url,
sum(s.click_count) AS click_count,
sum(s.keypress_count) AS keypress_count,
sum(s.mouse_activity_count) AS mouse_activity_count,
divide(sum(s.active_milliseconds), 1000) AS active_seconds,
minus(duration, active_seconds) AS inactive_seconds,
sum(s.console_log_count) AS console_log_count,
sum(s.console_warn_count) AS console_warn_count,
sum(s.console_error_count) AS console_error_count,
max(s.retention_period_days) AS retention_period_days,
plus(dateTrunc('DAY', start_time), toIntervalDay(coalesce(retention_period_days, 21))) AS expiry_time,
dateDiff('DAY', toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC'), expiry_time) AS recording_ttl,
greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')) AS ongoing,
round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score
FROM session_replay_events AS s
WHERE and(equals(s.team_id, 99999), greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-18 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), in(s.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), in(person_distinct_id2.distinct_id,
(SELECT person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE and(equals(person_distinct_id2.team_id, 99999), and(notIn(person_distinct_id2.person_id,
(SELECT cohortpeople.person_id AS person_id
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), notIn(person_distinct_id2.person_id,
(SELECT person_static_cohort.person_id AS person_id
FROM person_static_cohort
WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))))))))
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0))))
GROUP BY s.session_id
HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0)
ORDER BY start_time DESC,
s.session_id DESC
LIMIT 51 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1
'''
# ---
# name: TestSessionRecordingsListByCohort.test_filter_with_static_and_dynamic_cohort_properties.9
'''
SELECT s.session_id AS session_id,
@@ -683,9 +697,9 @@
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0))))
GROUP BY s.session_id
HAVING ifNull(greaterOrEquals(expiry_time, toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0)
ORDER BY start_time DESC
LIMIT 51
OFFSET 0 SETTINGS readonly=2,
ORDER BY start_time DESC,
s.session_id DESC
LIMIT 51 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
@@ -711,8 +725,6 @@
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND type IN ('QueryFinish',
'ExceptionWhileProcessing')
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
@@ -720,6 +732,25 @@
# ---
# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.1
'''
/* celery:posthog.tasks.calculate_cohort.collect_cohort_query_stats */
SELECT query_id,
query_duration_ms,
read_rows,
read_bytes,
written_rows,
memory_usage,
exception
FROM query_log_archive
WHERE lc_cohort_id = 99999
AND team_id = 99999
AND query LIKE '%cohort_calc:00000000%'
AND event_date >= '2021-08-21'
AND event_time >= '2021-08-21 20:00:00'
ORDER BY event_time DESC
'''
# ---
# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.2
'''
SELECT count(DISTINCT person_id)
FROM cohortpeople
@@ -728,7 +759,7 @@
AND version = 0
'''
# ---
# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.2
# name: TestSessionRecordingsListByCohort.test_internal_account_filter_with_cohort_properties.3
'''
SELECT s.session_id AS session_id,
any(s.team_id) AS `any(s.team_id)`,

View File

@@ -1,5 +1,5 @@
import time
from datetime import timedelta
from datetime import datetime, timedelta
from typing import Any, Optional
from django.conf import settings
@@ -10,7 +10,7 @@ import structlog
import posthoganalytics
from celery import chain, current_task, shared_task
from dateutil.relativedelta import relativedelta
from prometheus_client import Counter, Gauge
from prometheus_client import Counter, Gauge, Histogram
from posthog.api.monitoring import Feature
from posthog.clickhouse import query_tagging
@@ -18,6 +18,7 @@ from posthog.clickhouse.query_tagging import QueryTags, update_tags
from posthog.exceptions_capture import capture_exception
from posthog.models import Cohort
from posthog.models.cohort import CohortOrEmpty
from posthog.models.cohort.calculation_history import CohortCalculationHistory
from posthog.models.cohort.util import (
get_all_cohort_dependencies,
get_all_cohort_dependents,
@@ -58,6 +59,30 @@ COHORT_MAXED_ERRORS_GAUGE = Gauge(
"cohort_maxed_errors", "Number of cohorts that have reached the maximum number of errors"
)
COHORT_CALCULATION_STARTED_COUNTER = Counter(
"cohort_calculation_started_total",
"Cohort calculations started (tracks all attempts including those that may OOM)",
)
COHORT_CALCULATION_COMPLETED_COUNTER = Counter(
"cohort_calculation_completed_total",
"Cohort calculations that completed (either success or caught error)",
["status"], # labels: "success", "error"
)
COHORT_CALCULATION_FAILURES_COUNTER = Counter(
"cohort_calculation_failures_total",
"Cohort calculation failures by type",
["failure_type"], # labels: "exception", "clickhouse_error", etc.
)
COHORT_CALCULATION_DURATION_SECONDS = Histogram(
"cohort_calculation_duration_seconds",
"Duration of cohort calculations in seconds",
["status"], # labels: "success", "error"
buckets=[1, 5, 10, 30, 60, 120, 300, 600, 1800, 3600],
)
logger = structlog.get_logger(__name__)
MAX_AGE_MINUTES = 15
@@ -178,11 +203,12 @@ def enqueue_cohorts_to_calculate(parallel_count: int) -> None:
capture_exception(error=e, additional_properties={"cohort_id": cohort.pk, "team_id": cohort.team_id})
# Skip this cohort and continue with others
continue
logger.warning("enqueued_cohort_calculation", cohort_ids=cohort_ids)
backlog = get_cohort_calculation_candidates_queryset().count()
COHORT_RECALCULATIONS_BACKLOG_GAUGE.set(backlog)
logger.warning("enqueued_cohort_calculation", cohort_ids=cohort_ids, COHORT_RECALCULATIONS_BACKLOG_GAUGE=backlog)
try:
update_cohort_metrics()
except Exception as e:
@@ -362,12 +388,80 @@ def insert_cohort_from_feature_flag(cohort_id: int, flag_key: str, team_id: int)
get_cohort_actors_for_feature_flag(cohort_id, flag_key, team_id, batchsize=10_000)
def _collect_cohort_calculation_metrics(history: CohortCalculationHistory, start_time: datetime) -> None:
"""
Collect Prometheus metrics for cohort calculation based on the history record.
This is called from collect_cohort_query_stats to ensure metrics are captured even if OOM occurred.
Args:
history: CohortCalculationHistory instance
start_time: datetime when the calculation started (used as fallback)
"""
# Determine if calculation completed
if not history.finished_at:
# Calculation never finished - likely OOM or worker crash
COHORT_CALCULATION_FAILURES_COUNTER.labels(failure_type="stuck_inferred_oom").inc()
COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="error").inc()
logger.warning(
"cohort_calculation_stuck_detected",
cohort_id=history.cohort_id,
history_id=str(history.id),
)
return
# Get actual query duration from ClickHouse query_log (most accurate)
# Fallback to wall-clock time if query stats aren't available
duration_seconds = None
if history.total_query_ms:
duration_seconds = history.total_query_ms / 1000.0
else:
# Fallback: use wall-clock time
duration_seconds = (history.finished_at - start_time).total_seconds()
if history.error:
# Calculation finished with error
COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="error").inc()
if duration_seconds is not None:
COHORT_CALCULATION_DURATION_SECONDS.labels(status="error").observe(duration_seconds)
# Categorize failure type
failure_type = "exception"
error_lower = history.error.lower()
if "clickhouse" in error_lower or "code:" in error_lower:
failure_type = "clickhouse_error"
elif "memory" in error_lower or "oom" in error_lower:
failure_type = "memory_error"
elif "timeout" in error_lower or "timed out" in error_lower:
failure_type = "timeout_error"
COHORT_CALCULATION_FAILURES_COUNTER.labels(failure_type=failure_type).inc()
logger.warning(
"cohort_calculation_failed_with_error",
cohort_id=history.cohort_id,
history_id=str(history.id),
failure_type=failure_type,
duration_seconds=duration_seconds,
)
else:
# Calculation succeeded
COHORT_CALCULATION_COMPLETED_COUNTER.labels(status="success").inc()
if duration_seconds is not None:
COHORT_CALCULATION_DURATION_SECONDS.labels(status="success").observe(duration_seconds)
logger.info(
"cohort_calculation_completed_successfully",
cohort_id=history.cohort_id,
history_id=str(history.id),
duration_seconds=duration_seconds,
count=history.count,
)
@shared_task(ignore_result=True, max_retries=2)
def collect_cohort_query_stats(
tag_matcher: str, cohort_id: int, start_time_iso: str, history_id: str, query: str
) -> None:
"""
Delayed task to collect cohort query statistics
Delayed task to collect cohort query statistics and observability metrics
Args:
tag_matcher: Query tag to match in query_log_archive
@@ -379,8 +473,6 @@ def collect_cohort_query_stats(
try:
from dateutil import parser
from posthog.models.cohort.calculation_history import CohortCalculationHistory
try:
history = CohortCalculationHistory.objects.get(id=history_id)
except CohortCalculationHistory.DoesNotExist:
@@ -390,7 +482,15 @@ def collect_cohort_query_stats(
start_time = parser.parse(start_time_iso)
query_stats = get_clickhouse_query_stats(tag_matcher, cohort_id, start_time, history.team.id)
if query_stats:
if query_stats: # Skip if stats already collected (check if queries field is non-empty)
if history.queries:
logger.warning(
"Query stats already collected, skipping duplicate collection",
history_id=history_id,
cohort_id=cohort_id,
)
return
update_fields = []
# Only update history if it's still in progress (no finished_at)
@@ -418,6 +518,10 @@ def collect_cohort_query_stats(
history_id=history_id,
)
# Collect observability metrics based on the calculation result
# This runs even if the worker OOM'd, since this task was scheduled before the calculation
_collect_cohort_calculation_metrics(history, start_time)
except Exception as e:
logger.exception(
"Failed to collect delayed cohort query stats",

View File

@@ -9,8 +9,8 @@ from django.utils.http import url_has_allowed_host_and_scheme
from django.views.decorators.csrf import csrf_exempt, ensure_csrf_cookie, requires_csrf_token
import structlog
from django_prometheus.exports import ExportToDjangoView
from drf_spectacular.views import SpectacularAPIView, SpectacularRedocView, SpectacularSwaggerView
from prometheus_client import CollectorRegistry, generate_latest, multiprocess
from two_factor.urls import urlpatterns as tf_urls
from posthog.api import (
@@ -250,7 +250,26 @@ if settings.DEBUG:
# that in production we expose these metrics on a separate port (8001), to ensure
# external clients cannot see them. See bin/granian_metrics.py and bin/unit_metrics.py
# for details on the production metrics setup.
urlpatterns.append(path("_metrics", ExportToDjangoView))
# Use multiprocess mode to collect metrics from all processes (Django + Celery workers)
import os
def metrics_view(request):
"""Metrics endpoint that aggregates from all processes using multiprocess mode."""
registry = CollectorRegistry()
# If prometheus_multiproc_dir is set, collect from all processes
if "prometheus_multiproc_dir" in os.environ or "PROMETHEUS_MULTIPROC_DIR" in os.environ:
multiprocess.MultiProcessCollector(registry)
else:
# Fallback to default registry if multiprocess not configured
from prometheus_client import REGISTRY
registry = REGISTRY
metrics_output = generate_latest(registry)
return HttpResponse(metrics_output, content_type="text/plain; charset=utf-8; version=0.0.4")
urlpatterns.append(path("_metrics", metrics_view))
# Temporal codec server endpoint for UI decryption - locally only for now
urlpatterns.append(path("decode", decode_payloads, name="temporal_decode"))