From 40cfa5f6c02e6a96ccb0307f53fed11805182436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szczur?= Date: Wed, 5 Nov 2025 18:27:16 +0100 Subject: [PATCH] chore(clickhouse): decrease max part size on development (#40959) --- .github/workflows/ci-backend.yml | 2 + docker/clickhouse/config.xml | 9 + ..._analytics_top_customers_query_runner.ambr | 204 +++++++++--------- 3 files changed, 113 insertions(+), 102 deletions(-) diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index 366467690c..dca861f805 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -90,10 +90,12 @@ jobs: - docker-compose.dev.yml - docker-compose.base.yml - frontend/public/email/* + - docker/clickhouse # These scripts are used in the CI - bin/check_temporal_up - bin/check_kafka_clickhouse_up migrations: + - docker/clickhouse - 'posthog/migrations/*.py' - 'products/*/backend/migrations/*.py' - 'products/*/migrations/*.py' # Legacy structure diff --git a/docker/clickhouse/config.xml b/docker/clickhouse/config.xml index 5e9b9a6cb5..7939cf52b1 100644 --- a/docker/clickhouse/config.xml +++ b/docker/clickhouse/config.xml @@ -75,6 +75,9 @@ 10 10000 + + 4 + @@ -417,4 +420,10 @@ \1(???) + + + 1073741824 + 2 + 2 + diff --git a/products/revenue_analytics/backend/hogql_queries/test/__snapshots__/test_revenue_analytics_top_customers_query_runner.ambr b/products/revenue_analytics/backend/hogql_queries/test/__snapshots__/test_revenue_analytics_top_customers_query_runner.ambr index bd229f3a8a..6809597574 100644 --- a/products/revenue_analytics/backend/hogql_queries/test/__snapshots__/test_revenue_analytics_top_customers_query_runner.ambr +++ b/products/revenue_analytics/backend/hogql_queries/test/__snapshots__/test_revenue_analytics_top_customers_query_runner.ambr @@ -1102,108 +1102,6 @@ # --- # name: TestRevenueAnalyticsTopCustomersQueryRunner.test_with_data_with_managed_viewsets_ff ''' - SELECT inner.customer_id AS customer_id, - revenue_analytics_customer.name AS name, - inner.amount AS amount, - inner.month AS month - FROM - (SELECT revenue_analytics_revenue_item.customer_id AS customer_id, - toStartOfMonth(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC')) AS month, - sum(revenue_analytics_revenue_item.amount) AS amount - FROM - (SELECT toString(events.uuid) AS id, - toString(events.uuid) AS invoice_item_id, - 'revenue_analytics.events.purchase' AS source_label, - toTimeZone(events.timestamp, 'UTC') AS timestamp, - timestamp AS created_at, - isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'subscription'), ''), 'null'), '^"|"$', '')) AS is_recurring, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'product'), ''), 'null'), '^"|"$', '') AS product_id, - toString(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id)) AS customer_id, - events.`$group_0` AS group_0_key, - events.`$group_1` AS group_1_key, - events.`$group_2` AS group_2_key, - events.`$group_3` AS group_3_key, - events.`$group_4` AS group_4_key, - NULL AS invoice_id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'subscription'), ''), 'null'), '^"|"$', '') AS subscription_id, - toString(events.`$session_id`) AS session_id, - events.event AS event_name, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'coupon'), ''), 'null'), '^"|"$', '') AS coupon, - coupon AS coupon_id, - upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')) AS original_currency, - accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'revenue'), ''), 'null'), '^"|"$', ''), 'Decimal64(10)') AS original_amount, - 1 AS enable_currency_aware_divider, - if(enable_currency_aware_divider, accurateCastOrNull(1, 'Decimal64(10)'), accurateCastOrNull(100, 'Decimal64(10)')) AS currency_aware_divider, - divideDecimal(original_amount, currency_aware_divider) AS currency_aware_amount, - 'GBP' AS currency, - if(isNull(upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', ''))), accurateCastOrNull(currency_aware_amount, 'Decimal64(10)'), if(equals(upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), 'GBP'), toDecimal64(currency_aware_amount, 10), if(dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10)) = 0, toDecimal64(0, 10), multiplyDecimal(divideDecimal(toDecimal64(currency_aware_amount, 10), dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10))), dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', 'GBP', toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10)))))) AS amount - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), and(equals(events.event, 'purchase'), 1, isNotNull(amount))) - ORDER BY timestamp DESC) AS revenue_analytics_revenue_item - WHERE and(greaterOrEquals(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC'), assumeNotNull(toDateTime('2015-01-01 00:00:00', 'UTC'))), lessOrEquals(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC'), assumeNotNull(toDateTime('2025-04-21 23:59:59', 'UTC')))) - GROUP BY customer_id, - month - ORDER BY amount DESC - LIMIT 20 BY month) AS inner - LEFT JOIN - (SELECT toString(persons.id) AS id, - 'revenue_analytics.events.purchase' AS source_label, - persons.created_at AS timestamp, - persons.properties___name AS name, - persons.properties___email AS email, - persons.properties___phone AS phone, - persons.properties___address AS address, - persons.properties___metadata AS metadata, - persons.`properties___$geoip_country_name` AS country, - formatDateTime(toStartOfMonth(persons.created_at), '%Y-%m') AS cohort, - NULL AS initial_coupon, - NULL AS initial_coupon_id - FROM - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'phone'), ''), 'null'), '^"|"$', '') AS properties___phone, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'address'), ''), 'null'), '^"|"$', '') AS properties___address, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'metadata'), ''), 'null'), '^"|"$', '') AS properties___metadata, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$geoip_country_name'), ''), 'null'), '^"|"$', '') AS `properties___$geoip_country_name`, - toTimeZone(person.created_at, 'UTC') AS created_at - FROM person - WHERE and(equals(person.team_id, 99999), in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) - ORDER BY argMax(toTimeZone(person.created_at, 'UTC'), person.version) DESC))) SETTINGS optimize_aggregation_in_order=1) AS persons - INNER JOIN - (SELECT DISTINCT events__person.id AS person_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE equals(events.team_id, 99999)) AS events ON equals(persons.id, events.person_id) - ORDER BY persons.created_at DESC) AS revenue_analytics_customer ON equals(inner.customer_id, revenue_analytics_customer.id) - ORDER BY amount DESC - LIMIT 20 BY month - LIMIT 100 - UNION ALL SELECT inner.customer_id AS customer_id, revenue_analytics_customer.name AS name, inner.amount AS amount, @@ -1319,6 +1217,108 @@ GROUP BY invoice.customer) AS cohort_inner ON equals(cohort_inner.customer_id, outer.id)) AS revenue_analytics_customer ON equals(inner.customer_id, revenue_analytics_customer.id) ORDER BY amount DESC LIMIT 20 BY month + LIMIT 100 + UNION ALL + SELECT inner.customer_id AS customer_id, + revenue_analytics_customer.name AS name, + inner.amount AS amount, + inner.month AS month + FROM + (SELECT revenue_analytics_revenue_item.customer_id AS customer_id, + toStartOfMonth(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC')) AS month, + sum(revenue_analytics_revenue_item.amount) AS amount + FROM + (SELECT toString(events.uuid) AS id, + toString(events.uuid) AS invoice_item_id, + 'revenue_analytics.events.purchase' AS source_label, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + timestamp AS created_at, + isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'subscription'), ''), 'null'), '^"|"$', '')) AS is_recurring, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'product'), ''), 'null'), '^"|"$', '') AS product_id, + toString(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id)) AS customer_id, + events.`$group_0` AS group_0_key, + events.`$group_1` AS group_1_key, + events.`$group_2` AS group_2_key, + events.`$group_3` AS group_3_key, + events.`$group_4` AS group_4_key, + NULL AS invoice_id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'subscription'), ''), 'null'), '^"|"$', '') AS subscription_id, + toString(events.`$session_id`) AS session_id, + events.event AS event_name, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'coupon'), ''), 'null'), '^"|"$', '') AS coupon, + coupon AS coupon_id, + upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')) AS original_currency, + accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'revenue'), ''), 'null'), '^"|"$', ''), 'Decimal64(10)') AS original_amount, + 1 AS enable_currency_aware_divider, + if(enable_currency_aware_divider, accurateCastOrNull(1, 'Decimal64(10)'), accurateCastOrNull(100, 'Decimal64(10)')) AS currency_aware_divider, + divideDecimal(original_amount, currency_aware_divider) AS currency_aware_amount, + 'GBP' AS currency, + if(isNull(upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', ''))), accurateCastOrNull(currency_aware_amount, 'Decimal64(10)'), if(equals(upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), 'GBP'), toDecimal64(currency_aware_amount, 10), if(dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10)) = 0, toDecimal64(0, 10), multiplyDecimal(divideDecimal(toDecimal64(currency_aware_amount, 10), dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', upper(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'currency'), ''), 'null'), '^"|"$', '')), toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10))), dictGetOrDefault(`posthog_test`.`exchange_rate_dict`, 'rate', 'GBP', toDate(toTimeZone(events.timestamp, 'UTC')), toDecimal64(0, 10)))))) AS amount + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, 'purchase'), 1, isNotNull(amount))) + ORDER BY timestamp DESC) AS revenue_analytics_revenue_item + WHERE and(greaterOrEquals(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC'), assumeNotNull(toDateTime('2015-01-01 00:00:00', 'UTC'))), lessOrEquals(toTimeZone(revenue_analytics_revenue_item.timestamp, 'UTC'), assumeNotNull(toDateTime('2025-04-21 23:59:59', 'UTC')))) + GROUP BY customer_id, + month + ORDER BY amount DESC + LIMIT 20 BY month) AS inner + LEFT JOIN + (SELECT toString(persons.id) AS id, + 'revenue_analytics.events.purchase' AS source_label, + persons.created_at AS timestamp, + persons.properties___name AS name, + persons.properties___email AS email, + persons.properties___phone AS phone, + persons.properties___address AS address, + persons.properties___metadata AS metadata, + persons.`properties___$geoip_country_name` AS country, + formatDateTime(toStartOfMonth(persons.created_at), '%Y-%m') AS cohort, + NULL AS initial_coupon, + NULL AS initial_coupon_id + FROM + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'phone'), ''), 'null'), '^"|"$', '') AS properties___phone, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'address'), ''), 'null'), '^"|"$', '') AS properties___address, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'metadata'), ''), 'null'), '^"|"$', '') AS properties___metadata, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$geoip_country_name'), ''), 'null'), '^"|"$', '') AS `properties___$geoip_country_name`, + toTimeZone(person.created_at, 'UTC') AS created_at + FROM person + WHERE and(equals(person.team_id, 99999), in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) + ORDER BY argMax(toTimeZone(person.created_at, 'UTC'), person.version) DESC))) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT DISTINCT events__person.id AS person_id + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE equals(events.team_id, 99999)) AS events ON equals(persons.id, events.person_id) + ORDER BY persons.created_at DESC) AS revenue_analytics_customer ON equals(inner.customer_id, revenue_analytics_customer.id) + ORDER BY amount DESC + LIMIT 20 BY month LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1,