diff --git a/posthog/hogql_queries/insights/lifecycle_query_runner.py b/posthog/hogql_queries/insights/lifecycle_query_runner.py index 7b1e5579ab..307f508ebd 100644 --- a/posthog/hogql_queries/insights/lifecycle_query_runner.py +++ b/posthog/hogql_queries/insights/lifecycle_query_runner.py @@ -117,7 +117,7 @@ class LifecycleQueryRunner(QueryRunner): }, ) - def calculate(self): + def calculate(self) -> LifecycleQueryResponse: query = self.to_query() hogql = to_printed_hogql(query, self.team.pk) diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr new file mode 100644 index 0000000000..962c5eecbe --- /dev/null +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -0,0 +1,213 @@ +# name: TestLifecycleQueryRunner.test_sampling + ' + SELECT groupArray(start_of_period) AS date, + groupArray(counts) AS total, + status + FROM + (SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts, + start_of_period, + status + FROM + (SELECT periods.start_of_period AS start_of_period, + 0 AS counts, + sec.status + FROM + (SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS start_of_period + FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')), toIntervalDay(1))))) AS numbers) AS periods + CROSS JOIN + (SELECT status + FROM + (SELECT 1) ARRAY + JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec + ORDER BY sec.status ASC, start_of_period ASC + UNION ALL SELECT start_of_period, + count(DISTINCT person_id) AS counts, + status + FROM + (SELECT events__pdi__person.id AS person_id, + min(toTimeZone(events__pdi__person.created_at, 'UTC')) AS created_at, + arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'UTC')))) AS all_activity, + arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity, + arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'UTC')))) AS following_activity, + arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous) + and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1))) + and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status, + arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1))) + and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status, + arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods, + arrayMap(x -> 'dormant', dormant_periods) AS dormant_label, + arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat, + arrayJoin(temp_concat) AS period_status_pairs, + period_status_pairs.1 AS start_of_period, + period_status_pairs.2 AS status + FROM events SAMPLE 0.1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + INNER JOIN + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview')) + GROUP BY person_id) + GROUP BY start_of_period, + status) + WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')))), 0)) + GROUP BY start_of_period, + status + ORDER BY start_of_period ASC) + GROUP BY status + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestLifecycleQueryRunner.test_timezones + ' + SELECT groupArray(start_of_period) AS date, + groupArray(counts) AS total, + status + FROM + (SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts, + start_of_period, + status + FROM + (SELECT periods.start_of_period AS start_of_period, + 0 AS counts, + sec.status + FROM + (SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS start_of_period + FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')), toIntervalDay(1))))) AS numbers) AS periods + CROSS JOIN + (SELECT status + FROM + (SELECT 1) ARRAY + JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec + ORDER BY sec.status ASC, start_of_period ASC + UNION ALL SELECT start_of_period, + count(DISTINCT person_id) AS counts, + status + FROM + (SELECT events__pdi__person.id AS person_id, + min(toTimeZone(events__pdi__person.created_at, 'UTC')) AS created_at, + arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'UTC')))) AS all_activity, + arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity, + arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'UTC')))) AS following_activity, + arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous) + and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1))) + and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status, + arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1))) + and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status, + arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods, + arrayMap(x -> 'dormant', dormant_periods) AS dormant_label, + arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat, + arrayJoin(temp_concat) AS period_status_pairs, + period_status_pairs.1 AS start_of_period, + period_status_pairs.2 AS status + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + INNER JOIN + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview')) + GROUP BY person_id) + GROUP BY start_of_period, + status) + WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')))), 0)) + GROUP BY start_of_period, + status + ORDER BY start_of_period ASC) + GROUP BY status + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestLifecycleQueryRunner.test_timezones.1 + ' + SELECT groupArray(start_of_period) AS date, + groupArray(counts) AS total, + status + FROM + (SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts, + start_of_period, + status + FROM + (SELECT periods.start_of_period AS start_of_period, + 0 AS counts, + sec.status + FROM + (SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(numbers.number)) AS start_of_period + FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific')), toIntervalDay(1))))) AS numbers) AS periods + CROSS JOIN + (SELECT status + FROM + (SELECT 1) ARRAY + JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec + ORDER BY sec.status ASC, start_of_period ASC + UNION ALL SELECT start_of_period, + count(DISTINCT person_id) AS counts, + status + FROM + (SELECT events__pdi__person.id AS person_id, + min(toTimeZone(events__pdi__person.created_at, 'US/Pacific')) AS created_at, + arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'US/Pacific')))) AS all_activity, + arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity, + arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'US/Pacific')))) AS following_activity, + arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous) + and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1))) + and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status, + arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1))) + and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status, + arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods, + arrayMap(x -> 'dormant', dormant_periods) AS dormant_label, + arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat, + arrayJoin(temp_concat) AS period_status_pairs, + period_status_pairs.1 AS start_of_period, + period_status_pairs.2 AS status + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + INNER JOIN + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'US/Pacific'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(1))), equals(events.event, '$pageview')) + GROUP BY person_id) + GROUP BY start_of_period, + status) + WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific')))), 0)) + GROUP BY start_of_period, + status + ORDER BY start_of_period ASC) + GROUP BY status + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- diff --git a/posthog/hogql_queries/insights/test/test_lifecycle_query_runner.py b/posthog/hogql_queries/insights/test/test_lifecycle_query_runner.py index df4dcbf350..35bfa44991 100644 --- a/posthog/hogql_queries/insights/test/test_lifecycle_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_lifecycle_query_runner.py @@ -1,18 +1,37 @@ from datetime import datetime - from freezegun import freeze_time - from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner from posthog.models.utils import UUIDT -from posthog.schema import DateRange, IntervalType, LifecycleQuery, EventsNode +from posthog.schema import ( + DateRange, + IntervalType, + LifecycleQuery, + EventsNode, + EventPropertyFilter, + PropertyOperator, + PersonPropertyFilter, + ActionsNode, +) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events, + snapshot_clickhouse_queries, ) +from posthog.models import Action, ActionStep +from posthog.models.instance_setting import get_instance_setting + + +def create_action(**kwargs): + team = kwargs.pop("team") + name = kwargs.pop("name") + event_name = kwargs.pop("event_name") + action = Action.objects.create(team=team, name=name) + ActionStep.objects.create(action=action, event=event_name) + return action class TestLifecycleQueryRunner(ClickhouseTestMixin, APIBaseTest): @@ -374,3 +393,802 @@ class TestLifecycleQueryRunner(ClickhouseTestMixin, APIBaseTest): }, set(response.results), ) + + def test_lifecycle_trend(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_any_event(self): + self._create_events( + event="$pageview", + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ], + ) + self._create_events( + event="$other", + data=[ + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ], + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event=None)], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_with_zero_person_ids(self): + # only a person-on-event test + if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"): + return True + + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p5", + timestamp="2020-01-13T12:00:00Z", + person_id="00000000-0000-0000-0000-000000000000", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p5", + timestamp="2020-01-14T12:00:00Z", + person_id="00000000-0000-0000-0000-000000000000", + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_prop_filtering(self): + _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"$number": 1}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-12T12:00:00Z", + properties={"$number": 1}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-13T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-15T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-17T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-19T12:00:00Z", + properties={"$number": 1}, + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-09T12:00:00Z", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-12T12:00:00Z", + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "p3"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-12T12:00:00Z", + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"name": "p4"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p4", + timestamp="2020-01-15T12:00:00Z", + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + properties=[EventPropertyFilter(key="$number", value="1", operator=PropertyOperator.exact)], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]}, + {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + # entities filtering + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[ + EventsNode( + event="$pageview", + properties=[EventPropertyFilter(key="$number", value="1", operator=PropertyOperator.exact)], + ) + ], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]}, + {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_person_prop_filtering(self): + _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"$number": 1}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-12T12:00:00Z", + properties={"$number": 1}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-13T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-15T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-17T12:00:00Z", + properties={"$number": 1}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-19T12:00:00Z", + properties={"$number": 1}, + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-09T12:00:00Z", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-12T12:00:00Z", + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "p3"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p3", + timestamp="2020-01-12T12:00:00Z", + ) + + _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"name": "p4"}) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p4", + timestamp="2020-01-15T12:00:00Z", + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[ + EventsNode( + event="$pageview", + properties=[PersonPropertyFilter(key="name", value="p1", operator=PropertyOperator.exact)], + ) + ], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]}, + ], + ) + + def test_lifecycle_trends_distinct_id_repeat(self): + with freeze_time("2020-01-12T12:00:00Z"): + _create_person( + team_id=self.team.pk, + distinct_ids=["p1", "another_p1"], + properties={"name": "p1"}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-12T12:00:00Z", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="another_p1", + timestamp="2020-01-14T12:00:00Z", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-15T12:00:00Z", + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-17T12:00:00Z", + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-19T12:00:00Z", + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -1, 0, 0, -1, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 0, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 1, 0, 0, 1, 0, 1]}, + {"status": "returning", "data": [0, 0, 0, 1, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_action(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + pageview_action = create_action(team=self.team, name="$pageview", event_name="$pageview") + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[ActionsNode(id=pageview_action.pk)], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_lifecycle_trend_all_time(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + with freeze_time("2020-01-17T13:01:01Z"): + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="all"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -1, 0, 0, -2, -1, 0, -2, 0]}, + {"status": "new", "data": [1, 0, 1, 1, 0, 0, 1, 0, 0]}, + {"status": "returning", "data": [0, 0, 0, 1, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 0, 1, 0, 1]}, + ], + ) + + def test_lifecycle_trend_weeks(self): + # lifecycle weeks rounds the date to the nearest following week 2/5 -> 2/10 + self._create_events( + data=[ + ( + "p1", + [ + "2020-02-01T12:00:00Z", + "2020-02-05T12:00:00Z", + "2020-02-10T12:00:00Z", + "2020-02-15T12:00:00Z", + "2020-02-27T12:00:00Z", + "2020-03-02T12:00:00Z", + ], + ), + ("p2", ["2020-02-11T12:00:00Z", "2020-02-18T12:00:00Z"]), + ("p3", ["2020-02-12T12:00:00Z"]), + ("p4", ["2020-02-27T12:00:00Z"]), + ] + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-02-05T00:00:00Z", date_to="2020-03-09T00:00:00Z"), + interval=IntervalType.week, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + self.assertEqual( + result[0]["days"], + [ + "2020-02-03", + "2020-02-10", + "2020-02-17", + "2020-02-24", + "2020-03-02", + "2020-03-09", + ], + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, 0, -2, -1, -1, -1]}, + {"status": "new", "data": [0, 2, 0, 1, 0, 0]}, + {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 0]}, + {"status": "returning", "data": [1, 1, 1, 0, 1, 0]}, + ], + ) + + def test_lifecycle_trend_months(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-02-12T12:00:00Z", + "2020-03-13T12:00:00Z", + "2020-05-15T12:00:00Z", + "2020-07-17T12:00:00Z", + "2020-09-19T12:00:00Z", + ], + ), + ("p2", ["2019-12-09T12:00:00Z", "2020-02-12T12:00:00Z"]), + ("p3", ["2020-02-12T12:00:00Z"]), + ("p4", ["2020-05-15T12:00:00Z"]), + ] + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-02-01T00:00:00Z", date_to="2020-09-01T00:00:00Z"), + interval=IntervalType.month, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + def test_filter_test_accounts(self): + self._create_events( + data=[ + ( + "p1", # p1 gets test@posthog.com as email and gets filtered out + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + filterTestAccounts=True, + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, 0, 0, -1, 0, 0, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 0, 0, 0, 0, 0]}, + {"status": "returning", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, + ], + ) + + @snapshot_clickhouse_queries + def test_timezones(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T23:00:00Z", + "2020-01-12T01:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + result = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result, + [ + {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]}, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]}, + ], + ) + + self.team.timezone = "US/Pacific" + self.team.save() + + result_pacific = ( + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + ), + ) + .calculate() + .results + ) + + assertLifecycleResults( + result_pacific, + [ + { + "status": "dormant", + "data": [-1.0, -2.0, -1.0, 0.0, -2.0, 0.0, -1.0, 0.0], + }, + {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, + {"status": "resurrecting", "data": [1, 1, 0, 1, 0, 1, 0, 1]}, + {"status": "returning", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, + ], + ) + + # Ensure running the query with sampling works + generate a snapshot that shows sampling in the query + @snapshot_clickhouse_queries + def test_sampling(self): + self._create_events( + data=[ + ( + "p1", + [ + "2020-01-11T12:00:00Z", + "2020-01-12T12:00:00Z", + "2020-01-13T12:00:00Z", + "2020-01-15T12:00:00Z", + "2020-01-17T12:00:00Z", + "2020-01-19T12:00:00Z", + ], + ), + ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), + ("p3", ["2020-01-12T12:00:00Z"]), + ("p4", ["2020-01-15T12:00:00Z"]), + ] + ) + + LifecycleQueryRunner( + team=self.team, + query=LifecycleQuery( + dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"), + interval=IntervalType.day, + series=[EventsNode(event="$pageview")], + samplingFactor=0.1, + ), + ).calculate() + + +def assertLifecycleResults(results, expected): + sorted_results = [{"status": r["status"], "data": r["data"]} for r in sorted(results, key=lambda r: r["status"])] + sorted_expected = list(sorted(expected, key=lambda r: r["status"])) + + assert sorted_results == sorted_expected