feat(hogql-queries): tests for lifecycle runner (#18568)

This commit is contained in:
Marius Andra
2023-11-13 15:01:12 +01:00
committed by GitHub
parent 03498b2ba7
commit 8ee0ac490e
3 changed files with 1035 additions and 4 deletions

View File

@@ -117,7 +117,7 @@ class LifecycleQueryRunner(QueryRunner):
},
)
def calculate(self):
def calculate(self) -> LifecycleQueryResponse:
query = self.to_query()
hogql = to_printed_hogql(query, self.team.pk)

View File

@@ -0,0 +1,213 @@
# name: TestLifecycleQueryRunner.test_sampling
'
SELECT groupArray(start_of_period) AS date,
groupArray(counts) AS total,
status
FROM
(SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts,
start_of_period,
status
FROM
(SELECT periods.start_of_period AS start_of_period,
0 AS counts,
sec.status
FROM
(SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')), toIntervalDay(1))))) AS numbers) AS periods
CROSS JOIN
(SELECT status
FROM
(SELECT 1) ARRAY
JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec
ORDER BY sec.status ASC, start_of_period ASC
UNION ALL SELECT start_of_period,
count(DISTINCT person_id) AS counts,
status
FROM
(SELECT events__pdi__person.id AS person_id,
min(toTimeZone(events__pdi__person.created_at, 'UTC')) AS created_at,
arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'UTC')))) AS all_activity,
arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity,
arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'UTC')))) AS following_activity,
arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous)
and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1)))
and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status,
arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1)))
and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status,
arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods,
arrayMap(x -> 'dormant', dormant_periods) AS dormant_label,
arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat,
arrayJoin(temp_concat) AS period_status_pairs,
period_status_pairs.1 AS start_of_period,
period_status_pairs.2 AS status
FROM events SAMPLE 0.1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id)
INNER JOIN
(SELECT argMax(person.created_at, person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id)
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview'))
GROUP BY person_id)
GROUP BY start_of_period,
status)
WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')))), 0))
GROUP BY start_of_period,
status
ORDER BY start_of_period ASC)
GROUP BY status
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1
'
---
# name: TestLifecycleQueryRunner.test_timezones
'
SELECT groupArray(start_of_period) AS date,
groupArray(counts) AS total,
status
FROM
(SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts,
start_of_period,
status
FROM
(SELECT periods.start_of_period AS start_of_period,
0 AS counts,
sec.status
FROM
(SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')), toIntervalDay(1))))) AS numbers) AS periods
CROSS JOIN
(SELECT status
FROM
(SELECT 1) ARRAY
JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec
ORDER BY sec.status ASC, start_of_period ASC
UNION ALL SELECT start_of_period,
count(DISTINCT person_id) AS counts,
status
FROM
(SELECT events__pdi__person.id AS person_id,
min(toTimeZone(events__pdi__person.created_at, 'UTC')) AS created_at,
arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'UTC')))) AS all_activity,
arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity,
arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'UTC')))) AS following_activity,
arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous)
and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1)))
and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status,
arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1)))
and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status,
arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods,
arrayMap(x -> 'dormant', dormant_periods) AS dormant_label,
arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat,
arrayJoin(temp_concat) AS period_status_pairs,
period_status_pairs.1 AS start_of_period,
period_status_pairs.2 AS status
FROM events
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id)
INNER JOIN
(SELECT argMax(person.created_at, person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id)
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview'))
GROUP BY person_id)
GROUP BY start_of_period,
status)
WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC')))), 0))
GROUP BY start_of_period,
status
ORDER BY start_of_period ASC)
GROUP BY status
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1
'
---
# name: TestLifecycleQueryRunner.test_timezones.1
'
SELECT groupArray(start_of_period) AS date,
groupArray(counts) AS total,
status
FROM
(SELECT if(ifNull(equals(status, 'dormant'), 0), negate(sum(counts)), negate(negate(sum(counts)))) AS counts,
start_of_period,
status
FROM
(SELECT periods.start_of_period AS start_of_period,
0 AS counts,
sec.status
FROM
(SELECT minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(numbers.number)) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific'))), dateTrunc('day', plus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific')), toIntervalDay(1))))) AS numbers) AS periods
CROSS JOIN
(SELECT status
FROM
(SELECT 1) ARRAY
JOIN ['new', 'returning', 'resurrecting', 'dormant'] AS status) AS sec
ORDER BY sec.status ASC, start_of_period ASC
UNION ALL SELECT start_of_period,
count(DISTINCT person_id) AS counts,
status
FROM
(SELECT events__pdi__person.id AS person_id,
min(toTimeZone(events__pdi__person.created_at, 'US/Pacific')) AS created_at,
arraySort(groupUniqArray(dateTrunc('day', toTimeZone(events.timestamp, 'US/Pacific')))) AS all_activity,
arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', created_at))) AS previous_activity,
arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'US/Pacific')))) AS following_activity,
arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous)
and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1)))
and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status,
arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1)))
and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status,
arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods,
arrayMap(x -> 'dormant', dormant_periods) AS dormant_label,
arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat,
arrayJoin(temp_concat) AS period_status_pairs,
period_status_pairs.1 AS start_of_period,
period_status_pairs.2 AS status
FROM events
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id)
INNER JOIN
(SELECT argMax(person.created_at, person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id)
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), minus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'US/Pacific'), plus(dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(1))), equals(events.event, '$pageview'))
GROUP BY person_id)
GROUP BY start_of_period,
status)
WHERE and(ifNull(lessOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific')))), 0), ifNull(greaterOrEquals(start_of_period, dateTrunc('day', assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific')))), 0))
GROUP BY start_of_period,
status
ORDER BY start_of_period ASC)
GROUP BY status
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1
'
---

View File

@@ -1,18 +1,37 @@
from datetime import datetime
from freezegun import freeze_time
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner
from posthog.models.utils import UUIDT
from posthog.schema import DateRange, IntervalType, LifecycleQuery, EventsNode
from posthog.schema import (
DateRange,
IntervalType,
LifecycleQuery,
EventsNode,
EventPropertyFilter,
PropertyOperator,
PersonPropertyFilter,
ActionsNode,
)
from posthog.test.base import (
APIBaseTest,
ClickhouseTestMixin,
_create_event,
_create_person,
flush_persons_and_events,
snapshot_clickhouse_queries,
)
from posthog.models import Action, ActionStep
from posthog.models.instance_setting import get_instance_setting
def create_action(**kwargs):
    """Create an ``Action`` with a single ``ActionStep`` matching *event_name*.

    Required keyword arguments: ``team``, ``name``, ``event_name``.
    Returns the created ``Action``.
    """
    action = Action.objects.create(team=kwargs.pop("team"), name=kwargs.pop("name"))
    ActionStep.objects.create(action=action, event=kwargs.pop("event_name"))
    return action
class TestLifecycleQueryRunner(ClickhouseTestMixin, APIBaseTest):
@@ -374,3 +393,802 @@ class TestLifecycleQueryRunner(ClickhouseTestMixin, APIBaseTest):
},
set(response.results),
)
def test_lifecycle_trend(self):
    """Baseline day-interval lifecycle: per-day new/returning/resurrecting/dormant counts."""
    # p1 is active on most days; p2 returns after a gap; p3 and p4 appear once.
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    # One data point per day of the 8-day range; dormant counts are negative.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_any_event(self):
    """With ``event=None`` the series matches any event, so $other events count too."""
    self._create_events(
        event="$pageview",
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
        ],
    )
    # p3/p4 only send a different event; they are still included below.
    self._create_events(
        event="$other",
        data=[
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ],
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event=None)],
            ),
        )
        .calculate()
        .results
    )
    # Same expectations as test_lifecycle_trend: all four persons counted.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_with_zero_person_ids(self):
    """Events carrying the all-zero person UUID must not change the lifecycle counts.

    Only meaningful with person-on-events enabled; expected results are
    identical to ``test_lifecycle_trend`` despite the extra p5 events.
    """
    # `return True` here used to make the test silently pass when the
    # setting was off; skipping reports the situation honestly.
    if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"):
        self.skipTest("PERSON_ON_EVENTS_ENABLED is not set")
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    # p5's events have a zero person_id and should be ignored by the query.
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p5",
        timestamp="2020-01-13T12:00:00Z",
        person_id="00000000-0000-0000-0000-000000000000",
    )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p5",
        timestamp="2020-01-14T12:00:00Z",
        person_id="00000000-0000-0000-0000-000000000000",
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_prop_filtering(self):
    """Event-property filtering at query level and at series (entity) level.

    Only p1's events carry ``$number: 1``; p2-p4's do not, so the filter
    removes them. Both filter placements must produce identical results.
    """
    _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"})
    # p1: six $pageview events, all with the filtered property.
    for timestamp in [
        "2020-01-11T12:00:00Z",
        "2020-01-12T12:00:00Z",
        "2020-01-13T12:00:00Z",
        "2020-01-15T12:00:00Z",
        "2020-01-17T12:00:00Z",
        "2020-01-19T12:00:00Z",
    ]:
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p1",
            timestamp=timestamp,
            properties={"$number": 1},
        )
    # p2-p4: events without $number, excluded by the property filter.
    _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"})
    for timestamp in ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]:
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p2",
            timestamp=timestamp,
        )
    _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "p3"})
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p3",
        timestamp="2020-01-12T12:00:00Z",
    )
    _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"name": "p4"})
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p4",
        timestamp="2020-01-15T12:00:00Z",
    )
    # Filter applied on the whole query.
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
                properties=[EventPropertyFilter(key="$number", value="1", operator=PropertyOperator.exact)],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]},
            {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
    # Same filter applied on the series entity instead — identical output.
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[
                    EventsNode(
                        event="$pageview",
                        properties=[EventPropertyFilter(key="$number", value="1", operator=PropertyOperator.exact)],
                    )
                ],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]},
            {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_person_prop_filtering(self):
    """Person-property filtering on the series entity: only persons with name=p1 count."""
    _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"})
    # p1: six $pageview events; only this person matches name="p1".
    for timestamp in [
        "2020-01-11T12:00:00Z",
        "2020-01-12T12:00:00Z",
        "2020-01-13T12:00:00Z",
        "2020-01-15T12:00:00Z",
        "2020-01-17T12:00:00Z",
        "2020-01-19T12:00:00Z",
    ]:
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p1",
            timestamp=timestamp,
            properties={"$number": 1},
        )
    # p2-p4 have different names, so the person filter excludes them.
    _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "p2"})
    for timestamp in ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]:
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p2",
            timestamp=timestamp,
        )
    _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "p3"})
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p3",
        timestamp="2020-01-12T12:00:00Z",
    )
    _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"name": "p4"})
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p4",
        timestamp="2020-01-15T12:00:00Z",
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[
                    EventsNode(
                        event="$pageview",
                        properties=[PersonPropertyFilter(key="name", value="p1", operator=PropertyOperator.exact)],
                    )
                ],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result,
        [
            {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "dormant", "data": [0, 0, -1, 0, -1, 0, -1, 0]},
        ],
    )
def test_lifecycle_trends_distinct_id_repeat(self):
    """Two distinct_ids mapped to one person count as a single person's activity."""
    # freeze_time pins the person's created_at; the events below carry
    # explicit timestamps. NOTE(review): the diff rendering lost
    # indentation — assumed only the person creation is inside the
    # freeze_time block; confirm against the repository.
    with freeze_time("2020-01-12T12:00:00Z"):
        _create_person(
            team_id=self.team.pk,
            distinct_ids=["p1", "another_p1"],
            properties={"name": "p1"},
        )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p1",
        timestamp="2020-01-12T12:00:00Z",
    )
    # Same person, different distinct_id — must merge into one lifecycle.
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="another_p1",
        timestamp="2020-01-14T12:00:00Z",
    )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p1",
        timestamp="2020-01-15T12:00:00Z",
    )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p1",
        timestamp="2020-01-17T12:00:00Z",
    )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="p1",
        timestamp="2020-01-19T12:00:00Z",
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -1, 0, 0, -1, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 0, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 1, 0, 0, 1, 0, 1]},
            {"status": "returning", "data": [0, 0, 0, 1, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_action(self):
    """An ActionsNode series matching $pageview gives the same results as the events series."""
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    # Action with one step matching the $pageview event.
    pageview_action = create_action(team=self.team, name="$pageview", event_name="$pageview")
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[ActionsNode(id=pageview_action.pk)],
            ),
        )
        .calculate()
        .results
    )
    # Identical expectations to test_lifecycle_trend.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_lifecycle_trend_all_time(self):
    """date_from="all": range starts at the first event; "now" is frozen to 2020-01-17."""
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    # Freeze "now" so the open-ended "all" range is deterministic (9 days).
    with freeze_time("2020-01-17T13:01:01Z"):
        result = (
            LifecycleQueryRunner(
                team=self.team,
                query=LifecycleQuery(
                    dateRange=DateRange(date_from="all"),
                    interval=IntervalType.day,
                    series=[EventsNode(event="$pageview")],
                ),
            )
            .calculate()
            .results
        )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -1, 0, 0, -2, -1, 0, -2, 0]},
            {"status": "new", "data": [1, 0, 1, 1, 0, 0, 1, 0, 0]},
            {"status": "returning", "data": [0, 0, 0, 1, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 0, 1, 0, 1]},
        ],
    )
def test_lifecycle_trend_weeks(self):
    """Week-interval lifecycle; bucket labels snap to week starts."""
    # lifecycle weeks rounds the date to the nearest following week 2/5 -> 2/10
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-02-01T12:00:00Z",
                    "2020-02-05T12:00:00Z",
                    "2020-02-10T12:00:00Z",
                    "2020-02-15T12:00:00Z",
                    "2020-02-27T12:00:00Z",
                    "2020-03-02T12:00:00Z",
                ],
            ),
            ("p2", ["2020-02-11T12:00:00Z", "2020-02-18T12:00:00Z"]),
            ("p3", ["2020-02-12T12:00:00Z"]),
            ("p4", ["2020-02-27T12:00:00Z"]),
        ]
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-02-05T00:00:00Z", date_to="2020-03-09T00:00:00Z"),
                interval=IntervalType.week,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    # Buckets are the Mondays covering the requested range.
    self.assertEqual(
        result[0]["days"],
        [
            "2020-02-03",
            "2020-02-10",
            "2020-02-17",
            "2020-02-24",
            "2020-03-02",
            "2020-03-09",
        ],
    )
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, 0, -2, -1, -1, -1]},
            {"status": "new", "data": [0, 2, 0, 1, 0, 0]},
            {"status": "resurrecting", "data": [0, 0, 0, 1, 0, 0]},
            {"status": "returning", "data": [1, 1, 1, 0, 1, 0]},
        ],
    )
def test_lifecycle_trend_months(self):
    """Month-interval lifecycle over Feb-Sep 2020 with activity gaps of months."""
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-02-12T12:00:00Z",
                    "2020-03-13T12:00:00Z",
                    "2020-05-15T12:00:00Z",
                    "2020-07-17T12:00:00Z",
                    "2020-09-19T12:00:00Z",
                ],
            ),
            ("p2", ["2019-12-09T12:00:00Z", "2020-02-12T12:00:00Z"]),
            ("p3", ["2020-02-12T12:00:00Z"]),
            ("p4", ["2020-05-15T12:00:00Z"]),
        ]
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-02-01T00:00:00Z", date_to="2020-09-01T00:00:00Z"),
                interval=IntervalType.month,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    # Same shape as the daily baseline, one point per month.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
def test_filter_test_accounts(self):
    """filterTestAccounts=True drops p1 (test@posthog.com) from the counts."""
    self._create_events(
        data=[
            (
                "p1",  # p1 gets test@posthog.com as email and gets filtered out
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
                filterTestAccounts=True,
            ),
        )
        .calculate()
        .results
    )
    # Counts reflect only p2-p4 after the test-account filter.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, 0, 0, -1, 0, 0, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 0, 0, 0, 0, 0]},
            {"status": "returning", "data": [0, 0, 0, 0, 0, 0, 0, 0]},
        ],
    )
@snapshot_clickhouse_queries
def test_timezones(self):
    """Team timezone changes day-bucket boundaries: UTC vs US/Pacific results differ."""
    # p1's first two events straddle midnight UTC (23:00 / 01:00), so their
    # day assignment shifts when the team timezone moves to US/Pacific.
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T23:00:00Z",
                    "2020-01-12T01:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    result = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    # Baseline in UTC matches test_lifecycle_trend.
    assertLifecycleResults(
        result,
        [
            {"status": "dormant", "data": [0, -2, -1, 0, -2, 0, -1, 0]},
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [1, 1, 0, 0, 0, 0, 0, 0]},
        ],
    )
    # Rerun the same query with the team in US/Pacific.
    self.team.timezone = "US/Pacific"
    self.team.save()
    result_pacific = (
        LifecycleQueryRunner(
            team=self.team,
            query=LifecycleQuery(
                dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
                interval=IntervalType.day,
                series=[EventsNode(event="$pageview")],
            ),
        )
        .calculate()
        .results
    )
    assertLifecycleResults(
        result_pacific,
        [
            {
                "status": "dormant",
                "data": [-1.0, -2.0, -1.0, 0.0, -2.0, 0.0, -1.0, 0.0],
            },
            {"status": "new", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
            {"status": "resurrecting", "data": [1, 1, 0, 1, 0, 1, 0, 1]},
            {"status": "returning", "data": [0, 0, 0, 0, 0, 0, 0, 0]},
        ],
    )
# Ensure running the query with sampling works + generate a snapshot that shows sampling in the query
@snapshot_clickhouse_queries
def test_sampling(self):
    """Smoke test: samplingFactor=0.1 executes without error (snapshot shows SAMPLE clause)."""
    self._create_events(
        data=[
            (
                "p1",
                [
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                    "2020-01-17T12:00:00Z",
                    "2020-01-19T12:00:00Z",
                ],
            ),
            ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
            ("p3", ["2020-01-12T12:00:00Z"]),
            ("p4", ["2020-01-15T12:00:00Z"]),
        ]
    )
    # No result assertions: sampled counts are nondeterministic; the
    # snapshot decorator verifies the generated SQL instead.
    LifecycleQueryRunner(
        team=self.team,
        query=LifecycleQuery(
            dateRange=DateRange(date_from="2020-01-12T00:00:00Z", date_to="2020-01-19T00:00:00Z"),
            interval=IntervalType.day,
            series=[EventsNode(event="$pageview")],
            samplingFactor=0.1,
        ),
    ).calculate()
def assertLifecycleResults(results, expected):
    """Assert lifecycle query results match *expected*, order-insensitively.

    Each result is reduced to its ``status`` and ``data`` keys (extra keys
    such as ``days`` are ignored) and both sides are sorted by ``status``
    before comparison, so series ordering does not matter.
    """
    sorted_results = [
        {"status": r["status"], "data": r["data"]}
        for r in sorted(results, key=lambda r: r["status"])
    ]
    # sorted() already returns a list — no list() wrapper needed.
    sorted_expected = sorted(expected, key=lambda r: r["status"])
    assert sorted_results == sorted_expected