feat(experiments): sample size calculator for Funnel (#29487)

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Juraj Majerik
2025-03-06 17:34:23 +01:00
committed by GitHub
parent 14fd90d7dc
commit bfcb67526c
12 changed files with 282 additions and 89 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 279 KiB

After

Width:  |  Height:  |  Size: 286 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 282 KiB

After

Width:  |  Height:  |  Size: 290 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 87 KiB

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 88 KiB

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 67 KiB

View File

@@ -121,7 +121,7 @@ export function ExperimentView(): JSX.Element {
/>
</div>
<div>
<span className="card-secondary">Sample size:</span>{' '}
<span className="card-secondary">Recommended sample size:</span>{' '}
<span className="font-semibold">
{humanFriendlyNumber(
experiment.parameters.recommended_sample_size || 0,
@@ -131,7 +131,7 @@ export function ExperimentView(): JSX.Element {
</span>
</div>
<div>
<span className="card-secondary">Running time:</span>{' '}
<span className="card-secondary">Estimated running time:</span>{' '}
<span className="font-semibold">
{humanFriendlyNumber(
experiment.parameters.recommended_running_time || 0,

View File

@@ -21,6 +21,7 @@ export function RunningTimeCalculatorModal(): JSX.Element {
uniqueUsers,
averageEventsPerUser,
averagePropertyValuePerUser,
conversionRate,
metricResultLoading,
} = useValues(runningTimeCalculatorLogic({ experimentId }))
const { setMinimumDetectableEffect, setMetricIndex } = useActions(runningTimeCalculatorLogic({ experimentId }))
@@ -102,18 +103,20 @@ export function RunningTimeCalculatorModal(): JSX.Element {
<Spinner className="text-3xl transform -translate-y-[-10px]" />
</div>
</div>
) : uniqueUsers !== null && standardDeviation !== null ? (
) : (
<div className="border-t pt-2">
<div className="grid grid-cols-3 gap-4">
<div>
<div className="card-secondary">Unique users</div>
<div className="font-semibold">
~{humanFriendlyNumber(uniqueUsers || 0, 0)} persons
{uniqueUsers !== null && (
<div>
<div className="card-secondary">Unique users</div>
<div className="font-semibold">
~{humanFriendlyNumber(uniqueUsers || 0, 0)} persons
</div>
<div className="text-xs text-muted">
Last {TIMEFRAME_HISTORICAL_DATA_DAYS} days
</div>
</div>
<div className="text-xs text-muted">
Last {TIMEFRAME_HISTORICAL_DATA_DAYS} days
</div>
</div>
)}
{averageEventsPerUser !== null && (
<div>
<div className="card-secondary">Avg. events per user</div>
@@ -130,15 +133,25 @@ export function RunningTimeCalculatorModal(): JSX.Element {
</div>
</div>
)}
<div>
<div className="card-secondary">Estimated standard deviation</div>
<div className="font-semibold">
~{humanFriendlyNumber(standardDeviation, 0)}
{conversionRate !== null && (
<div>
<div className="card-secondary">Conversion rate</div>
<div className="font-semibold">
~{humanFriendlyNumber(conversionRate * 100, 2)}%
</div>
</div>
</div>
)}
{standardDeviation !== null && (
<div>
<div className="card-secondary">Estimated standard deviation</div>
<div className="font-semibold">
~{humanFriendlyNumber(standardDeviation, 0)}
</div>
</div>
)}
</div>
</div>
) : null}
)}
</div>
</div>

View File

@@ -119,4 +119,51 @@ describe('runningTimeCalculatorLogic', () => {
})
})
})
// Should match https://docs.google.com/spreadsheets/d/11alyC8n7uqewZFLKfV4UAbW-0zH__EdV_Hrk2OQ4140/edit?gid=0#gid=0
// Verifies the sample size / running time selectors for a FUNNEL (conversion rate) metric.
describe('calculations for FUNNEL', () => {
beforeEach(() => {
// Experiment fixture: a single FUNNEL metric and a feature flag with two variants
// (control / test), so the logic sees numberOfVariants = 2.
experimentLogic.actions.setExperiment({
metrics: [
{
metric_type: ExperimentMetricType.FUNNEL,
} as ExperimentMetric,
],
feature_flag: {
filters: {
multivariate: {
variants: [
{
key: 'control',
rollout_percentage: 50,
},
{
key: 'test',
rollout_percentage: 50,
},
],
},
},
} as unknown as FeatureFlagBasicType,
})
// Select the only metric defined above.
logic.actions.setMetricIndex(0)
})
it('calculates recommended sample size and running time correctly for FUNNEL', async () => {
// MDE is given in percent: 50 means a 50% change of the baseline conversion rate.
logic.actions.setMinimumDetectableEffect(50)
// Inject the historical data directly instead of running a query.
logic.actions.setMetricResult({
uniqueUsers: 1000,
conversionRate: 0.1,
})
await expectLogic(logic).toFinishAllListeners()
// Expected sample size: d = 0.5 * 0.1 = 0.05, so
// 16 * 0.1 * (1 - 0.1) / 0.05^2 = 576 per variant, times 2 variants = 1152.
// NOTE(review): 16.1 presumably equals 1152 / (1000 users / 14 days) — the running-time
// formula is not visible from this test; confirm against runningTimeCalculatorLogic.
await expectLogic(logic).toMatchValues({
minimumDetectableEffect: 50,
recommendedSampleSize: expect.closeTo(1152, 0),
recommendedRunningTime: expect.closeTo(16.1, 1),
})
})
})
})

View File

@@ -6,14 +6,122 @@ import { dayjs } from 'lib/dayjs'
import { experimentLogic } from 'scenes/experiments/experimentLogic'
import { performQuery } from '~/queries/query'
import { ExperimentMetric, ExperimentMetricType, NodeKind, TrendsQueryResponse } from '~/queries/schema/schema-general'
import { BaseMathType, CountPerActorMathType, Experiment, ExperimentMetricMathType, PropertyMathType } from '~/types'
import {
ExperimentMetric,
ExperimentMetricType,
FunnelsQuery,
NodeKind,
TrendsQuery,
TrendsQueryResponse,
} from '~/queries/schema/schema-general'
import {
BaseMathType,
CountPerActorMathType,
Experiment,
ExperimentMetricMathType,
FunnelVizType,
PropertyMathType,
} from '~/types'
import type { runningTimeCalculatorLogicType } from './runningTimeCalculatorLogicType'
export const TIMEFRAME_HISTORICAL_DATA_DAYS = 14
export const VARIANCE_SCALING_FACTOR_COUNT = 2
export const VARIANCE_SCALING_FACTOR_CONTINUOUS = 0.25
export const VARIANCE_SCALING_FACTOR_TOTAL_COUNT = 2
export const VARIANCE_SCALING_FACTOR_SUM = 0.25
/**
 * Picks the series node kind that corresponds to a metric's config kind.
 *
 * Event configs map to `EventsNode`, action configs map to `ActionsNode`,
 * and every other config kind falls back to `DataWarehouseNode`.
 */
const getKindField = (metric: ExperimentMetric): NodeKind => {
    switch (metric.metric_config.kind) {
        case NodeKind.ExperimentEventMetricConfig:
            return NodeKind.EventsNode
        case NodeKind.ExperimentActionMetricConfig:
            return NodeKind.ActionsNode
        default:
            return NodeKind.DataWarehouseNode
    }
}
/**
 * Returns the identifier placed in a series' `event` field for the given metric:
 * the event for event configs, the action for action configs, and the
 * table name for any other config kind.
 */
const getEventField = (metric: ExperimentMetric): string | number => {
    const config = metric.metric_config

    if (config.kind === NodeKind.ExperimentEventMetricConfig) {
        return config.event
    }
    if (config.kind === NodeKind.ExperimentActionMetricConfig) {
        return config.action
    }
    return config.table_name
}
/**
 * Builds the trends query used for "total count" mean metrics.
 *
 * The query has two series over the metric's event/action/table:
 *   [0] unique users, [1] average count per actor.
 * The date range starts EXPERIMENT_DEFAULT_DURATION days ago and ends at the end of today.
 */
const getTotalCountQuery = (metric: ExperimentMetric, experiment: Experiment): TrendsQuery => {
    const kind = getKindField(metric)
    const event = getEventField(metric)
    // Test accounts are filtered only when the exposure criteria explicitly asks for it.
    const filterTestAccounts = experiment.exposure_criteria?.filterTestAccounts === true
    const dateRange = {
        date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'),
        date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'),
        explicitDate: true,
    }

    return {
        kind: NodeKind.TrendsQuery,
        series: [
            { kind, event, math: BaseMathType.UniqueUsers },
            { kind, event, math: CountPerActorMathType.Average },
        ],
        trendsFilter: {},
        filterTestAccounts,
        dateRange,
    } as TrendsQuery
}
/**
 * Builds the trends query used for "sum" mean metrics.
 *
 * The query has two series over the metric's event/action/table:
 *   [0] unique users, [1] sum of the metric's numerical math property.
 * The date range starts EXPERIMENT_DEFAULT_DURATION days ago and ends at the end of today.
 */
const getSumQuery = (metric: ExperimentMetric, experiment: Experiment): TrendsQuery => {
    const kind = getKindField(metric)
    const event = getEventField(metric)
    // Test accounts are filtered only when the exposure criteria explicitly asks for it.
    const filterTestAccounts = experiment.exposure_criteria?.filterTestAccounts === true
    const dateRange = {
        date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'),
        date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'),
        explicitDate: true,
    }

    return {
        kind: NodeKind.TrendsQuery,
        series: [
            { kind, event, math: BaseMathType.UniqueUsers },
            {
                kind,
                event,
                math: PropertyMathType.Sum,
                math_property: metric.metric_config.math_property,
                math_property_type: TaxonomicFilterGroupType.NumericalEventProperties,
            },
        ],
        trendsFilter: {},
        filterTestAccounts,
        dateRange,
    } as TrendsQuery
}
/**
 * Builds the two-step funnel query used for FUNNEL metrics.
 *
 * Step 1 is the `$feature_flag_called` event; step 2 is the metric's own
 * event/action/table. The funnel uses the "steps" visualization with a daily interval,
 * over a date range starting EXPERIMENT_DEFAULT_DURATION days ago and ending at the end of today.
 */
const getFunnelQuery = (metric: ExperimentMetric, experiment: Experiment): FunnelsQuery => {
    const exposureStep = {
        kind: NodeKind.EventsNode,
        event: '$feature_flag_called',
    }
    const metricStep = {
        kind: getKindField(metric),
        event: getEventField(metric),
    }
    // Test accounts are filtered only when the exposure criteria explicitly asks for it.
    const filterTestAccounts = experiment.exposure_criteria?.filterTestAccounts === true
    const dateRange = {
        date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'),
        date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'),
        explicitDate: true,
    }

    return {
        kind: NodeKind.FunnelsQuery,
        series: [exposureStep, metricStep],
        funnelsFilter: {
            funnelVizType: FunnelVizType.Steps,
        },
        filterTestAccounts,
        dateRange,
        interval: 'day',
    } as FunnelsQuery
}
export interface RunningTimeCalculatorLogicProps {
experimentId?: Experiment['id']
@@ -31,6 +139,7 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
uniqueUsers: number
averageEventsPerUser?: number
averagePropertyValuePerUser?: number
conversionRate?: number
}) => ({ value }),
}),
reducers({
@@ -61,57 +170,14 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
return null
}
const series = []
const kindField =
metric.metric_config.kind === NodeKind.ExperimentEventMetricConfig
? NodeKind.EventsNode
: metric.metric_config.kind === NodeKind.ExperimentActionMetricConfig
? NodeKind.ActionsNode
: NodeKind.DataWarehouseNode
const eventField =
metric.metric_config.kind === NodeKind.ExperimentEventMetricConfig
? metric.metric_config.event
: metric.metric_config.kind === NodeKind.ExperimentActionMetricConfig
? metric.metric_config.action
: metric.metric_config.table_name
series.push({
kind: kindField,
event: eventField,
math: BaseMathType.UniqueUsers,
})
if (metric.metric_type === ExperimentMetricType.MEAN) {
if (metric.metric_config.math === ExperimentMetricMathType.Sum) {
series.push({
kind: kindField,
event: eventField,
math: PropertyMathType.Sum,
math_property: metric.metric_config.math_property,
math_property_type: TaxonomicFilterGroupType.NumericalEventProperties,
})
} else {
series.push({
kind: kindField,
event: eventField,
math: CountPerActorMathType.Average,
})
}
}
const query = {
kind: NodeKind.TrendsQuery,
series,
trendsFilter: {},
filterTestAccounts: values.experiment.exposure_criteria?.filterTestAccounts === true,
dateRange: {
date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'),
date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'),
explicitDate: true,
},
}
const query =
metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.TotalCount
? getTotalCountQuery(metric, values.experiment)
: metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.Sum
? getSumQuery(metric, values.experiment)
: getFunnelQuery(metric, values.experiment)
const result = (await performQuery(query)) as Partial<TrendsQueryResponse>
@@ -125,6 +191,9 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
metric.metric_config.math === ExperimentMetricMathType.Sum
? { averagePropertyValuePerUser: result?.results?.[1]?.count ?? null }
: {}),
...(metric.metric_type === ExperimentMetricType.FUNNEL
? { conversionRate: result?.results?.[1]?.count / result?.results?.[0]?.count || null }
: {}),
}
},
// For testing purposes, we want to be able set the metric result directly
@@ -154,6 +223,10 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
(metricResult: { averagePropertyValuePerUser: number }) =>
metricResult?.averagePropertyValuePerUser ?? null,
],
conversionRate: [
(s) => [s.metricResult],
(metricResult: { conversionRate: number }) => metricResult?.conversionRate ?? null,
],
variance: [
(s) => [s.metric, s.averageEventsPerUser, s.averagePropertyValuePerUser],
(metric: ExperimentMetric, averageEventsPerUser: number, averagePropertyValuePerUser: number) => {
@@ -165,17 +238,17 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.TotalCount
) {
return VARIANCE_SCALING_FACTOR_COUNT * averageEventsPerUser
return VARIANCE_SCALING_FACTOR_TOTAL_COUNT * averageEventsPerUser
} else if (
metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.Sum
) {
return VARIANCE_SCALING_FACTOR_CONTINUOUS * averagePropertyValuePerUser ** 2
return VARIANCE_SCALING_FACTOR_SUM * averagePropertyValuePerUser ** 2
}
return null
},
],
standardDeviation: [(s) => [s.variance], (variance: number) => Math.sqrt(variance)],
standardDeviation: [(s) => [s.variance], (variance: number) => (variance ? Math.sqrt(variance) : null)],
numberOfVariants: [
(s) => [s.experiment],
(experiment: Experiment) => experiment.feature_flag?.filters.multivariate?.variants.length,
@@ -187,6 +260,7 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
s.variance,
s.averageEventsPerUser,
s.averagePropertyValuePerUser,
s.conversionRate,
s.numberOfVariants,
],
(
@@ -195,6 +269,7 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
variance: number,
averageEventsPerUser: number,
averagePropertyValuePerUser: number,
conversionRate: number,
numberOfVariants: number
): number | null => {
if (!metric) {
@@ -204,38 +279,96 @@ export const runningTimeCalculatorLogic = kea<runningTimeCalculatorLogicType>([
const minimumDetectableEffectDecimal = minimumDetectableEffect / 100
let d // Represents the absolute effect size (difference we want to detect)
let sampleSizeFormula // The correct sample size formula for each metric type
if (
metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.TotalCount
) {
/*
Count Per User Metric:
- "mean" is the average number of events per user (e.g., clicks per user).
- MDE is applied as a percentage of this mean to compute `d`.
Formula:
d = MDE * averageEventsPerUser
*/
d = minimumDetectableEffectDecimal * averageEventsPerUser
/*
Sample size formula:
N = (16 * variance) / d^2
Where:
- `16` comes from statistical power analysis:
- Based on a 95% confidence level (Z_alpha/2 = 1.96) and 80% power (Z_beta = 0.84),
2 * (Z_alpha/2 + Z_beta)^2 = 2 * (1.96 + 0.84)^2 ≈ 15.7, which is rounded up to 16.
- `variance` is the estimated variance of the event count per user.
- `d` is the absolute effect size (MDE * mean).
*/
sampleSizeFormula = (16 * variance) / d ** 2
} else if (
metric.metric_type === ExperimentMetricType.MEAN &&
metric.metric_config.math === ExperimentMetricMathType.Sum
) {
/*
Continuous property metric:
- "mean" is the average value of the measured property per user (e.g., revenue per user).
- MDE is applied as a percentage of this mean to compute `d`.
Formula:
d = MDE * averagePropertyValuePerUser
*/
d = minimumDetectableEffectDecimal * averagePropertyValuePerUser
/*
Sample Size Formula for Continuous metrics:
N = (16 * variance) / d^2
Where:
- `variance` is the estimated variance of the continuous property.
- The formula is identical to the Count metric case.
*/
sampleSizeFormula = (16 * variance) / d ** 2
} else if (metric.metric_type === ExperimentMetricType.FUNNEL) {
/*
Binomial metric (conversion rate):
- Here, "mean" does not exist in the same way as for count/continuous metrics.
- Instead, we use `p`, the baseline conversion rate (historical probability of success).
- MDE is a relative (percentage) change of `p`; multiplying it by `p` gives the absolute difference `d`.
Formula:
d = MDE * conversionRate
*/
d = minimumDetectableEffectDecimal * conversionRate
/*
Sample size formula:
N = (16 * p * (1 - p)) / d^2
Where:
- `p` is the historical conversion rate (baseline success probability).
- `d` is the absolute effect size, MDE * p (e.g., with p = 0.10 and a 50% MDE, `d = 0.05`).
- The variance is inherent in `p(1 - p)`, which represents binomial variance.
*/
if (conversionRate !== null) {
sampleSizeFormula = (16 * conversionRate * (1 - conversionRate)) / d ** 2
} else {
return null
}
}
if (!d) {
if (!d || !sampleSizeFormula) {
return null
}
/*
N = (16 * variance) / d^2
Where:
- `16` comes from statistical power analysis:
- Based on a 95% confidence level (Z_alpha/2 = 1.96) and 80% power (Z_beta = 0.84),
the combined squared Z-scores yield approximately 16.
- `variance` is the estimated variance of the metric being measured.
- `d` is the absolute effect size (MDE * mean).
- The formula ensures that larger variance increases required sample size,
and smaller detectable effects (MDE) also require more samples.
*/
return ((16 * variance) / (d * d)) * numberOfVariants
return sampleSizeFormula * numberOfVariants
},
],
recommendedRunningTime: [
(s) => [s.recommendedSampleSize, s.uniqueUsers],
(recommendedSampleSize: number, uniqueUsers: number): number => {