diff --git a/docker-compose.dev-minimal.yml b/docker-compose.dev-minimal.yml index 2135777855..fb4ac0f903 100644 --- a/docker-compose.dev-minimal.yml +++ b/docker-compose.dev-minimal.yml @@ -81,7 +81,7 @@ services: # (maybe only in Pycharm) keeps many idle transactions open # and eventually kills postgres, these settings aim to stop that happening. # They are only for DEV and should not be used in production. - command: postgres -c max_connections=1000 -c idle_in_transaction_session_timeout=300000 + command: postgres -c max_connections=1000 -c idle_in_transaction_session_timeout=300000 -c max_prepared_transactions=10 redis: extends: file: docker-compose.base.yml diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 433f95e067..9bc772f038 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -81,7 +81,7 @@ services: # (maybe only in Pycharm) keeps many idle transactions open # and eventually kills postgres, these settings aim to stop that happening. # They are only for DEV and should not be used in production. - command: postgres -c max_connections=1000 -c idle_in_transaction_session_timeout=300000 + command: postgres -c max_connections=1000 -c idle_in_transaction_session_timeout=300000 -c max_prepared_transactions=10 counters_db: extends: diff --git a/plugin-server/package.json b/plugin-server/package.json index 59d7fc1d26..7686271162 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -26,13 +26,15 @@ "prettier:check": "prettier --check .", "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", - "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron && pnpm run setup:test:counters", + "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron && pnpm run setup:test:counters && pnpm run setup:test:persons-migration-db:init", "setup:test:cyclotron": "CYCLOTRON_DATABASE_NAME=test_cyclotron ../rust/bin/migrate-cyclotron", "setup:test:counters": "TEST=1 COUNTERS_DATABASE_URL='postgres://posthog:posthog@localhost:5432/test_counters' ./bin/migrate-counters.sh", "migrate:counters": "./bin/migrate-counters.sh", "migrate:counters:test": "COUNTERS_DATABASE_URL='postgres://posthog:posthog@localhost:5432/test_counters' node-pg-migrate up --migrations-dir src/migrations", "migrate:counters:down": "node-pg-migrate down --migrations-dir src/migrations", "migrate:counters:create": "node-pg-migrate create --javascript-file --migrations-dir src/migrations", + "setup:test:persons-migration-db:init": "ts-node src/scripts/setup-persons-migration-db.ts", + "setup:test:persons-migration-db:drop": "ts-node src/scripts/setup-persons-migration-db.ts --drop", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", "services:clean": "cd .. 
&& docker compose -f docker-compose.dev.yml rm -v", diff --git a/plugin-server/sql/create_persons_tables.sql b/plugin-server/sql/create_persons_tables.sql new file mode 100644 index 0000000000..8ef95b506d --- /dev/null +++ b/plugin-server/sql/create_persons_tables.sql @@ -0,0 +1,133 @@ +-- Persons DB schema for test harness with secondary DB (secondary DB used by dual-write) +-- Minimal compatible DDL for PostgresPersonRepository operations for testing + +CREATE TABLE IF NOT EXISTS posthog_person ( + id BIGSERIAL PRIMARY KEY, + uuid UUID NOT NULL UNIQUE, + created_at TIMESTAMPTZ NOT NULL, + team_id INTEGER NOT NULL, + properties JSONB NOT NULL DEFAULT '{}'::jsonb, + properties_last_updated_at JSONB NOT NULL DEFAULT '{}'::jsonb, + properties_last_operation JSONB NOT NULL DEFAULT '{}'::jsonb, + is_user_id INTEGER NULL, + is_identified BOOLEAN NOT NULL DEFAULT false, + version BIGINT NULL +); + +-- Helpful index for updatePersonAssertVersion +CREATE INDEX IF NOT EXISTS posthog_person_team_uuid_idx + ON posthog_person (team_id, uuid); + +-- Distinct IDs +CREATE TABLE IF NOT EXISTS posthog_persondistinctid ( + id BIGSERIAL PRIMARY KEY, + distinct_id VARCHAR(400) NOT NULL, + person_id BIGINT NOT NULL, + team_id INTEGER NOT NULL, + version BIGINT NULL +); + +-- Add both foreign key constraints to match production schema +-- The deferred constraint needs CASCADE for delete operations to work +-- Drop constraints if they exist first to ensure clean state +DO $$ +BEGIN + ALTER TABLE posthog_persondistinctid + DROP CONSTRAINT IF EXISTS posthog_persondistin_person_id_5d655bba_fk_posthog_p; + ALTER TABLE posthog_persondistinctid + DROP CONSTRAINT IF EXISTS posthog_persondistinctid_person_id_5d655bba_fk; + + ALTER TABLE posthog_persondistinctid + ADD CONSTRAINT posthog_persondistin_person_id_5d655bba_fk_posthog_p + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE + DEFERRABLE INITIALLY DEFERRED; + + ALTER TABLE posthog_persondistinctid + ADD CONSTRAINT posthog_persondistinctid_person_id_5d655bba_fk + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE; +END $$; + +-- Create the unique constraint (not just index) to match production +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_constraint + WHERE conname = 'unique distinct_id for team' + AND conrelid = 'posthog_persondistinctid'::regclass + ) THEN + ALTER TABLE posthog_persondistinctid + ADD CONSTRAINT "unique distinct_id for team" + UNIQUE (team_id, distinct_id); + END IF; +END $$; + +CREATE INDEX IF NOT EXISTS posthog_persondistinctid_person_id_5d655bba + ON posthog_persondistinctid (person_id); + +-- Personless distinct IDs (merge queue helpers) +CREATE TABLE IF NOT EXISTS posthog_personlessdistinctid ( + team_id INTEGER NOT NULL, + distinct_id TEXT NOT NULL, + is_merged BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (team_id, distinct_id) +); + +-- Cohort membership by person (only person_id is touched by repo) +CREATE TABLE IF NOT EXISTS posthog_cohortpeople ( + id BIGSERIAL PRIMARY KEY, + cohort_id INTEGER NOT NULL, + person_id BIGINT NOT NULL, + version INTEGER NULL +); + +-- Add both foreign key constraints to match production schema +ALTER TABLE posthog_cohortpeople + ADD CONSTRAINT posthog_cohortpeople_person_id_33da7d3f_fk_posthog_person_id + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE + DEFERRABLE INITIALLY DEFERRED; + +ALTER TABLE posthog_cohortpeople + ADD CONSTRAINT 
posthog_cohortpeople_person_id_33da7d3f_fk + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE; + +CREATE INDEX IF NOT EXISTS posthog_cohortpeople_person_id_33da7d3f + ON posthog_cohortpeople (person_id); + +-- Index from Django model Meta class +CREATE INDEX IF NOT EXISTS posthog_cohortpeople_cohort_person_idx + ON posthog_cohortpeople (cohort_id, person_id); + +-- Feature flag hash key overrides (referenced during person merges) +CREATE TABLE IF NOT EXISTS posthog_featureflaghashkeyoverride ( + id BIGSERIAL PRIMARY KEY, + team_id INTEGER NOT NULL, + person_id BIGINT NOT NULL, + feature_flag_key TEXT NOT NULL, + hash_key TEXT NOT NULL +); + +-- Add both foreign key constraints to match production schema +ALTER TABLE posthog_featureflaghashkeyoverride + ADD CONSTRAINT posthog_featureflagh_person_id_7e517f7c_fk_posthog_p + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE + DEFERRABLE INITIALLY DEFERRED; + +ALTER TABLE posthog_featureflaghashkeyoverride + ADD CONSTRAINT posthog_featureflaghashkeyoverride_person_id_7e517f7c_fk + FOREIGN KEY (person_id) + REFERENCES posthog_person(id) + ON DELETE CASCADE; + +CREATE INDEX IF NOT EXISTS posthog_featureflaghashkeyoverride_person_id_7e517f7c + ON posthog_featureflaghashkeyoverride (person_id); \ No newline at end of file diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 44833c6762..1a345907ff 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -36,6 +36,12 @@ export function getDefaultConfig(): PluginsServerConfig { ? 'postgres://posthog:posthog@localhost:5432/posthog' : '', PERSONS_READONLY_DATABASE_URL: '', + PERSONS_MIGRATION_DATABASE_URL: isTestEnv() + ? 'postgres://posthog:posthog@localhost:5432/test_posthog_persons_migration' + : isDevEnv() + ? 
'postgres://posthog:posthog@localhost:5432/posthog_persons_migration' + : '', + PERSONS_MIGRATION_READONLY_DATABASE_URL: '', POSTGRES_CONNECTION_POOL_SIZE: 10, POSTHOG_DB_NAME: null, POSTHOG_DB_USER: 'postgres', @@ -285,6 +291,8 @@ export function getDefaultConfig(): PluginsServerConfig { GROUP_BATCH_WRITING_MAX_CONCURRENT_UPDATES: 10, GROUP_BATCH_WRITING_OPTIMISTIC_UPDATE_RETRY_INTERVAL_MS: 50, GROUP_BATCH_WRITING_MAX_OPTIMISTIC_UPDATE_RETRIES: 5, + PERSONS_DUAL_WRITE_ENABLED: false, + PERSONS_DUAL_WRITE_COMPARISON_ENABLED: false, USE_DYNAMIC_EVENT_INGESTION_RESTRICTION_CONFIG: false, // Messaging diff --git a/plugin-server/src/ingestion/ingestion-consumer.ts b/plugin-server/src/ingestion/ingestion-consumer.ts index 09882e9f0f..d759b7f964 100644 --- a/plugin-server/src/ingestion/ingestion-consumer.ts +++ b/plugin-server/src/ingestion/ingestion-consumer.ts @@ -31,6 +31,7 @@ import { BatchWritingGroupStore } from '../worker/ingestion/groups/batch-writing import { GroupStoreForBatch } from '../worker/ingestion/groups/group-store-for-batch.interface' import { BatchWritingPersonsStore } from '../worker/ingestion/persons/batch-writing-person-store' import { FlushResult, PersonsStoreForBatch } from '../worker/ingestion/persons/persons-store-for-batch' +import { PostgresDualWritePersonRepository } from '../worker/ingestion/persons/repositories/postgres-dualwrite-person-repository' import { PostgresPersonRepository } from '../worker/ingestion/persons/repositories/postgres-person-repository' import { deduplicateEvents } from './deduplication/events' import { DeduplicationRedis, createDeduplicationRedis } from './deduplication/redis-client' @@ -152,20 +153,25 @@ export class IngestionConsumer { this.ingestionWarningLimiter = new MemoryRateLimiter(1, 1.0 / 3600) this.hogTransformer = new HogTransformerService(hub) - this.personStore = new BatchWritingPersonsStore( - new PostgresPersonRepository(this.hub.db.postgres, { - calculatePropertiesSize: this.hub.PERSON_UPDATE_CALCULATE_PROPERTIES_SIZE, - personPropertiesDbConstraintLimitBytes: this.hub.PERSON_PROPERTIES_DB_CONSTRAINT_LIMIT_BYTES, - personPropertiesTrimTargetBytes: this.hub.PERSON_PROPERTIES_TRIM_TARGET_BYTES, - }), - this.hub.db.kafkaProducer, - { - dbWriteMode: this.hub.PERSON_BATCH_WRITING_DB_WRITE_MODE, - maxConcurrentUpdates: this.hub.PERSON_BATCH_WRITING_MAX_CONCURRENT_UPDATES, - maxOptimisticUpdateRetries: this.hub.PERSON_BATCH_WRITING_MAX_OPTIMISTIC_UPDATE_RETRIES, - optimisticUpdateRetryInterval: this.hub.PERSON_BATCH_WRITING_OPTIMISTIC_UPDATE_RETRY_INTERVAL_MS, - } - ) + const personRepositoryOptions = { + calculatePropertiesSize: this.hub.PERSON_UPDATE_CALCULATE_PROPERTIES_SIZE, + comparisonEnabled: this.hub.PERSONS_DUAL_WRITE_COMPARISON_ENABLED, + } + + const personRepository = this.hub.PERSONS_DUAL_WRITE_ENABLED + ? 
new PostgresDualWritePersonRepository( + this.hub.db.postgres, + this.hub.db.postgresPersonMigration, + personRepositoryOptions + ) + : new PostgresPersonRepository(this.hub.db.postgres, personRepositoryOptions) + + this.personStore = new BatchWritingPersonsStore(personRepository, this.hub.db.kafkaProducer, { + dbWriteMode: this.hub.PERSON_BATCH_WRITING_DB_WRITE_MODE, + maxConcurrentUpdates: this.hub.PERSON_BATCH_WRITING_MAX_CONCURRENT_UPDATES, + maxOptimisticUpdateRetries: this.hub.PERSON_BATCH_WRITING_MAX_OPTIMISTIC_UPDATE_RETRIES, + optimisticUpdateRetryInterval: this.hub.PERSON_BATCH_WRITING_OPTIMISTIC_UPDATE_RETRY_INTERVAL_MS, + }) this.groupStore = new BatchWritingGroupStore(this.hub, { maxConcurrentUpdates: this.hub.GROUP_BATCH_WRITING_MAX_CONCURRENT_UPDATES, diff --git a/plugin-server/src/scripts/setup-persons-migration-db.ts b/plugin-server/src/scripts/setup-persons-migration-db.ts new file mode 100644 index 0000000000..53ec98321d --- /dev/null +++ b/plugin-server/src/scripts/setup-persons-migration-db.ts @@ -0,0 +1,102 @@ +import fs from 'fs' +import path from 'path' +import { Client } from 'pg' + +function parseDb(urlStr: string): { adminUrl: string; dbName: string } { + const u = new URL(urlStr) + const dbName = (u.pathname || '/').replace(/^\//, '') || 'postgres' + const admin = new URL(u.toString()) + admin.pathname = '/postgres' + return { adminUrl: admin.toString(), dbName } +} + +async function ensureDbExists(adminUrl: string, dbName: string): Promise<void> { + const admin = new Client({ connectionString: adminUrl }) + await admin.connect() + try { + const { rows } = await admin.query('SELECT 1 FROM pg_database WHERE datname = $1', [dbName]) + if (rows.length === 0) { + // crude name sanitisation: JSON-escape the name, then strip the surrounding quotes + await admin.query(`CREATE DATABASE ${JSON.stringify(dbName).replace(/^"|"$/g, '')}`) + } + } finally { + await admin.end() + } +} + +async function dropDbIfExists(adminUrl: string, dbName: string): Promise<void> { + const admin = new Client({ connectionString: adminUrl }) + await admin.connect() + try { + // terminate existing connections to allow DROP + await admin.query( + ` + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE datname = $1 AND pid <> pg_backend_pid() + `, + [dbName] + ) + await admin.query(`DROP DATABASE IF EXISTS ${JSON.stringify(dbName).replace(/^"|"$/g, '')}`) + } finally { + await admin.end() + } +} + +async function applySchema(dbUrl: string, sqlFile: string): Promise<void> { + const sql = fs.readFileSync(sqlFile, 'utf8') + const client = new Client({ connectionString: dbUrl }) + await client.connect() + try { + await client.query(sql) + } finally { + await client.end() + } +} + +async function checkPreparedTransactions(dbUrl: string): Promise<void> { + const client = new Client({ connectionString: dbUrl }) + await client.connect() + try { + const { rows } = await client.query(`SHOW max_prepared_transactions`) + const val = parseInt(rows[0].max_prepared_transactions, 10) + if (!Number.isFinite(val) || val <= 0) { + console.warn( + 'Warning: max_prepared_transactions is 0; two-phase commit will not work. Set it > 0 for full dual-write tests.'
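+ // For reference (nothing in this script changes it): max_prepared_transactions is 0 by default and is a restart-only setting. + // A minimal way to enable 2PC on a dev instance, assuming superuser access: + //   ALTER SYSTEM SET max_prepared_transactions = 10; + //   -- restart PostgreSQL, then verify with: SHOW max_prepared_transactions; + // The docker-compose change above passes the same setting on the postgres command line instead.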
+ ) + } + } catch { + // ignore + } finally { + await client.end() + } +} + +async function main() { + const defaultUrl = 'postgres://posthog:posthog@localhost:5432/test_posthog_persons_migration' + const dbUrl = process.env.PERSONS_MIGRATION_DATABASE_URL || defaultUrl + + const { adminUrl, dbName } = parseDb(dbUrl) + const sqlPath = path.resolve(__dirname, '../../sql/create_persons_tables.sql') + + // Always drop and recreate for idempotency + console.log(`Setting up persons migration database: ${dbName}`) + + // Drop existing database if it exists + await dropDbIfExists(adminUrl, dbName) + + // Create fresh database + await ensureDbExists(adminUrl, dbName) + + // Apply schema + await applySchema(dbUrl, sqlPath) + + // Check configuration + await checkPreparedTransactions(dbUrl) + + console.log(`Database ${dbName} setup completed successfully`) +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 025bc78adb..94d0cc6e83 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -198,11 +198,15 @@ export interface PluginsServerConfig extends CdpConfig, IngestionConsumerConfig GROUP_BATCH_WRITING_MAX_CONCURRENT_UPDATES: number // maximum number of concurrent updates to groups table per batch GROUP_BATCH_WRITING_MAX_OPTIMISTIC_UPDATE_RETRIES: number // maximum number of retries for optimistic update GROUP_BATCH_WRITING_OPTIMISTIC_UPDATE_RETRY_INTERVAL_MS: number // starting interval for exponential backoff between retries for optimistic update + PERSONS_DUAL_WRITE_ENABLED: boolean // Enable dual-write mode for persons to both primary and migration databases + PERSONS_DUAL_WRITE_COMPARISON_ENABLED: boolean // Enable comparison metrics between primary and secondary DBs during dual-write TASK_TIMEOUT: number // how many seconds until tasks are timed out DATABASE_URL: string // Postgres database URL DATABASE_READONLY_URL: string // Optional read-only replica to the main Postgres database PERSONS_DATABASE_URL: string // Optional read-write Postgres database for persons PERSONS_READONLY_DATABASE_URL: string // Optional read-only replica to the persons Postgres database + PERSONS_MIGRATION_DATABASE_URL: string // Read-write Postgres database for persons during dual write/migration + PERSONS_MIGRATION_READONLY_DATABASE_URL: string // Optional read-only replica to the persons Postgres database during dual write/migration PLUGIN_STORAGE_DATABASE_URL: string // Optional read-write Postgres database for plugin storage COUNTERS_DATABASE_URL: string // Optional read-write Postgres database for counters POSTGRES_CONNECTION_POOL_SIZE: number @@ -389,6 +393,7 @@ export interface Hub extends PluginsServerConfig { // active connections to Postgres, Redis, Kafka db: DB postgres: PostgresRouter + postgresPersonMigration: PostgresRouter redisPool: GenericPool cookielessRedisPool: GenericPool kafka: Kafka diff --git a/plugin-server/src/utils/db/db.ts b/plugin-server/src/utils/db/db.ts index 7cd537ee66..c07540cb14 100644 --- a/plugin-server/src/utils/db/db.ts +++ b/plugin-server/src/utils/db/db.ts @@ -126,6 +126,8 @@ export const POSTGRES_UNAVAILABLE_ERROR_MESSAGES = [ export class DB { /** Postgres connection router for database access. */ postgres: PostgresRouter + /** Postgres connection router for database access for persons migration. */ + postgresPersonMigration: PostgresRouter /** Redis used for various caches. */ redisPool: GenericPool /** Redis used to store state for cookieless ingestion. 
*/ @@ -142,6 +144,7 @@ export class DB { constructor( postgres: PostgresRouter, + postgresPersonMigration: PostgresRouter, redisPool: GenericPool<Redis>, redisPoolCookieless: GenericPool<Redis>, kafkaProducer: KafkaProducerWrapper, @@ -149,6 +152,7 @@ ) { this.postgres = postgres + this.postgresPersonMigration = postgresPersonMigration this.redisPool = redisPool this.redisPoolCookieless = redisPoolCookieless this.kafkaProducer = kafkaProducer diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts index fe4a3e3f92..53a72f3959 100644 --- a/plugin-server/src/utils/db/hub.ts +++ b/plugin-server/src/utils/db/hub.ts @@ -84,6 +84,14 @@ export async function createHub( logger.info('👍', `Kafka ready`) const postgres = new PostgresRouter(serverConfig) + + // Instantiate a second router for the Persons database migration + const postgresPersonMigration = new PostgresRouter({ + ...serverConfig, + PERSONS_DATABASE_URL: serverConfig.PERSONS_MIGRATION_DATABASE_URL || serverConfig.PERSONS_DATABASE_URL, + PERSONS_READONLY_DATABASE_URL: + serverConfig.PERSONS_MIGRATION_READONLY_DATABASE_URL || serverConfig.PERSONS_READONLY_DATABASE_URL, + }) // TODO: assert tables are reachable (async calls that cannot be in a constructor) logger.info('👍', `Postgres Router ready`) @@ -106,6 +114,7 @@ const db = new DB( postgres, + postgresPersonMigration, redisPool, cookielessRedisPool, kafkaProducer, @@ -137,6 +146,7 @@ capabilities, db, postgres, + postgresPersonMigration, redisPool, cookielessRedisPool, kafka, @@ -186,7 +196,12 @@ export const closeHub = async (hub: Hub): Promise<void> => { } logger.info('💤', 'Closing kafka, redis, postgres...') await hub.pubSub.stop() - await Promise.allSettled([hub.kafkaProducer.disconnect(), hub.redisPool.drain(), hub.postgres?.end()]) + await Promise.allSettled([ + hub.kafkaProducer.disconnect(), + hub.redisPool.drain(), + hub.postgres?.end(), + hub.postgresPersonMigration?.end(), + ]) await hub.redisPool.clear() await hub.cookielessRedisPool.clear() logger.info('💤', 'Closing cookieless manager...') diff --git a/plugin-server/src/utils/db/postgres.ts b/plugin-server/src/utils/db/postgres.ts index d60f896607..7e638f618b 100644 --- a/plugin-server/src/utils/db/postgres.ts +++ b/plugin-server/src/utils/db/postgres.ts @@ -162,6 +162,10 @@ export class PostgresRouter { }) } + public async connect(usage: PostgresUse): Promise<PoolClient> { + return await this.pools.get(usage)!.connect() + } + async end(): Promise<void> { // Close all the connection pools const uniquePools: Set<Pool> = new Set(this.pools.values()) diff --git a/plugin-server/src/utils/db/two-phase.test.ts b/plugin-server/src/utils/db/two-phase.test.ts new file mode 100644 index 0000000000..aa27c1bbbb --- /dev/null +++ b/plugin-server/src/utils/db/two-phase.test.ts @@ -0,0 +1,193 @@ +import { twoPhaseCommitFailuresCounter } from '~/worker/ingestion/persons/metrics' + +import { PostgresUse } from './postgres' +import { TwoPhaseCommitCoordinator } from './two-phase' + +type QueryCall = { sql: string; args?: any[] } + +class FakePoolClient { + public calls: QueryCall[] = [] + constructor(private opts: { failOnPrepare?: boolean; side: 'left' | 'right' }) {} + + query(sql: string, args?: any[]): any { + this.calls.push({ sql, args }) + if (sql.startsWith('PREPARE TRANSACTION') && this.opts.failOnPrepare) { + return Promise.reject(new Error(`prepare_failed_${this.opts.side}`)) + } + // BEGIN / ROLLBACK are always ok in this fake + return
Promise.resolve({ rowCount: 0, rows: [] }) + } + + release(): void { + // no-op + } +} + +class FakeRouter { + public client: FakePoolClient + public routerCalls: QueryCall[] = [] + constructor( + private side: 'left' | 'right', + private opts: { failOnPrepare?: boolean; failCommitPrepared?: boolean; failRollbackPrepared?: boolean } = {} + ) { + this.client = new FakePoolClient({ failOnPrepare: opts.failOnPrepare, side }) + } + + connect(_use: PostgresUse): FakePoolClient { + return this.client + } + + query(_use: PostgresUse, sql: string, args?: any[], _tag?: string): any { + this.routerCalls.push({ sql, args }) + if (sql.startsWith('COMMIT PREPARED') && this.opts.failCommitPrepared) { + return Promise.reject(new Error(`commit_failed_${this.side}`)) + } + if (sql.startsWith('ROLLBACK PREPARED') && this.opts.failRollbackPrepared) { + return Promise.reject(new Error(`rollback_failed_${this.side}`)) + } + return Promise.resolve({ rowCount: 0, rows: [] }) + } +} + +// Helper to capture metric label+inc pairs +function spyOn2pcFailures() { + const labelsSpy = jest.spyOn(twoPhaseCommitFailuresCounter, 'labels') as any + const calls: Array<{ tag: string; phase: string }> = [] + labelsSpy.mockImplementation((tag: string, phase: string) => { + return { inc: jest.fn(() => calls.push({ tag, phase })) } + }) + return { labelsSpy, calls } +} + +describe('TwoPhaseCommitCoordinator', () => { + afterEach(() => { + jest.restoreAllMocks() + }) + + test('success path commits both sides', async () => { + const left = new FakeRouter('left') + const right = new FakeRouter('right') + const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE, name: 'L' }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE, name: 'R' }, + }) + + const { labelsSpy, calls } = spyOn2pcFailures() + + const result = await coord.run('ok', () => Promise.resolve('done')) + + expect(result).toBe('done') + // Both sides prepared via client + expect(left.client.calls.find((c) => c.sql.startsWith('PREPARE TRANSACTION'))).toBeTruthy() + expect(right.client.calls.find((c) => c.sql.startsWith('PREPARE TRANSACTION'))).toBeTruthy() + // Both sides committed via router + expect(left.routerCalls.find((c) => c.sql.startsWith('COMMIT PREPARED'))).toBeTruthy() + expect(right.routerCalls.find((c) => c.sql.startsWith('COMMIT PREPARED'))).toBeTruthy() + // No failure metrics + expect(labelsSpy).not.toHaveBeenCalled() + expect(calls.length).toBe(0) + }) + + test('prepare left fails increments prepare_left_failed and run_failed', async () => { + const left = new FakeRouter('left', { failOnPrepare: true }) + const right = new FakeRouter('right') + const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE }, + }) + + const { calls } = spyOn2pcFailures() + + await expect(coord.run('t1', () => Promise.resolve('x'))).rejects.toThrow(/prepare_failed_left/) + + const phases = calls.map((c) => c.phase) + expect(phases).toContain('prepare_left_failed') + expect(phases).toContain('run_failed') + // Right side's prepare succeeded, so it should be rolled back via router + expect(right.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeTruthy() + }) + + test('prepare right fails increments prepare_right_failed and run_failed and rolls back left prepared', async () => { + const left = new FakeRouter('left') + const right = new FakeRouter('right', { failOnPrepare: true }) + 
const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE }, + }) + + const { calls } = spyOn2pcFailures() + + await expect(coord.run('t2', () => Promise.resolve('x'))).rejects.toThrow(/prepare_failed_right/) + + const phases = calls.map((c) => c.phase) + expect(phases).toContain('prepare_right_failed') + expect(phases).toContain('run_failed') + // Left was prepared and should have been rolled back via router + expect(left.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeTruthy() + }) + + test('commit left fails increments commit_left_failed and run_failed', async () => { + const left = new FakeRouter('left', { failCommitPrepared: true }) + const right = new FakeRouter('right') + const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE }, + }) + + const { calls } = spyOn2pcFailures() + + await expect(coord.run('t3', () => Promise.resolve('x'))).rejects.toThrow(/commit_failed_left/) + + const phases = calls.map((c) => c.phase) + expect(phases).toContain('commit_left_failed') + expect(phases).toContain('run_failed') + // After failure, we attempt rollbacks + expect(left.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeTruthy() + expect(right.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeTruthy() + }) + + test('commit right fails increments commit_right_failed and run_failed', async () => { + const left = new FakeRouter('left') + const right = new FakeRouter('right', { failCommitPrepared: true }) + const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE }, + }) + + const { calls } = spyOn2pcFailures() + + await expect(coord.run('t4', () => Promise.resolve('x'))).rejects.toThrow(/commit_failed_right/) + + const phases = calls.map((c) => c.phase) + expect(phases).toContain('commit_right_failed') + expect(phases).toContain('run_failed') + // Left side was already committed when right failed, so it should NOT attempt rollback + // (you cannot rollback an already-committed transaction) + expect(left.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeFalsy() + // Right side's prepared transaction should be rolled back + expect(right.routerCalls.find((c) => c.sql.startsWith('ROLLBACK PREPARED'))).toBeTruthy() + }) + + test('fn throws increments run_failed and rolls back both local txs', async () => { + const left = new FakeRouter('left') + const right = new FakeRouter('right') + const coord = new TwoPhaseCommitCoordinator({ + left: { router: left as any, use: PostgresUse.PERSONS_WRITE }, + right: { router: right as any, use: PostgresUse.PERSONS_WRITE }, + }) + + const { calls } = spyOn2pcFailures() + + await expect( + coord.run('t5', () => { + throw new Error('boom') + }) + ).rejects.toThrow('boom') + + const phases = calls.map((c) => c.phase) + expect(phases).toContain('run_failed') + // Both sides should have rolled back local txs (not prepared) + expect(left.client.calls.find((c) => c.sql === 'ROLLBACK')).toBeTruthy() + expect(right.client.calls.find((c) => c.sql === 'ROLLBACK')).toBeTruthy() + }) +}) diff --git a/plugin-server/src/utils/db/two-phase.ts b/plugin-server/src/utils/db/two-phase.ts new file mode 100644 index 0000000000..6d821b5532 --- /dev/null +++ 
b/plugin-server/src/utils/db/two-phase.ts @@ -0,0 +1,171 @@ +import { PoolClient } from 'pg' + +import { twoPhaseCommitFailuresCounter } from '~/worker/ingestion/persons/metrics' + +import { logger } from '../logger' +import { instrumentQuery } from '../metrics' +import { PostgresRouter, PostgresUse, TransactionClient } from './postgres' + +export type TwoPhaseSides = { + left: { router: PostgresRouter; use: PostgresUse; name?: string } + right: { router: PostgresRouter; use: PostgresUse; name?: string } +} + +export class TwoPhaseCommitCoordinator { + constructor(private sides: TwoPhaseSides) {} + + private makeGid(tag: string): string { + const ts = Date.now() + const rand = Math.random().toString(36).slice(2, 10) + + // GID must be <= 200 chars + return `dualwrite:${tag}:${ts}:${rand}` + } + + async run<T>(tag: string, fn: (leftTx: TransactionClient, rightTx: TransactionClient) => Promise<T>): Promise<T> { + // The GID is unique per database but shares a root that identifies the tx + // across the two databases. + // We don't re-use the exact same id, so that we can support running 2PCs across two databases on the same cluster/machine. + // This is helpful in the test harness, where we don't want to spin up another PG instance but just stick another DB on the same instance; + // the transaction ids would clash in that case if we used the exact same id. + const gidRoot = this.makeGid(tag) + const gidLeft = `${gidRoot}:left` + const gidRight = `${gidRoot}:right` + const gidLeftLiteral = `'${gidLeft.replace(/'/g, "''")}'` + const gidRightLiteral = `'${gidRight.replace(/'/g, "''")}'` + const { left, right } = this.sides + + return await instrumentQuery('query.dualwrite_spc', tag, async () => { + let lClient: PoolClient | undefined + let rClient: PoolClient | undefined + let preparedLeft = false + let preparedRight = false + + try { + lClient = await left.router.connect(left.use) + rClient = await right.router.connect(right.use) + + await Promise.all([lClient?.query('BEGIN'), rClient?.query('BEGIN')]) + + const result = await fn( + new TransactionClient(left.use, lClient), + new TransactionClient(right.use, rClient) + ) + + const prepareResults = await Promise.allSettled([ + lClient?.query(`PREPARE TRANSACTION ${gidLeftLiteral}`), + rClient?.query(`PREPARE TRANSACTION ${gidRightLiteral}`), + ]) + + if (prepareResults[0].status === 'rejected') { + twoPhaseCommitFailuresCounter.labels(tag, 'prepare_left_failed').inc() + if (prepareResults[1].status === 'fulfilled') { + preparedRight = true + } + throw prepareResults[0].reason + } + preparedLeft = true + + if (prepareResults[1].status === 'rejected') { + twoPhaseCommitFailuresCounter.labels(tag, 'prepare_right_failed').inc() + throw prepareResults[1].reason + } + preparedRight = true + + // Release the transaction clients back to the connection pool. + // After PREPARE TRANSACTION, the transaction is no longer associated with these connections. + // The prepared transactions now exist as independent entities in PostgreSQL's shared state + // and can be committed or rolled back from ANY connection, not just the original ones. + // This is a key feature of 2PC that enables recovery - if this process crashes after PREPARE, + // another process can still complete the commit using just the transaction IDs. + // Releasing the connections here also improves connection pool efficiency.
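+ // As an illustration of that recovery property: if this process dies between PREPARE and COMMIT, an operator + // can list the dangling transactions from any session and finish them by GID (the exact suffix is whatever + // makeGid() generated for that run): + //   SELECT gid, prepared, database FROM pg_prepared_xacts WHERE gid LIKE 'dualwrite:%'; + //   COMMIT PREPARED 'dualwrite:<tag>:<ts>:<rand>:left'; -- or ROLLBACK PREPARED to abandon it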
+ lClient?.release() + rClient?.release() + lClient = undefined + rClient = undefined + + // COMMIT PREPARED can be executed from any connection, so we use the router to get + // fresh connections. This demonstrates the durability guarantee of 2PC - the prepared + // transactions persist independently of any specific database connection. + try { + await left.router.query(left.use, `COMMIT PREPARED ${gidLeftLiteral}`, [], `2pc-commit-left:${tag}`) + // Once committed, the prepared transaction no longer exists and cannot be rolled back + preparedLeft = false + } catch (e) { + twoPhaseCommitFailuresCounter.labels(tag, 'commit_left_failed').inc() + throw e + } + try { + await right.router.query( + right.use, + `COMMIT PREPARED ${gidRightLiteral}`, + [], + `2pc-commit-right:${tag}` + ) + // Once committed, the prepared transaction no longer exists and cannot be rolled back + preparedRight = false + } catch (e) { + twoPhaseCommitFailuresCounter.labels(tag, 'commit_right_failed').inc() + throw e + } + + return result + } catch (error) { + try { + if (preparedLeft) { + try { + await left.router.query( + left.use, + `ROLLBACK PREPARED ${gidLeftLiteral}`, + [], + `2pc-rollback-left:${tag}` + ) + } catch (e) { + twoPhaseCommitFailuresCounter.labels(tag, 'rollback_left_failed').inc() + throw e + } + } else if (lClient) { + await lClient.query('ROLLBACK') + } + } catch (e) { + logger.error('Failed to rollback/cleanup left side of 2PC', { error: e }) + } + try { + if (preparedRight) { + try { + await right.router.query( + right.use, + `ROLLBACK PREPARED ${gidRightLiteral}`, + [], + `2pc-rollback-right:${tag}` + ) + } catch (e) { + twoPhaseCommitFailuresCounter.labels(tag, 'rollback_right_failed').inc() + throw e + } + } else if (rClient) { + await rClient.query('ROLLBACK') + } + } catch (e) { + logger.error('Failed to rollback/cleanup right side of 2PC', { error: e }) + } + logger.error('two-phase commit failed', { + tag, + gid: gidRoot, + left: this.sides.left.name ?? 'left', + right: this.sides.right.name ??
'right', + error, + }) + twoPhaseCommitFailuresCounter.labels(tag, 'run_failed').inc() + throw error + } finally { + try { + lClient?.release() + } catch {} + try { + rClient?.release() + } catch {} + } + }) + } +} diff --git a/plugin-server/src/worker/ingestion/persons/metrics.ts b/plugin-server/src/worker/ingestion/persons/metrics.ts index eec37f2082..4342a22d7c 100644 --- a/plugin-server/src/worker/ingestion/persons/metrics.ts +++ b/plugin-server/src/worker/ingestion/persons/metrics.ts @@ -127,6 +127,24 @@ export const personShadowModeReturnIntermediateOutcomeCounter = new Counter({ labelNames: ['method', 'outcome'], }) +export const twoPhaseCommitFailuresCounter = new Counter({ + name: 'person_dualwrite_2pc_failures_total', + help: 'Two-phase commit failures for dual-write person repository', + labelNames: ['tag', 'phase'], // phase: fn_failed, prepare_left_failed, prepare_right_failed, commit_left_failed, commit_right_failed, rollback_left_failed, rollback_right_failed, run_failed +}) + +export const dualWriteComparisonCounter = new Counter({ + name: 'person_dualwrite_comparison_total', + help: 'Comparison results between primary and secondary databases in dual-write mode', + labelNames: ['operation', 'comparison_type', 'result'], // operation: createPerson, updatePerson, etc., comparison_type: success_match, data_mismatch, error_mismatch, result: match, mismatch +}) + +export const dualWriteDataMismatchCounter = new Counter({ + name: 'person_dualwrite_data_mismatch_total', + help: 'Detailed data mismatches between primary and secondary databases', + labelNames: ['operation', 'field'], // field: properties, version, is_identified, etc. +}) + export function getVersionBucketLabel(version: number): string { if (version === 0) { return 'v0' diff --git a/plugin-server/src/worker/ingestion/persons/repositories/dualwrite-person-repository-transaction.ts b/plugin-server/src/worker/ingestion/persons/repositories/dualwrite-person-repository-transaction.ts new file mode 100644 index 0000000000..a2bb839f41 --- /dev/null +++ b/plugin-server/src/worker/ingestion/persons/repositories/dualwrite-person-repository-transaction.ts @@ -0,0 +1,323 @@ +import { DateTime } from 'luxon' + +import { Properties } from '@posthog/plugin-scaffold' + +import { TopicMessage } from '~/kafka/producer' +import { InternalPerson, PropertiesLastOperation, PropertiesLastUpdatedAt, Team } from '~/types' +import { CreatePersonResult, MoveDistinctIdsResult } from '~/utils/db/db' +import { TransactionClient } from '~/utils/db/postgres' + +import { dualWriteComparisonCounter, dualWriteDataMismatchCounter } from '../metrics' +import { PersonRepositoryTransaction } from './person-repository-transaction' +import { RawPostgresPersonRepository } from './raw-postgres-person-repository' + +export class DualWritePersonRepositoryTransaction implements PersonRepositoryTransaction { + constructor( + private primaryRepo: RawPostgresPersonRepository, + private secondaryRepo: RawPostgresPersonRepository, + private lTx: TransactionClient, + private rTx: TransactionClient, + private comparisonEnabled: boolean = false + ) {} + + async createPerson( + createdAt: DateTime, + properties: Properties, + propertiesLastUpdatedAt: PropertiesLastUpdatedAt, + propertiesLastOperation: PropertiesLastOperation, + teamId: Team['id'], + isUserId: number | null, + isIdentified: boolean, + uuid: string, + distinctIds?: { distinctId: string; version?: number }[] + ): Promise<CreatePersonResult> { + const p = await this.primaryRepo.createPerson( + createdAt, + properties, +
propertiesLastUpdatedAt, + propertiesLastOperation, + teamId, + isUserId, + isIdentified, + uuid, + distinctIds, + this.lTx + ) + if (!p.success) { + // We need to throw to trigger rollback, but preserve the error type + // so the outer repository can handle it appropriately + const error = new Error(`DualWrite primary create failed`) + ;(error as any).result = p + throw error + } + // Force the same ID on the secondary + const forcedId = Number(p.person.id) + const s = await this.secondaryRepo.createPerson( + createdAt, + properties, + propertiesLastUpdatedAt, + propertiesLastOperation, + teamId, + isUserId, + isIdentified, + uuid, + distinctIds, + this.rTx, + forcedId + ) + if (!s.success) { + const error = new Error(`DualWrite secondary create failed`) + ;(error as any).result = s + throw error + } + + // Compare results between primary and secondary + if (this.comparisonEnabled) { + this.compareCreatePersonResults(p, s) + } + + return p + } + + async updatePerson( + person: InternalPerson, + update: Partial<InternalPerson>, + tag?: string + ): Promise<[InternalPerson, TopicMessage[], boolean]> { + // Enforce version parity across primary/secondary: run primary first, then set secondary to primary's new version + const primaryOut = await this.primaryRepo.updatePerson(person, { ...update }, tag, this.lTx) + const primaryUpdated = primaryOut[0] + const secondaryOut = await this.secondaryRepo.updatePerson( + person, + { ...update, version: primaryUpdated.version }, + tag ? `${tag}-secondary` : undefined, + this.rTx + ) + + if (this.comparisonEnabled) { + this.compareUpdatePersonResults(primaryOut, secondaryOut, tag) + } + + return primaryOut + } + + async deletePerson(person: InternalPerson): Promise<TopicMessage[]> { + const [p, s] = await Promise.all([ + this.primaryRepo.deletePerson(person, this.lTx), + this.secondaryRepo.deletePerson(person, this.rTx), + ]) + + if (this.comparisonEnabled) { + this.compareTopicMessages('deletePerson', p, s) + } + + return p + } + + async addDistinctId(person: InternalPerson, distinctId: string, version: number): Promise<TopicMessage[]> { + const [p, s] = await Promise.all([ + this.primaryRepo.addDistinctId(person, distinctId, version, this.lTx), + this.secondaryRepo.addDistinctId(person, distinctId, version, this.rTx), + ]) + + if (this.comparisonEnabled) { + this.compareTopicMessages('addDistinctId', p, s) + } + + return p + } + + async moveDistinctIds( + source: InternalPerson, + target: InternalPerson, + limit?: number + ): Promise<MoveDistinctIdsResult> { + const [p, s] = await Promise.all([ + this.primaryRepo.moveDistinctIds(source, target, limit, this.lTx), + this.secondaryRepo.moveDistinctIds(source, target, limit, this.rTx), + ]) + // Match the behavior of the direct repository call: + // If both repositories return the same failure result, that's expected behavior + if (!p.success && !s.success && p.error === s.error) { + return p + } + if (p.success !== s.success || (!p.success && !s.success && p.error !== s.error)) { + if (this.comparisonEnabled) { + dualWriteComparisonCounter.inc({ + operation: 'moveDistinctIds', + comparison_type: p.success !== s.success ? 'success_mismatch' : 'error_mismatch', + result: 'mismatch', + }) + } + // In the direct repository, this causes a rollback via returning false from coordinator + // In transaction context, we should throw to trigger rollback + const pError = !p.success ? p.error : 'none' + const sError = !s.success ?
s.error : 'none' + throw new Error( + `DualWrite moveDistinctIds mismatch: primary=${p.success}/${pError}, secondary=${s.success}/${sError}` + ) + } + + if (this.comparisonEnabled && p.success && s.success) { + this.compareTopicMessages('moveDistinctIds', p.messages || [], s.messages || []) + } + + return p + } + + async fetchPersonDistinctIds(person: InternalPerson, limit?: number): Promise<string[]> { + // This is a read operation, so only the primary is used + return await this.primaryRepo.fetchPersonDistinctIds(person, limit, this.lTx) + } + + async addPersonlessDistinctIdForMerge(teamId: Team['id'], distinctId: string): Promise<boolean> { + const [p, s] = await Promise.all([ + this.primaryRepo.addPersonlessDistinctIdForMerge(teamId, distinctId, this.lTx), + this.secondaryRepo.addPersonlessDistinctIdForMerge(teamId, distinctId, this.rTx), + ]) + + if (this.comparisonEnabled) { + if (p !== s) { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctIdForMerge', + comparison_type: 'boolean_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctIdForMerge', + comparison_type: 'boolean_match', + result: 'match', + }) + } + } + + return p + } + + async updateCohortsAndFeatureFlagsForMerge( + teamID: Team['id'], + sourcePersonID: InternalPerson['id'], + targetPersonID: InternalPerson['id'] + ): Promise<void> { + await Promise.all([ + this.primaryRepo.updateCohortsAndFeatureFlagsForMerge(teamID, sourcePersonID, targetPersonID, this.lTx), + this.secondaryRepo.updateCohortsAndFeatureFlagsForMerge(teamID, sourcePersonID, targetPersonID, this.rTx), + ]) + } + + private compareCreatePersonResults(primary: CreatePersonResult, secondary: CreatePersonResult): void { + if (primary.success !== secondary.success) { + dualWriteComparisonCounter.inc({ + operation: 'createPerson_tx', + comparison_type: 'success_mismatch', + result: 'mismatch', + }) + return + } + + if (!primary.success || !secondary.success) { + if (!primary.success && !secondary.success && primary.error !== secondary.error) { + dualWriteComparisonCounter.inc({ + operation: 'createPerson_tx', + comparison_type: 'error_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'createPerson_tx', + comparison_type: 'error_match', + result: 'match', + }) + } + return + } + + const p = primary.person + const s = secondary.person + let hasMismatch = false + + if (JSON.stringify(p.properties) !== JSON.stringify(s.properties)) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson_tx', field: 'properties' }) + hasMismatch = true + } + if (p.version !== s.version) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson_tx', field: 'version' }) + hasMismatch = true + } + if (p.is_identified !== s.is_identified) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson_tx', field: 'is_identified' }) + hasMismatch = true + } + if (p.is_user_id !== s.is_user_id) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson_tx', field: 'is_user_id' }) + hasMismatch = true + } + + dualWriteComparisonCounter.inc({ + operation: 'createPerson_tx', + comparison_type: 'data_comparison', + result: hasMismatch ?
'mismatch' : 'match', + }) + } + + private compareUpdatePersonResults( + primary: [InternalPerson, TopicMessage[], boolean], + secondary: [InternalPerson, TopicMessage[], boolean], + tag?: string + ): void { + const [pPerson, pMessages, pChanged] = primary + const [sPerson, sMessages, sChanged] = secondary + let hasMismatch = false + + if (JSON.stringify(pPerson.properties) !== JSON.stringify(sPerson.properties)) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson_tx:${tag ?? 'update'}`, field: 'properties' }) + hasMismatch = true + } + if (pPerson.version !== sPerson.version) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson_tx:${tag ?? 'update'}`, field: 'version' }) + hasMismatch = true + } + if (pPerson.is_identified !== sPerson.is_identified) { + dualWriteDataMismatchCounter.inc({ + operation: `updatePerson_tx:${tag ?? 'update'}`, + field: 'is_identified', + }) + hasMismatch = true + } + if (pChanged !== sChanged) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson_tx:${tag ?? 'update'}`, field: 'changed_flag' }) + hasMismatch = true + } + + if (pMessages.length !== sMessages.length) { + dualWriteDataMismatchCounter.inc({ + operation: `updatePerson_tx:${tag ?? 'update'}`, + field: 'message_count', + }) + hasMismatch = true + } + + dualWriteComparisonCounter.inc({ + operation: `updatePerson_tx:${tag ?? 'update'}`, + comparison_type: 'data_comparison', + result: hasMismatch ? 'mismatch' : 'match', + }) + } + + private compareTopicMessages(operation: string, primary: TopicMessage[], secondary: TopicMessage[]): void { + if (primary.length !== secondary.length) { + dualWriteComparisonCounter.inc({ + operation: `${operation}_tx`, + comparison_type: 'message_count_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: `${operation}_tx`, + comparison_type: 'message_count_match', + result: 'match', + }) + } + } +} diff --git a/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository-2pc.test.ts b/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository-2pc.test.ts new file mode 100644 index 0000000000..2eb6704eaf --- /dev/null +++ b/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository-2pc.test.ts @@ -0,0 +1,2167 @@ +import { DateTime } from 'luxon' + +import { resetTestDatabase } from '~/tests/helpers/sql' +import { Hub } from '~/types' +import { closeHub, createHub } from '~/utils/db/hub' +import { PostgresRouter, PostgresUse } from '~/utils/db/postgres' + +import { PostgresDualWritePersonRepository } from './postgres-dualwrite-person-repository' +import { + assertConsistencyAcrossDatabases, + cleanupPrepared, + getFirstTeam, + mockDatabaseError, + setupMigrationDb, +} from './test-helpers' + +jest.mock('../../../../utils/logger') + +describe('PostgresDualWritePersonRepository 2PC Dual-Write Tests', () => { + let hub: Hub + let postgres: PostgresRouter + let migrationPostgres: PostgresRouter + let repository: PostgresDualWritePersonRepository + + async function verifyDistinctIdsForPerson( + teamId: number, + personId: string, + personUuid: string, + expectedDids: string[] + ) { + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + `SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id`, + [personId], + 'verify-primary-dids', + 'verify-secondary-dids-placeholder' + ) + + // For secondary DB, we need to use UUID to find person ID + const 
secondaryDids = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2) ORDER BY distinct_id', + [teamId, personUuid], + 'verify-secondary-dids' + ) + + const primaryDids = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [personId], + 'verify-primary-dids-final' + ) + + expect(primaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + expect(secondaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + } + + beforeEach(async () => { + hub = await createHub() + await resetTestDatabase(undefined, {}, {}, { withExtendedTestData: false }) + postgres = hub.db.postgres + migrationPostgres = hub.db.postgresPersonMigration + await setupMigrationDb(migrationPostgres) + + repository = new PostgresDualWritePersonRepository(postgres, migrationPostgres) + + const redis = await hub.redisPool.acquire() + await redis.flushdb() + await hub.redisPool.release(redis) + }) + + afterEach(async () => { + await cleanupPrepared(hub) + await closeHub(hub) + jest.clearAllMocks() + }) + + describe('createPerson() 2PC tests', () => { + it('writes to both primary and secondary (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '11111111-1111-1111-1111-111111111111' + + const result = await repository.createPerson( + createdAt, + { name: 'Dual Write' }, + {}, + {}, + team.id, + null, + true, + uuid, + [{ distinctId: 'dw-1', version: 0 }] + ) + expect(result.success).toBe(true) + + const primary = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE team_id = $1 AND uuid = $2', + [team.id, uuid], + 'verify-primary-create' + ) + const secondary = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE team_id = $1 AND uuid = $2', + [team.id, uuid], + 'verify-secondary-create' + ) + expect(primary.rows.length).toBe(1) + expect(secondary.rows.length).toBe(1) + + const pids = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-1'], + 'verify-primary-distinct' + ) + const sids = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-1'], + 'verify-secondary-distinct' + ) + expect(pids.rows.length).toBe(1) + expect(sids.rows.length).toBe(1) + }) + + it('rolls back both when secondary write fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '88888888-8888-8888-8888-888888888888' + const spy = jest + .spyOn((repository as any).secondaryRepo, 'createPerson') + .mockRejectedValue(new Error('simulated secondary failure')) + + await expect( + repository.createPerson(createdAt, { y: 1 }, {}, {}, team.id, null, false, uuid, [ + { distinctId: 'dw-fail' }, + ]) + ).rejects.toThrow('simulated secondary failure') + + spy.mockRestore() + + const primaryPersons = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT id FROM posthog_person WHERE team_id = $1 AND uuid = $2', + [team.id, uuid], + 'verify-primary-no-create-after-rollback' + ) + const secondaryPersons = await 
migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT id FROM posthog_person WHERE team_id = $1 AND uuid = $2', + [team.id, uuid], + 'verify-secondary-no-create-after-rollback' + ) + expect(primaryPersons.rows.length).toBe(0) + expect(secondaryPersons.rows.length).toBe(0) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '11111111-1111-1111-1111-111111111111' + + const mockSpy = mockDatabaseError(postgres, new Error('primary database connection lost'), 'insertPerson') + + await expect( + repository.createPerson(createdAt, { name: 'Test Person' }, {}, {}, team.id, null, false, uuid, [ + { distinctId: 'test-primary-fail', version: 0 }, + ]) + ).rejects.toThrow('primary database connection lost') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-person-rollback', + 'verify-secondary-person-rollback' + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['test-primary-fail'], + 'verify-primary-did-rollback', + 'verify-secondary-did-rollback' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-empty-person' + ) + const didCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['test-primary-fail'], + 'verify-empty-did' + ) + expect(personCheck.rows.length).toBe(0) + expect(didCheck.rows.length).toBe(0) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '22222222-2222-2222-2222-222222222222' + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary database connection lost'), + 'insertPerson' + ) + + await expect( + repository.createPerson(createdAt, { name: 'Test Person' }, {}, {}, team.id, null, false, uuid, [ + { distinctId: 'test-secondary-fail', version: 0 }, + ]) + ).rejects.toThrow('secondary database connection lost') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-person-rollback', + 'verify-secondary-person-rollback' + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['test-secondary-fail'], + 'verify-primary-did-rollback', + 'verify-secondary-did-rollback' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-empty-person' + ) + const didCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['test-secondary-fail'], + 'verify-empty-did' + ) + expect(personCheck.rows.length).toBe(0) + expect(didCheck.rows.length).toBe(0) + }) + }) + + describe('updatePerson() 2PC tests', () => { + it('replicates to secondary (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = 
'22222222-2222-2222-2222-222222222222' + const { person } = (await repository.createPerson( + createdAt, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'dw-2' }] + )) as any + + const [updated] = await repository.updatePerson(person, { properties: { name: 'B' } }) + + const primary = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE id = $1', + [updated.id], + 'verify-primary-update' + ) + const secondary = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-update' + ) + expect(primary.rows[0].properties).toEqual({ name: 'B' }) + expect(secondary.rows[0].properties).toEqual({ name: 'B' }) + }) + + it('rolls back both when secondary update fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '88888888-8888-8888-8888-888888888888' + const { person } = (await repository.createPerson(createdAt, { y: 1 }, {}, {}, team.id, null, false, uuid, [ + { distinctId: 'dw-fail' }, + ])) as any + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'updatePerson') + .mockRejectedValue(new Error('simulated secondary failure')) + + await expect(repository.updatePerson(person, { properties: { y: 2 } }, 'test-fail')).rejects.toThrow( + 'simulated secondary failure' + ) + + spy.mockRestore() + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-rolled-back' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-rolled-back' + ) + expect(p.rows[0].properties).toEqual({ y: 1 }) + expect(s.rows[0].properties).toEqual({ y: 1 }) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '33333333-3333-3333-3333-333333333333' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'Original' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'update-primary-fail', version: 0 }] + )) as any + + const mockSpy = mockDatabaseError(postgres, new Error('primary update failed'), 'updatePerson') + + await expect(repository.updatePerson(person, { properties: { name: 'Updated' } })).rejects.toThrow( + 'primary update failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-unchanged', + 'verify-secondary-unchanged' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-properties' + ) + expect(personCheck.rows[0].properties).toEqual({ name: 'Original' }) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '44444444-4444-4444-4444-444444444444' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'Original' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'update-secondary-fail', version: 0 }] + )) as any + + const mockSpy = 
mockDatabaseError(migrationPostgres, new Error('secondary update failed'), 'updatePerson') + + await expect(repository.updatePerson(person, { properties: { name: 'Updated' } })).rejects.toThrow( + 'secondary update failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-unchanged', + 'verify-secondary-unchanged' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-properties' + ) + expect(personCheck.rows[0].properties).toEqual({ name: 'Original' }) + }) + }) + + describe('deletePerson() 2PC tests', () => { + it('removes from both (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '33333333-3333-3333-3333-333333333333' + const { person } = (await repository.createPerson(createdAt, {}, {}, {}, team.id, null, false, uuid)) as any + + await repository.deletePerson(person) + + const primary = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-delete' + ) + const secondary = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-delete' + ) + expect(primary.rows.length).toBe(0) + expect(secondary.rows.length).toBe(0) + }) + + it('rolls back both when secondary delete fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-16T10:30:00.000Z').toUTC() + const uuid = '33333333-3333-3333-3333-333333333334' + const { person } = (await repository.createPerson(createdAt, {}, {}, {}, team.id, null, false, uuid)) as any + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'deletePerson') + .mockRejectedValue(new Error('simulated secondary delete failure')) + + await expect(repository.deletePerson(person)).rejects.toThrow('simulated secondary delete failure') + + spy.mockRestore() + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-delete-rolled-back' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-delete-rolled-back' + ) + expect(p.rows.length).toBe(1) + expect(s.rows.length).toBe(1) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '55555555-5555-5555-5555-555555555555' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'To Delete' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'delete-primary-fail', version: 0 }] + )) as any + + const mockSpy = mockDatabaseError(postgres, new Error('primary delete failed'), 'deletePerson') + + await expect(repository.deletePerson(person)).rejects.toThrow('primary delete failed') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-exists', + 'verify-secondary-exists' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person 
WHERE uuid = $1', + [uuid], + 'verify-person-exists' + ) + expect(personCheck.rows.length).toBe(1) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '66666666-6666-6666-6666-666666666666' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'To Delete' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'delete-secondary-fail', version: 0 }] + )) as any + + const mockSpy = mockDatabaseError(migrationPostgres, new Error('secondary delete failed'), 'deletePerson') + + await expect(repository.deletePerson(person)).rejects.toThrow('secondary delete failed') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-exists', + 'verify-secondary-exists' + ) + + const personCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-person-exists' + ) + expect(personCheck.rows.length).toBe(1) + }) + }) + + describe('addDistinctId() 2PC tests', () => { + it('writes to both (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '44444444-4444-4444-4444-444444444444' + const { person } = (await repository.createPerson(createdAt, {}, {}, {}, team.id, null, true, uuid, [ + { distinctId: 'dw-3' }, + ])) as any + + await repository.addDistinctId(person, 'dw-3b', 1) + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-3b'], + 'verify-primary-add-distinct' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-3b'], + 'verify-secondary-add-distinct' + ) + expect(p.rows.length).toBe(1) + expect(s.rows.length).toBe(1) + }) + + it('rolls back both when secondary addDistinctId fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-17T10:30:00.000Z').toUTC() + const uuid = '44444444-4444-4444-4444-444444444445' + const { person } = (await repository.createPerson(createdAt, {}, {}, {}, team.id, null, true, uuid, [ + { distinctId: 'dw-3c', version: 0 }, + ])) as any + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'addDistinctId') + .mockRejectedValue(new Error('simulated secondary addDistinctId failure')) + + await expect(repository.addDistinctId(person, 'dw-3d', 1)).rejects.toThrow( + 'simulated secondary addDistinctId failure' + ) + + spy.mockRestore() + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-3d'], + 'verify-primary-add-distinct-rolled-back' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dw-3d'], + 'verify-secondary-add-distinct-rolled-back' + ) + expect(p.rows.length).toBe(0) + expect(s.rows.length).toBe(0) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = 
DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '77777777-7777-7777-7777-777777777777' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'Test' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'original-did', version: 0 }] + )) as any + + const mockSpy = mockDatabaseError(postgres, new Error('primary addDistinctId failed'), 'addDistinctId') + + await expect(repository.addDistinctId(person, 'new-did-primary-fail', 1)).rejects.toThrow( + 'primary addDistinctId failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['new-did-primary-fail'], + 'verify-primary-no-new-did', + 'verify-secondary-no-new-did' + ) + + const didCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['new-did-primary-fail'], + 'verify-no-did' + ) + expect(didCheck.rows.length).toBe(0) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '88888888-8888-8888-8888-888888888888' + + const { person } = (await repository.createPerson( + createdAt, + { name: 'Test' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'original-did-2', version: 0 }] + )) as any + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary addDistinctId failed'), + 'addDistinctId' + ) + + await expect(repository.addDistinctId(person, 'new-did-secondary-fail', 1)).rejects.toThrow( + 'secondary addDistinctId failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['new-did-secondary-fail'], + 'verify-primary-no-new-did', + 'verify-secondary-no-new-did' + ) + + const didCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id = $1', + ['new-did-secondary-fail'], + 'verify-no-did' + ) + expect(didCheck.rows.length).toBe(0) + }) + }) + + describe('moveDistinctIds() 2PC tests', () => { + it('affects both sides (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const srcUuid = '55555555-5555-5555-5555-555555555555' + const tgtUuid = '66666666-6666-6666-6666-666666666666' + + const { person: src } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + srcUuid, + [{ distinctId: 'src-a', version: 0 }] + )) as any + const { person: tgt } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + tgtUuid, + [{ distinctId: 'tgt-a', version: 0 }] + )) as any + + await repository.addDistinctId(src, 'src-b', 1) + + const res = await repository.moveDistinctIds(src, tgt) + expect(res.success).toBe(true) + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [tgt.id], + 'verify-primary-move' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2)', + [team.id, tgtUuid], + 'verify-secondary-move' + ) + const 
pIds = p.rows.map((r: any) => r.distinct_id).sort() + const sIds = s.rows.map((r: any) => r.distinct_id).sort() + expect(pIds).toEqual(expect.arrayContaining(['src-a', 'src-b', 'tgt-a'])) + expect(sIds).toEqual(expect.arrayContaining(['src-a', 'src-b', 'tgt-a'])) + }) + + it('rolls back both when secondary moveDistinctIds fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-18T10:30:00.000Z').toUTC() + const srcUuid = '55555555-5555-5555-5555-555555555556' + const tgtUuid = '66666666-6666-6666-6666-666666666667' + + const { person: src } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + srcUuid, + [{ distinctId: 'src-c', version: 0 }] + )) as any + const { person: tgt } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + tgtUuid, + [{ distinctId: 'tgt-c', version: 0 }] + )) as any + + await repository.addDistinctId(src, 'src-d', 1) + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'moveDistinctIds') + .mockRejectedValue(new Error('simulated secondary moveDistinctIds failure')) + + await expect(repository.moveDistinctIds(src, tgt)).rejects.toThrow( + 'simulated secondary moveDistinctIds failure' + ) + + spy.mockRestore() + + const pSrc = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [src.id], + 'verify-primary-move-rolled-back-src' + ) + expect(pSrc.rows.map((r: any) => r.distinct_id).sort()).toEqual(['src-c', 'src-d']) + const pTgt = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [tgt.id], + 'verify-primary-move-rolled-back-tgt' + ) + expect(pTgt.rows.map((r: any) => r.distinct_id).sort()).toEqual(['tgt-c']) + const sTgt = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2) ORDER BY distinct_id', + [team.id, tgtUuid], + 'verify-secondary-move-rolled-back-tgt' + ) + expect(sTgt.rows.map((r: any) => r.distinct_id).sort()).toEqual(['tgt-c']) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const srcUuid = '99999999-9999-9999-9999-999999999999' + const tgtUuid = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' + + const { person: src } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + srcUuid, + [{ distinctId: 'src-did', version: 0 }] + )) as any + + const { person: tgt } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + tgtUuid, + [{ distinctId: 'tgt-did', version: 0 }] + )) as any + + await repository.addDistinctId(src, 'src-did-2', 1) + + const mockSpy = mockDatabaseError( + postgres, + new Error('primary moveDistinctIds failed'), + 'updateDistinctIdPerson' + ) + + await expect(repository.moveDistinctIds(src, tgt)).rejects.toThrow('primary moveDistinctIds failed') + + mockSpy.mockRestore() + + await verifyDistinctIdsForPerson(team.id, src.id, srcUuid, ['src-did', 'src-did-2']) + await verifyDistinctIdsForPerson(team.id, tgt.id, tgtUuid, ['tgt-did']) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt 
= DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const srcUuid = 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' + const tgtUuid = 'cccccccc-cccc-cccc-cccc-cccccccccccc' + + const { person: src } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + srcUuid, + [{ distinctId: 'src-did-b', version: 0 }] + )) as any + + const { person: tgt } = (await repository.createPerson( + createdAt, + {}, + {}, + {}, + team.id, + null, + false, + tgtUuid, + [{ distinctId: 'tgt-did-b', version: 0 }] + )) as any + + await repository.addDistinctId(src, 'src-did-b-2', 1) + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary moveDistinctIds failed'), + 'updateDistinctIdPerson' + ) + + await expect(repository.moveDistinctIds(src, tgt)).rejects.toThrow('secondary moveDistinctIds failed') + + mockSpy.mockRestore() + + await verifyDistinctIdsForPerson(team.id, src.id, srcUuid, ['src-did-b', 'src-did-b-2']) + await verifyDistinctIdsForPerson(team.id, tgt.id, tgtUuid, ['tgt-did-b']) + }) + }) + + describe('addPersonlessDistinctId() 2PC tests', () => { + it('writes to both (happy path)', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-1' + const inserted = await repository.addPersonlessDistinctId(team.id, did) + expect(inserted === true || inserted === false).toBe(true) + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-personless' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-secondary-personless' + ) + expect(p.rows.length).toBe(1) + expect(s.rows.length).toBe(1) + }) + + it('rolls back both when secondary addPersonlessDistinctId fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-fail' + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'addPersonlessDistinctId') + .mockRejectedValue(new Error('simulated secondary personless failure')) + + await expect(repository.addPersonlessDistinctId(team.id, did)).rejects.toThrow( + 'simulated secondary personless failure' + ) + + spy.mockRestore() + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-personless-rolled-back' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-secondary-personless-rolled-back' + ) + expect(p.rows.length).toBe(0) + expect(s.rows.length).toBe(0) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-primary-fail' + + const mockSpy = mockDatabaseError( + postgres, + new Error('primary addPersonlessDistinctId failed'), + 'addPersonlessDistinctId' + ) + + await expect(repository.addPersonlessDistinctId(team.id, did)).rejects.toThrow( + 'primary addPersonlessDistinctId failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-no-personless', + 'verify-secondary-no-personless' 
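+                // (assertConsistencyAcrossDatabases presumably runs the given query against both
+                // routers under the two tags and asserts the results match; hedged note, the
+                // helper itself is declared outside this excerpt)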
+ ) + + const recordCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-no-personless' + ) + expect(recordCheck.rows.length).toBe(0) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-secondary-fail' + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary addPersonlessDistinctId failed'), + 'addPersonlessDistinctId' + ) + + await expect(repository.addPersonlessDistinctId(team.id, did)).rejects.toThrow( + 'secondary addPersonlessDistinctId failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-no-personless', + 'verify-secondary-no-personless' + ) + + const recordCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-no-personless' + ) + expect(recordCheck.rows.length).toBe(0) + }) + }) + + describe('addPersonlessDistinctIdForMerge() 2PC tests', () => { + it('writes to both (happy path)', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-merge-1' + const merged = await repository.addPersonlessDistinctIdForMerge(team.id, did) + expect(merged === true || merged === false).toBe(true) + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-personless-merge' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-secondary-personless-merge' + ) + expect(p.rows.length).toBe(1) + expect(s.rows.length).toBe(1) + }) + + it('rolls back both when secondary addPersonlessDistinctIdForMerge fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-merge-2' + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'addPersonlessDistinctIdForMerge') + .mockRejectedValue(new Error('simulated secondary personless-merge failure')) + + await expect(repository.addPersonlessDistinctIdForMerge(team.id, did)).rejects.toThrow( + 'simulated secondary personless-merge failure' + ) + + spy.mockRestore() + + const p = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-personless-merge-rolled-back' + ) + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-secondary-personless-merge-rolled-back' + ) + expect(p.rows.length).toBe(0) + expect(s.rows.length).toBe(0) + }) + + it('rolls back when primary database fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-merge-primary-fail' + + const mockSpy = mockDatabaseError( + postgres, + new Error('primary addPersonlessDistinctIdForMerge failed'), + 'addPersonlessDistinctIdForMerge' + ) + + await expect(repository.addPersonlessDistinctIdForMerge(team.id, did)).rejects.toThrow( + 'primary 
addPersonlessDistinctIdForMerge failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-no-merge', + 'verify-secondary-no-merge' + ) + + const recordCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-no-merge' + ) + expect(recordCheck.rows.length).toBe(0) + }) + + it('rolls back when secondary database fails', async () => { + const team = await getFirstTeam(postgres) + const did = 'personless-merge-secondary-fail' + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary addPersonlessDistinctIdForMerge failed'), + 'addPersonlessDistinctIdForMerge' + ) + + await expect(repository.addPersonlessDistinctIdForMerge(team.id, did)).rejects.toThrow( + 'secondary addPersonlessDistinctIdForMerge failed' + ) + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-primary-no-merge', + 'verify-secondary-no-merge' + ) + + const recordCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, did], + 'verify-no-merge' + ) + expect(recordCheck.rows.length).toBe(0) + }) + }) + + describe('updatePersonAssertVersion() non-2PC test', () => { + it('updates secondary on primary success (happy path)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + const uuid = '77777777-7777-7777-7777-777777777777' + const { person } = (await repository.createPerson( + createdAt, + { x: 1 }, + {}, + {}, + team.id, + null, + false, + uuid + )) as any + + const [version] = await repository.updatePersonAssertVersion({ + id: person.id, + team_id: person.team_id, + uuid: person.uuid, + distinct_id: 'dw-assert', + properties: { x: 2 }, + properties_last_updated_at: {}, + properties_last_operation: {}, + created_at: person.created_at, + version: person.version, + is_identified: person.is_identified, + is_user_id: person.is_user_id, + needs_write: true, + properties_to_set: { x: 2 }, + properties_to_unset: [], + }) + expect(version).toBe(person.version + 1) + + const s = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT properties, version FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-assert' + ) + expect(s.rows[0].properties).toEqual({ x: 2 }) + expect(Number(s.rows[0].version || 0)).toBe(person.version + 1) + }) + + it('does not rollback primary when secondary fails (non-2PC)', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-19T10:30:00.000Z').toUTC() + const uuid = '77777777-7777-7777-7777-777777777778' + const { person } = (await repository.createPerson( + createdAt, + { x: 1 }, + {}, + {}, + team.id, + null, + false, + uuid + )) as any + + const spy = jest + .spyOn((repository as any).secondaryRepo, 'updatePersonAssertVersion') + .mockRejectedValue(new Error('secondary assert-version failure')) + + await expect( + repository.updatePersonAssertVersion({ + id: person.id, + team_id: person.team_id, + uuid: person.uuid, + distinct_id: 'dw-assert-2', + properties: { x: 2 }, + 
properties_last_updated_at: {},
+                    properties_last_operation: {},
+                    created_at: person.created_at,
+                    version: person.version,
+                    is_identified: person.is_identified,
+                    is_user_id: person.is_user_id,
+                    needs_write: true,
+                    properties_to_set: { x: 2 },
+                    properties_to_unset: [],
+                })
+            ).rejects.toThrow('secondary assert-version failure')
+
+            spy.mockRestore()
+
+            const p = await postgres.query(
+                PostgresUse.PERSONS_READ,
+                'SELECT properties FROM posthog_person WHERE uuid = $1',
+                [uuid],
+                'verify-primary-assert-non-tx'
+            )
+            const s = await migrationPostgres.query(
+                PostgresUse.PERSONS_READ,
+                'SELECT properties FROM posthog_person WHERE uuid = $1',
+                [uuid],
+                'verify-secondary-assert-non-tx'
+            )
+            expect(p.rows[0].properties).toEqual({ x: 2 })
+            expect(s.rows[0].properties).toEqual({ x: 1 })
+        })
+    })
+
+    describe('updateCohortsAndFeatureFlagsForMerge() 2PC tests', () => {
+        it('rolls back when primary database fails', async () => {
+            const team = await getFirstTeam(postgres)
+            const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC()
+            const uuid1 = 'dddddddd-dddd-dddd-dddd-dddddddddddd'
+            const uuid2 = 'eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee'
+
+            // Create two persons
+            const { person: person1 } = (await repository.createPerson(
+                createdAt,
+                {},
+                {},
+                {},
+                team.id,
+                null,
+                false,
+                uuid1,
+                [{ distinctId: 'person1', version: 0 }]
+            )) as any
+
+            const { person: person2 } = (await repository.createPerson(
+                createdAt,
+                {},
+                {},
+                {},
+                team.id,
+                null,
+                false,
+                uuid2,
+                [{ distinctId: 'person2', version: 0 }]
+            )) as any
+
+            // Mock primary database to fail during cohort update
+            const mockSpy = mockDatabaseError(
+                postgres,
+                new Error('primary cohort update failed'),
+                'updateCohortAndFeatureFlagsPeople'
+            )
+
+            await expect(
+                repository.updateCohortsAndFeatureFlagsForMerge(team.id, person1.id, person2.id)
+            ).rejects.toThrow('primary cohort update failed')
+
+            mockSpy.mockRestore()
+
+            // The implementation runs both sides under the 2PC coordinator; this test only
+            // verifies that the error propagates. The resulting state depends on where the
+            // failure occurred, so it is not asserted here.
+        })
+
+        it('rolls back when secondary database fails', async () => {
+            const team = await getFirstTeam(postgres)
+            const createdAt = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC()
+            const uuid1 = 'ffffffff-ffff-ffff-ffff-ffffffffffff'
+            const uuid2 = '00000000-0000-0000-0000-000000000001'
+
+            // Create two persons
+            const { person: person1 } = (await repository.createPerson(
+                createdAt,
+                {},
+                {},
+                {},
+                team.id,
+                null,
+                false,
+                uuid1,
+                [{ distinctId: 'person1b', version: 0 }]
+            )) as any
+
+            const { person: person2 } = (await repository.createPerson(
+                createdAt,
+                {},
+                {},
+                {},
+                team.id,
+                null,
+                false,
+                uuid2,
+                [{ distinctId: 'person2b', version: 0 }]
+            )) as any
+
+            // Mock secondary database to fail during cohort update
+            const mockSpy = mockDatabaseError(
+                migrationPostgres,
+                new Error('secondary cohort update failed'),
+                'updateCohortAndFeatureFlagsPeople'
+            )
+
+            await expect(
+                repository.updateCohortsAndFeatureFlagsForMerge(team.id, person1.id, person2.id)
+            ).rejects.toThrow('secondary cohort update failed')
+
+            mockSpy.mockRestore()
+
+            // As above, only error propagation is asserted here.
+        })
+    })
+
+    describe('inTransaction() 2PC tests', () => {
+        it('should execute multiple operations atomically within a transaction (happy path)', async () => {
+            const team = await getFirstTeam(postgres)
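+            // Every call below goes through the `tx` handle, so the whole callback shares
+            // one 2PC boundary: either both databases observe the create + addDistinctId +
+            // update, or neither does.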
const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid = '11111111-1111-1111-1111-111111111111' + + const result = await repository.inTransaction('test-multi-operation', async (tx) => { + // Create a person within the transaction + const createResult = await tx.createPerson( + createdAt, + { name: 'Transaction Test', age: 25 }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'tx-did-1', version: 0 }] + ) + + if (!createResult.success) { + throw new Error('Failed to create person in transaction') + } + + // Add another distinct ID + await tx.addDistinctId(createResult.person, 'tx-did-2', 1) + + // Update the person + const [updatedPerson] = await tx.updatePerson(createResult.person, { + properties: { name: 'Updated Name', age: 26 }, + }) + + return updatedPerson + }) + + // Verify the transaction succeeded and data is consistent across both databases + const primaryPerson = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-tx-person' + ) + const secondaryPerson = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-tx-person' + ) + + expect(primaryPerson.rows.length).toBe(1) + expect(secondaryPerson.rows.length).toBe(1) + expect(primaryPerson.rows[0].properties).toEqual({ name: 'Updated Name', age: 26 }) + expect(secondaryPerson.rows[0].properties).toEqual({ name: 'Updated Name', age: 26 }) + + // Verify distinct IDs + await verifyDistinctIdsForPerson(team.id, result.id, uuid, ['tx-did-1', 'tx-did-2']) + }) + + it('should rollback all operations when any operation fails within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid = '22222222-2222-2222-2222-222222222222' + + // Mock to make addDistinctId fail on secondary + const spy = jest.spyOn((repository as any).secondaryRepo, 'addDistinctId') + spy.mockRejectedValueOnce(new Error('simulated addDistinctId failure in transaction')) + + await expect( + repository.inTransaction('test-rollback', async (tx) => { + // Create a person - this should succeed initially + const createResult = await tx.createPerson( + createdAt, + { name: 'Will Rollback' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'tx-rollback-1', version: 0 }] + ) + + if (!createResult.success) { + throw new Error('Failed to create person') + } + + await tx.addDistinctId(createResult.person, 'tx-rollback-2', 1) + + return createResult.person + }) + ).rejects.toThrow('simulated addDistinctId failure in transaction') + + spy.mockRestore() + + // Verify nothing was persisted to either database + const primaryCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-rollback' + ) + const secondaryCheck = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-secondary-rollback' + ) + + expect(primaryCheck.rows.length).toBe(0) + expect(secondaryCheck.rows.length).toBe(0) + + // Verify distinct IDs were also rolled back + const primaryDids = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id IN ($1, $2)', + ['tx-rollback-1', 'tx-rollback-2'], + 'verify-primary-dids-rollback' + ) + const secondaryDids = await migrationPostgres.query( + 
PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_persondistinctid WHERE distinct_id IN ($1, $2)', + ['tx-rollback-1', 'tx-rollback-2'], + 'verify-secondary-dids-rollback' + ) + + expect(primaryDids.rows.length).toBe(0) + expect(secondaryDids.rows.length).toBe(0) + }) + + it('should handle complex merge scenario within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const sourceUuid = '33333333-3333-3333-3333-333333333333' + const targetUuid = '44444444-4444-4444-4444-444444444444' + + // Create source and target persons first + const { person: sourcePerson } = (await repository.createPerson( + createdAt, + { source: true }, + {}, + {}, + team.id, + null, + false, + sourceUuid, + [{ distinctId: 'source-main', version: 0 }] + )) as any + + const { person: targetPerson } = (await repository.createPerson( + createdAt, + { target: true }, + {}, + {}, + team.id, + null, + false, + targetUuid, + [{ distinctId: 'target-main', version: 0 }] + )) as any + + // Add extra distinct IDs to source + await repository.addDistinctId(sourcePerson, 'source-extra-1', 1) + await repository.addDistinctId(sourcePerson, 'source-extra-2', 2) + + // Perform merge operations in a transaction + await repository.inTransaction('test-merge', async (tx) => { + // Move distinct IDs from source to target + const moveResult = await tx.moveDistinctIds(sourcePerson, targetPerson) + if (!moveResult.success) { + throw new Error('Failed to move distinct IDs') + } + + // Delete the source person + await tx.deletePerson(sourcePerson) + + // Update cohorts and feature flags + await tx.updateCohortsAndFeatureFlagsForMerge(team.id, sourcePerson.id, targetPerson.id) + + return { moveResult, targetPersonId: targetPerson.id } + }) + + // Verify source person is deleted + const sourceCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [sourceUuid], + 'verify-source-deleted' + ) + expect(sourceCheck.rows.length).toBe(0) + + // Verify all distinct IDs moved to target + await verifyDistinctIdsForPerson(team.id, targetPerson.id, targetUuid, [ + 'source-extra-1', + 'source-extra-2', + 'source-main', + 'target-main', + ]) + }) + + it('should rollback merge operations when moveDistinctIds fails', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const sourceUuid = '55555555-5555-5555-5555-555555555555' + const targetUuid = '66666666-6666-6666-6666-666666666666' + + // Create source and target persons + const { person: sourcePerson } = (await repository.createPerson( + createdAt, + { source: true }, + {}, + {}, + team.id, + null, + false, + sourceUuid, + [{ distinctId: 'merge-fail-source', version: 0 }] + )) as any + + const { person: targetPerson } = (await repository.createPerson( + createdAt, + { target: true }, + {}, + {}, + team.id, + null, + false, + targetUuid, + [{ distinctId: 'merge-fail-target', version: 0 }] + )) as any + + // Mock moveDistinctIds to fail + const spy = jest.spyOn((repository as any).secondaryRepo, 'moveDistinctIds') + spy.mockRejectedValueOnce(new Error('simulated moveDistinctIds failure')) + + await expect( + repository.inTransaction('test-merge-rollback', async (tx) => { + // This should fail and rollback + const moveResult = await tx.moveDistinctIds(sourcePerson, targetPerson) + + // These should not execute + await tx.deletePerson(sourcePerson) + await 
tx.updateCohortsAndFeatureFlagsForMerge(team.id, sourcePerson.id, targetPerson.id) + + return moveResult + }) + ).rejects.toThrow('simulated moveDistinctIds failure') + + spy.mockRestore() + + // Verify source person still exists + const sourceCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [sourceUuid], + 'verify-source-not-deleted' + ) + expect(sourceCheck.rows.length).toBe(1) + + // Verify distinct IDs remain with their original persons + await verifyDistinctIdsForPerson(team.id, sourcePerson.id, sourceUuid, ['merge-fail-source']) + await verifyDistinctIdsForPerson(team.id, targetPerson.id, targetUuid, ['merge-fail-target']) + }) + + it('should handle creation conflict within transaction correctly', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid1 = '77777777-7777-7777-7777-777777777777' + const uuid2 = '88888888-8888-8888-8888-888888888888' + + // Create first person with a distinct ID + await repository.createPerson(createdAt, { first: true }, {}, {}, team.id, null, false, uuid1, [ + { distinctId: 'conflict-did', version: 0 }, + ]) + + // Try to create another person with the same distinct ID in a transaction + // Should now return a failure result instead of throwing + const result = await repository.inTransaction('test-conflict', async (tx) => { + const createResult = await tx.createPerson( + createdAt, + { second: true }, + {}, + {}, + team.id, + null, + false, + uuid2, + [{ distinctId: 'conflict-did', version: 0 }] + ) + + // The transaction should handle the conflict gracefully + expect(createResult.success).toBe(false) + if (!createResult.success) { + expect(createResult.error).toBe('CreationConflict') + } + + return createResult + }) + + expect(result.success).toBe(false) + if (!result.success) { + expect(result.error).toBe('CreationConflict') + } + + // Verify second person was not created + const secondCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid2], + 'verify-second-not-created' + ) + expect(secondCheck.rows.length).toBe(0) + }) + + it('should propagate errors correctly through transaction boundaries', async () => { + const team = await getFirstTeam(postgres) + const customError = new Error('Custom transaction error') + + await expect( + repository.inTransaction('test-error-propagation', async (tx) => { + await tx.addPersonlessDistinctIdForMerge(team.id, 'error-test-did') + + throw customError + }) + ).rejects.toThrow('Custom transaction error') + + const check = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT 1 FROM posthog_personlessdistinctid WHERE distinct_id = $1', + ['error-test-did'], + 'verify-personless-rollback' + ) + expect(check.rows.length).toBe(0) + }) + + it('should handle primary database failure within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid = '99999999-9999-9999-9999-999999999999' + + const mockSpy = mockDatabaseError( + postgres, + new Error('primary database failure in transaction'), + 'updatePerson' + ) + + await expect( + repository.inTransaction('test-primary-failure', async (tx) => { + const createResult = await tx.createPerson( + createdAt, + { initial: true }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'primary-fail-did', version: 0 }] + ) + + if (!createResult.success) { + 
throw new Error('Failed to create person') + } + + await tx.updatePerson(createResult.person, { properties: { updated: true } }) + + return createResult.person + }) + ).rejects.toThrow('primary database failure in transaction') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-tx-rollback', + 'verify-secondary-tx-rollback' + ) + }) + + it('should handle secondary database failure within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' + + const mockSpy = mockDatabaseError( + migrationPostgres, + new Error('secondary database failure in transaction'), + 'insertPerson' + ) + + await expect( + repository.inTransaction('test-secondary-failure', async (tx) => { + const createResult = await tx.createPerson( + createdAt, + { initial: true }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'secondary-fail-did', version: 0 }] + ) + + return createResult + }) + ).rejects.toThrow('secondary database failure in transaction') + + mockSpy.mockRestore() + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT 1 FROM posthog_person WHERE uuid = $1', + [uuid], + 'verify-primary-no-commit', + 'verify-secondary-no-commit' + ) + }) + + it('should throw error when moveDistinctIds has mismatched results in transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const sourceUuid = 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' + const targetUuid = 'cccccccc-cccc-cccc-cccc-cccccccccccc' + + const { person: sourcePerson } = (await repository.createPerson( + createdAt, + { source: true }, + {}, + {}, + team.id, + null, + false, + sourceUuid, + [{ distinctId: 'mismatch-source', version: 0 }] + )) as any + + const { person: targetPerson } = (await repository.createPerson( + createdAt, + { target: true }, + {}, + {}, + team.id, + null, + false, + targetUuid, + [{ distinctId: 'mismatch-target', version: 0 }] + )) as any + + const spy = jest.spyOn((repository as any).secondaryRepo, 'moveDistinctIds') + spy.mockResolvedValueOnce({ success: false, error: 'TargetPersonDeleted' }) + + await expect( + repository.inTransaction('test-mismatch', async (tx) => { + const result = await tx.moveDistinctIds(sourcePerson, targetPerson) + return result + }) + ).rejects.toThrow('DualWrite moveDistinctIds mismatch') + + spy.mockRestore() + + await verifyDistinctIdsForPerson(team.id, sourcePerson.id, sourceUuid, ['mismatch-source']) + await verifyDistinctIdsForPerson(team.id, targetPerson.id, targetUuid, ['mismatch-target']) + }) + + it('should handle moveDistinctIds when both databases fail identically within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const sourceUuid = 'eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee' + const targetUuid = 'ffffffff-ffff-ffff-ffff-ffffffffffff' + + const { person: sourcePerson } = (await repository.createPerson( + createdAt, + { source: true }, + {}, + {}, + team.id, + null, + false, + sourceUuid, + [{ distinctId: 'identical-fail-source', version: 0 }] + )) as any + + const { person: targetPerson } = (await repository.createPerson( + createdAt, + { target: true }, + {}, + {}, + team.id, + null, + false, + 
targetUuid, + [{ distinctId: 'identical-fail-target', version: 0 }] + )) as any + + const primarySpy = jest.spyOn((repository as any).primaryRepo, 'moveDistinctIds') + const secondarySpy = jest.spyOn((repository as any).secondaryRepo, 'moveDistinctIds') + primarySpy.mockResolvedValueOnce({ success: false, error: 'TargetPersonDeleted' }) + secondarySpy.mockResolvedValueOnce({ success: false, error: 'TargetPersonDeleted' }) + + const result = await repository.inTransaction('test-identical-failure', async (tx) => { + const moveResult = await tx.moveDistinctIds(sourcePerson, targetPerson) + expect(moveResult.success).toBe(false) + if (!moveResult.success) { + expect(moveResult.error).toBe('TargetPersonDeleted') + } + return moveResult + }) + + expect(result.success).toBe(false) + if (!result.success) { + expect(result.error).toBe('TargetPersonDeleted') + } + + primarySpy.mockRestore() + secondarySpy.mockRestore() + }) + + it('should prevent or handle nested inTransaction calls gracefully', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + + // Test what happens when inTransaction is called within another inTransaction + // This could happen if code inadvertently nests transaction calls + + let innerTransactionExecuted = false + let outerTransactionCompleted = false + let errorCaught: Error | null = null + + try { + await repository.inTransaction('outer-transaction', async (outerTx) => { + // Create a person in the outer transaction + const outerResult = await outerTx.createPerson( + createdAt, + { level: 'outer' }, + {}, + {}, + team.id, + null, + false, + '11111111-0000-0000-0000-000000000001', + [{ distinctId: 'outer-tx-did', version: 0 }] + ) + + if (!outerResult.success) { + throw new Error('Outer transaction creation failed') + } + + // Attempt to nest another transaction + // This should either fail or be handled gracefully + try { + await repository.inTransaction('inner-transaction', async (innerTx) => { + innerTransactionExecuted = true + const innerResult = await innerTx.createPerson( + createdAt, + { level: 'inner' }, + {}, + {}, + team.id, + null, + false, + '22222222-0000-0000-0000-000000000002', + [{ distinctId: 'inner-tx-did', version: 0 }] + ) + return innerResult + }) + } catch (e: any) { + // Nested transaction might fail + errorCaught = e + } + + outerTransactionCompleted = true + return outerResult + }) + } catch (e: any) { + errorCaught = e + } + + // Check the behavior - either: + // 1. Nested transactions are not supported and throw an error + // 2. They work but use savepoints + // 3. 
They work but are actually part of the same transaction + + if (errorCaught) { + // If an error was thrown, it's likely because nested transactions aren't supported + // This is actually good behavior to prevent transaction confusion + expect(errorCaught.message).toMatch(/transaction|nested|already|active/i) + } else { + // If no error, check if both persons were created + const outerPerson = await repository.fetchPerson(team.id, 'outer-tx-did') + const innerPerson = await repository.fetchPerson(team.id, 'inner-tx-did') + + if (innerTransactionExecuted) { + // Both should exist if nested transactions are handled + expect(outerPerson).toBeDefined() + expect(innerPerson).toBeDefined() + } + expect(outerTransactionCompleted).toBe(true) + } + }) + + it('should propagate errors correctly through transaction boundaries', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + + // Test various error types and their propagation + + // Test 1: Custom application error + const customError = new Error('Custom application error in transaction') + await expect( + repository.inTransaction('test-custom-error', async (tx) => { + await tx.createPerson( + createdAt, + { test: 'error-propagation' }, + {}, + {}, + team.id, + null, + false, + '33333333-0000-0000-0000-000000000003', + [{ distinctId: 'error-test-1', version: 0 }] + ) + throw customError + }) + ).rejects.toThrow('Custom application error in transaction') + + // Verify the person was not created due to rollback + const person1 = await repository.fetchPerson(team.id, 'error-test-1') + expect(person1).toBeUndefined() + + // Test 2: Database constraint error + const uuid4 = '44444444-0000-0000-0000-000000000004' + await repository.createPerson(createdAt, { existing: true }, {}, {}, team.id, null, false, uuid4, [ + { distinctId: 'constraint-test', version: 0 }, + ]) + + // Try to create with same distinct ID in transaction + const result = await repository.inTransaction('test-constraint-error', async (tx) => { + const result = await tx.createPerson( + createdAt, + { duplicate: true }, + {}, + {}, + team.id, + null, + false, + '55555555-0000-0000-0000-000000000005', + [{ distinctId: 'constraint-test', version: 0 }] + ) + return result + }) + + // CreationConflict should be returned, not thrown + expect(result.success).toBe(false) + if (!result.success) { + expect(result.error).toBe('CreationConflict') + } + + // Test 3: Error in the middle of multiple operations + await expect( + repository.inTransaction('test-mid-operation-error', async (tx) => { + // First operation succeeds + const person = await tx.createPerson( + createdAt, + { step: 1 }, + {}, + {}, + team.id, + null, + false, + '66666666-0000-0000-0000-000000000006', + [{ distinctId: 'multi-op-1', version: 0 }] + ) + + if (!person.success) { + throw new Error('First operation failed') + } + + // Second operation succeeds + await tx.addDistinctId(person.person!, 'multi-op-2', 1) + + // Third operation fails + throw new Error('Intentional failure after partial success') + }) + ).rejects.toThrow('Intentional failure after partial success') + + // Verify all operations were rolled back + const person2 = await repository.fetchPerson(team.id, 'multi-op-1') + const person3 = await repository.fetchPerson(team.id, 'multi-op-2') + expect(person2).toBeUndefined() + expect(person3).toBeUndefined() + }) + + it('should handle mixed direct and transactional calls correctly', async () => { + const team = await getFirstTeam(postgres) + 
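// Direct repository calls commit on their own (each is an independent dual write);
+            // the inTransaction block below then operates on that already-committed person.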
const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + + // Create a person outside transaction + const outsideResult = await repository.createPerson( + createdAt, + { location: 'outside' }, + {}, + {}, + team.id, + null, + false, + '77777777-0000-0000-0000-000000000007', + [{ distinctId: 'outside-tx', version: 0 }] + ) + + expect(outsideResult.success).toBe(true) + if (!outsideResult.success) { + throw new Error('Expected person creation to succeed') + } + const outsidePerson = outsideResult.person + + // Now use it within a transaction + const txResult = await repository.inTransaction('test-mixed-calls', async (tx) => { + // Update the person created outside + const [updated] = await tx.updatePerson(outsidePerson, { + properties: { location: 'updated-inside', new_prop: 'added' }, + }) + + // Add a distinct ID + await tx.addDistinctId(updated, 'added-in-tx', 1) + + // Create a new person within the transaction + const newPerson = await tx.createPerson( + createdAt, + { location: 'inside' }, + {}, + {}, + team.id, + null, + false, + '88888888-0000-0000-0000-000000000008', + [{ distinctId: 'inside-tx', version: 0 }] + ) + + return { updated, newPerson } + }) + + // Verify the mixed operations worked + const updatedOutside = await repository.fetchPerson(team.id, 'outside-tx') + const addedDistinctId = await repository.fetchPerson(team.id, 'added-in-tx') + const insidePerson = await repository.fetchPerson(team.id, 'inside-tx') + + expect(updatedOutside).toBeDefined() + expect(updatedOutside?.properties.location).toBe('updated-inside') + expect(updatedOutside?.properties.new_prop).toBe('added') + expect(addedDistinctId?.id).toBe(outsidePerson.id) + expect(insidePerson).toBeDefined() + expect(txResult.newPerson.success).toBe(true) + }) + + it('should enforce version synchronization in updatePerson within transaction', async () => { + const team = await getFirstTeam(postgres) + const createdAt = DateTime.fromISO('2024-01-20T10:30:00.000Z').toUTC() + const uuid = 'dddddddd-dddd-dddd-dddd-dddddddddddd' + + const { person } = (await repository.createPerson( + createdAt, + { initial: 'value' }, + {}, + {}, + team.id, + null, + false, + uuid, + [{ distinctId: 'version-sync-did', version: 0 }] + )) as any + + const result = await repository.inTransaction('test-version-sync', async (tx) => { + const [updatedPerson] = await tx.updatePerson(person, { properties: { updated: 'value' } }) + return updatedPerson + }) + + // Verify version was properly synchronized between primary and secondary + const primaryVersion = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT version FROM posthog_person WHERE uuid = $1', + [uuid], + 'check-primary-version' + ) + const secondaryVersion = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT version FROM posthog_person WHERE uuid = $1', + [uuid], + 'check-secondary-version' + ) + + // Both should have the same version (primary's version + 1) + expect(Number(primaryVersion.rows[0].version)).toBe(person.version + 1) + expect(Number(secondaryVersion.rows[0].version)).toBe(person.version + 1) + expect(result.version).toBe(person.version + 1) + + // Verify that both databases have the same version even though secondary is forced to match primary + expect(Number(primaryVersion.rows[0].version)).toBe(Number(secondaryVersion.rows[0].version)) + }) + }) +}) diff --git a/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository.ts 
b/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository.ts
new file mode 100644
index 0000000000..2e91f0667c
--- /dev/null
+++ b/plugin-server/src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository.ts
@@ -0,0 +1,492 @@
+import { DateTime } from 'luxon'
+
+import { Properties } from '@posthog/plugin-scaffold'
+
+import { TopicMessage } from '../../../../kafka/producer'
+import { InternalPerson, PropertiesLastOperation, PropertiesLastUpdatedAt, Team } from '../../../../types'
+import { CreatePersonResult, MoveDistinctIdsResult } from '../../../../utils/db/db'
+import { PostgresRouter, PostgresUse } from '../../../../utils/db/postgres'
+import { TwoPhaseCommitCoordinator } from '../../../../utils/db/two-phase'
+import { logger as _logger } from '../../../../utils/logger'
+import { dualWriteComparisonCounter, dualWriteDataMismatchCounter } from '../metrics'
+import { PersonUpdate } from '../person-update-batch'
+import { DualWritePersonRepositoryTransaction } from './dualwrite-person-repository-transaction'
+import { PersonRepository } from './person-repository'
+import { PersonRepositoryTransaction } from './person-repository-transaction'
+import type { PostgresPersonRepositoryOptions } from './postgres-person-repository'
+import { PostgresPersonRepository } from './postgres-person-repository'
+import { RawPostgresPersonRepository } from './raw-postgres-person-repository'
+
+export interface PostgresDualWritePersonRepositoryOptions extends PostgresPersonRepositoryOptions {
+    comparisonEnabled?: boolean
+}
+
+export class PostgresDualWritePersonRepository implements PersonRepository {
+    private coordinator: TwoPhaseCommitCoordinator
+    private primaryRepo: RawPostgresPersonRepository
+    private secondaryRepo: RawPostgresPersonRepository
+    private comparisonEnabled: boolean
+
+    constructor(
+        primaryRouter: PostgresRouter,
+        secondaryRouter: PostgresRouter,
+        options?: Partial<PostgresDualWritePersonRepositoryOptions>
+    ) {
+        this.primaryRepo = new PostgresPersonRepository(primaryRouter, options)
+        this.secondaryRepo = new PostgresPersonRepository(secondaryRouter, options)
+        this.comparisonEnabled = options?.comparisonEnabled ?? false
+        this.coordinator = new TwoPhaseCommitCoordinator({
+            left: { router: primaryRouter, use: PostgresUse.PERSONS_WRITE, name: 'primary' },
+            right: { router: secondaryRouter, use: PostgresUse.PERSONS_WRITE, name: 'secondary' },
+        })
+    }
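+
+    // Assumed contract, inferred from usage below: `coordinator.run(tag, cb)` opens a
+    // transaction on each leg, hands the two transaction handles to `cb`, and commits
+    // both only when `cb` resolves truthy; a thrown error or a `false` return rolls
+    // back both legs.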
+
+    // For a read, just use the primary as the source of truth (the underlying repo
+    // decides whether to use the writer or a read replica)
+    async fetchPerson(
+        teamId: Team['id'],
+        distinctId: string,
+        options?: { forUpdate?: boolean; useReadReplica?: boolean }
+    ): Promise<InternalPerson | undefined> {
+        return await this.primaryRepo.fetchPerson(teamId, distinctId, options)
+    }
+
+    /*
+     * Needs to have the exact same contract as the single-write repo
+     */
+    async createPerson(
+        createdAt: DateTime,
+        properties: Properties,
+        propertiesLastUpdatedAt: PropertiesLastUpdatedAt,
+        propertiesLastOperation: PropertiesLastOperation,
+        teamId: Team['id'],
+        isUserId: number | null,
+        isIdentified: boolean,
+        uuid: string,
+        distinctIds?: { distinctId: string; version?: number }[]
+    ): Promise<CreatePersonResult> {
+        let result!: CreatePersonResult
+        try {
+            await this.coordinator.run('createPerson', async (leftTx, rightTx) => {
+                // create is serial: create on primary first, then reuse the DB-generated id on the secondary
+                const p = await this.primaryRepo.createPerson(
+                    createdAt,
+                    properties,
+                    propertiesLastUpdatedAt,
+                    propertiesLastOperation,
+                    teamId,
+                    isUserId,
+                    isIdentified,
+                    uuid,
+                    distinctIds,
+                    leftTx
+                )
+                if (!p.success) {
+                    result = p
+                    throw new Error('DualWrite abort: primary creation conflict')
+                }
+
+                // force same id on secondary
+                const forcedId = Number(p.person.id)
+                const s = await this.secondaryRepo.createPerson(
+                    createdAt,
+                    properties,
+                    propertiesLastUpdatedAt,
+                    propertiesLastOperation,
+                    teamId,
+                    isUserId,
+                    isIdentified,
+                    uuid,
+                    distinctIds,
+                    rightTx,
+                    forcedId
+                )
+                if (!s.success) {
+                    result = s
+                    throw new Error('DualWrite abort: secondary creation conflict')
+                }
+
+                // Compare results between primary and secondary
+                if (this.comparisonEnabled) {
+                    this.compareCreatePersonResults(p, s)
+                }
+
+                result = p
+                return true
+            })
+        } catch (err) {
+            // if we captured a handled conflict from either side, surface it to match single-write behaviour
+            if (result && !result.success && result.error === 'CreationConflict') {
+                return result
+            }
+            throw err
+        }
+        return result
+    }
+
+    async updatePerson(
+        person: InternalPerson,
+        update: Partial<InternalPerson>,
+        tag?: string
+    ): Promise<[InternalPerson, TopicMessage[], boolean]> {
+        // Enforce version parity across primary/secondary: run primary first, then set secondary to primary's new version
+        let primaryOut!: [InternalPerson, TopicMessage[], boolean]
+        await this.coordinator.run(`updatePerson:${tag ?? 'update'}`, async (leftTx, rightTx) => {
+            const p = await this.primaryRepo.updatePerson(person, { ...update }, tag, leftTx)
+            primaryOut = p
+
+            const primaryUpdated = p[0]
+            const secondaryUpdate: Partial<InternalPerson> = {
+                properties: primaryUpdated.properties,
+                properties_last_updated_at: primaryUpdated.properties_last_updated_at,
+                properties_last_operation: primaryUpdated.properties_last_operation,
+                is_identified: primaryUpdated.is_identified,
+                version: primaryUpdated.version,
+            }
+
+            const secondaryOut = await this.secondaryRepo.updatePerson(
+                person,
+                secondaryUpdate,
+                tag ? `${tag}-secondary` : undefined,
+                rightTx
+            )
+
+            // Compare results between primary and secondary
+            if (this.comparisonEnabled) {
+                this.compareUpdatePersonResults(primaryOut, secondaryOut, tag)
+            }
+
+            return true
+        })
+        return primaryOut
+    }
`${tag}-secondary` : undefined, + rightTx + ) + + // Compare results between primary and secondary + if (this.comparisonEnabled) { + this.compareUpdatePersonResults(primaryOut, secondaryOut, tag) + } + + return true + }) + return primaryOut + } + + // No 2PC for this method; it's believed to be disabled in production + async updatePersonAssertVersion(personUpdate: PersonUpdate): Promise<[number | undefined, TopicMessage[]]> { + let primaryOut!: [number | undefined, TopicMessage[]] + await this.coordinator.run('updatePersonAssertVersion', async () => { + const p = await this.primaryRepo.updatePersonAssertVersion({ ...personUpdate }) + primaryOut = p + + // Only update the secondary if the optimistic update succeeded on the primary + if (p[0] !== undefined) { + const s = await this.secondaryRepo.updatePersonAssertVersion({ ...personUpdate }) + + // Compare results + if (this.comparisonEnabled) { + if (p[0] !== s[0]) { + dualWriteComparisonCounter.inc({ + operation: 'updatePersonAssertVersion', + comparison_type: 'version_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'updatePersonAssertVersion', + comparison_type: 'version_match', + result: 'match', + }) + } + + // Compare message counts + this.compareTopicMessages('updatePersonAssertVersion', p[1], s[1]) + } + } + return true + }) + return primaryOut + } + + async deletePerson(person: InternalPerson): Promise<TopicMessage[]> { + let messages!: TopicMessage[] + await this.coordinator.run('deletePerson', async (lTx, rTx) => { + const [p, s] = await Promise.all([ + this.primaryRepo.deletePerson(person, lTx), + this.secondaryRepo.deletePerson(person, rTx), + ]) + + if (this.comparisonEnabled) { + this.compareTopicMessages('deletePerson', p, s) + } + + messages = p + return true + }) + return messages + } + + async addDistinctId(person: InternalPerson, distinctId: string, version: number): Promise<TopicMessage[]> { + let messages!: TopicMessage[] + await this.coordinator.run('addDistinctId', async (lTx, rTx) => { + const [p, s] = await Promise.all([ + this.primaryRepo.addDistinctId(person, distinctId, version, lTx), + this.secondaryRepo.addDistinctId(person, distinctId, version, rTx), + ]) + + if (this.comparisonEnabled) { + this.compareTopicMessages('addDistinctId', p, s) + } + + messages = p + return true + }) + return messages + } + + async moveDistinctIds( + source: InternalPerson, + target: InternalPerson, + limit?: number + ): Promise<MoveDistinctIdsResult> { + let pResult!: MoveDistinctIdsResult + await this.coordinator.run('moveDistinctIds', async (lTx, rTx) => { + const [p, s] = await Promise.all([ + this.primaryRepo.moveDistinctIds(source, target, limit, lTx), + this.secondaryRepo.moveDistinctIds(source, target, limit, rTx), + ]) + // If both repositories return the same failure result, that's expected behavior + // (e.g., both detected that the target person was deleted) + if (!p.success && !s.success && p.error === s.error) { + pResult = p + // Return false to roll back the transaction; the writes already failed, so the rollback is mostly a formality + return false + } + // If there's a mismatch in success or error type, that's unexpected + if (p.success !== s.success || (!p.success && !s.success && p.error !== s.error)) { + // Emit metric for mismatch + if (this.comparisonEnabled) { + dualWriteComparisonCounter.inc({ + operation: 'moveDistinctIds', + comparison_type: p.success !== s.success ?
'success_mismatch' : 'error_mismatch', + result: 'mismatch', + }) + } + pResult = p + // Make sure we roll back this transaction + return false + } + pResult = p + return p.success + }) + return pResult + } + + async addPersonlessDistinctId(teamId: Team['id'], distinctId: string): Promise<boolean> { + let isMerged!: boolean + await this.coordinator.run('addPersonlessDistinctId', async (lTx, rTx) => { + const [p, s] = await Promise.all([ + this.primaryRepo.addPersonlessDistinctId(teamId, distinctId, lTx), + this.secondaryRepo.addPersonlessDistinctId(teamId, distinctId, rTx), + ]) + + if (this.comparisonEnabled) { + if (p !== s) { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctId', + comparison_type: 'boolean_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctId', + comparison_type: 'boolean_match', + result: 'match', + }) + } + } + + isMerged = p + }) + return isMerged + } + + async fetchPersonDistinctIds(person: InternalPerson, limit?: number): Promise<string[]> { + // This is a read operation, so only use the primary + return await this.primaryRepo.fetchPersonDistinctIds(person, limit) + } + + async addPersonlessDistinctIdForMerge(teamId: Team['id'], distinctId: string): Promise<boolean> { + let isMerged!: boolean + await this.coordinator.run('addPersonlessDistinctIdForMerge', async (lTx, rTx) => { + const [p, s] = await Promise.all([ + this.primaryRepo.addPersonlessDistinctIdForMerge(teamId, distinctId, lTx), + this.secondaryRepo.addPersonlessDistinctIdForMerge(teamId, distinctId, rTx), + ]) + + if (this.comparisonEnabled) { + if (p !== s) { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctIdForMerge', + comparison_type: 'boolean_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'addPersonlessDistinctIdForMerge', + comparison_type: 'boolean_match', + result: 'match', + }) + } + } + + isMerged = p + }) + return isMerged + } + + async personPropertiesSize(personId: string): Promise<number> { + return await this.primaryRepo.personPropertiesSize(personId) + } + + async updateCohortsAndFeatureFlagsForMerge( + teamID: Team['id'], + sourcePersonID: InternalPerson['id'], + targetPersonID: InternalPerson['id'] + ): Promise<void> { + await this.coordinator.run('updateCohortsAndFeatureFlagsForMerge', async (lTx, rTx) => { + await Promise.all([ + this.primaryRepo.updateCohortsAndFeatureFlagsForMerge(teamID, sourcePersonID, targetPersonID, lTx), + this.secondaryRepo.updateCohortsAndFeatureFlagsForMerge(teamID, sourcePersonID, targetPersonID, rTx), + ]) + return true + }) + } + async inTransaction<T>( + description: string, + transaction: (tx: PersonRepositoryTransaction) => Promise<T> + ): Promise<T> { + // Open a 2PC boundary spanning the entire callback.
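+ // Not shown in this diff: the coordinator is assumed to PREPARE TRANSACTION on both sides and then + // COMMIT PREPARED on success or ROLLBACK PREPARED on failure; the 'dualwrite:' gid prefix that the + // test helpers roll back matches that scheme.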
+ let result!: T + try { + await this.coordinator.run(`dual-tx:${description}`, async (lTx, rTx) => { + const txWrapper = new DualWritePersonRepositoryTransaction( + this.primaryRepo, + this.secondaryRepo, + lTx, + rTx, + this.comparisonEnabled + ) + result = await transaction(txWrapper) + return true + }) + } catch (err: any) { + // Handle special cases where the transaction wrapper throws but we want to return a result + // This matches the behavior of the direct createPerson method + if (err.result && !err.result.success && err.result.error === 'CreationConflict') { + return err.result as T + } + throw err + } + return result + } + + private compareCreatePersonResults(primary: CreatePersonResult, secondary: CreatePersonResult): void { + if (primary.success !== secondary.success) { + dualWriteComparisonCounter.inc({ + operation: 'createPerson', + comparison_type: 'success_mismatch', + result: 'mismatch', + }) + return + } + + if (!primary.success || !secondary.success) { + // Both failed, check if error types match + if (!primary.success && !secondary.success && primary.error !== secondary.error) { + dualWriteComparisonCounter.inc({ + operation: 'createPerson', + comparison_type: 'error_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation: 'createPerson', + comparison_type: 'error_match', + result: 'match', + }) + } + return + } + + // Both succeeded, compare person data + const p = primary.person + const s = secondary.person + let hasMismatch = false + + if (JSON.stringify(p.properties) !== JSON.stringify(s.properties)) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson', field: 'properties' }) + hasMismatch = true + } + if (p.version !== s.version) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson', field: 'version' }) + hasMismatch = true + } + if (p.is_identified !== s.is_identified) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson', field: 'is_identified' }) + hasMismatch = true + } + if (p.is_user_id !== s.is_user_id) { + dualWriteDataMismatchCounter.inc({ operation: 'createPerson', field: 'is_user_id' }) + hasMismatch = true + } + + dualWriteComparisonCounter.inc({ + operation: 'createPerson', + comparison_type: 'data_comparison', + result: hasMismatch ? 'mismatch' : 'match', + }) + } + + private compareUpdatePersonResults( + primary: [InternalPerson, TopicMessage[], boolean], + secondary: [InternalPerson, TopicMessage[], boolean], + tag?: string + ): void { + const [pPerson, pMessages, pChanged] = primary + const [sPerson, sMessages, sChanged] = secondary + let hasMismatch = false + + if (JSON.stringify(pPerson.properties) !== JSON.stringify(sPerson.properties)) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson:${tag ?? 'update'}`, field: 'properties' }) + hasMismatch = true + } + if (pPerson.version !== sPerson.version) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson:${tag ?? 'update'}`, field: 'version' }) + hasMismatch = true + } + if (pPerson.is_identified !== sPerson.is_identified) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson:${tag ?? 'update'}`, field: 'is_identified' }) + hasMismatch = true + } + if (pChanged !== sChanged) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson:${tag ?? 'update'}`, field: 'changed_flag' }) + hasMismatch = true + } + + if (pMessages.length !== sMessages.length) { + dualWriteDataMismatchCounter.inc({ operation: `updatePerson:${tag ?? 
'update'}`, field: 'message_count' }) + hasMismatch = true + } + + dualWriteComparisonCounter.inc({ + operation: `updatePerson:${tag ?? 'update'}`, + comparison_type: 'data_comparison', + result: hasMismatch ? 'mismatch' : 'match', + }) + } + + private compareTopicMessages(operation: string, primary: TopicMessage[], secondary: TopicMessage[]): void { + if (primary.length !== secondary.length) { + dualWriteComparisonCounter.inc({ + operation, + comparison_type: 'message_count_mismatch', + result: 'mismatch', + }) + } else { + dualWriteComparisonCounter.inc({ + operation, + comparison_type: 'message_count_match', + result: 'match', + }) + } + } +} diff --git a/plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts b/plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts index ce713d2bd6..2e5c2223ab 100644 --- a/plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts +++ b/plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts @@ -62,7 +62,8 @@ export class PostgresPersonRepository private async handleOversizedPersonProperties( person: InternalPerson, - update: Partial<InternalPerson> + update: Partial<InternalPerson>, + tx?: TransactionClient ): Promise<[InternalPerson, TopicMessage[], boolean]> { const currentSize = await this.personPropertiesSize(person.id) @@ -71,7 +72,7 @@ export class PostgresPersonRepository personPropertiesSizeViolationCounter.inc({ violation_type: 'existing_record_violates_limit', }) - return await this.handleExistingOversizedRecord(person, update) + return await this.handleExistingOversizedRecord(person, update, tx) } catch (error) { logger.warn('Failed to handle previously oversized person record', { team_id: person.team_id, @@ -107,7 +108,8 @@ export class PostgresPersonRepository private async handleExistingOversizedRecord( person: InternalPerson, - update: Partial<InternalPerson> + update: Partial<InternalPerson>, + tx?: TransactionClient ): Promise<[InternalPerson, TopicMessage[], boolean]> { try { const trimmedProperties = this.trimPropertiesToFitSize( @@ -125,7 +127,8 @@ export class PostgresPersonRepository const [updatedPerson, kafkaMessages, versionDisparity] = await this.updatePerson( person, trimmedUpdate, - 'oversized_properties_remediation' + 'oversized_properties_remediation', + tx ) oversizedPersonPropertiesTrimmedCounter.inc({ result: 'success' }) return [updatedPerson, kafkaMessages, versionDisparity] @@ -258,9 +261,11 @@ export class PostgresPersonRepository isIdentified: boolean, uuid: string, distinctIds?: { distinctId: string; version?: number }[], - tx?: TransactionClient + tx?: TransactionClient, + // Used to support dual-write; we want to force the id a person is created with to prevent drift + forcedId?: number ): Promise<CreatePersonResult> { - distinctIds ||= [] + distinctIds = distinctIds || [] for (const distinctId of distinctIds) { distinctId.version ||= 0 @@ -270,47 +275,66 @@ export class PostgresPersonRepository const personVersion = 0 try { - const { rows } = await this.postgres.query( - tx ?? PostgresUse.PERSONS_WRITE, + const baseColumns = [ + 'created_at', + 'properties', + 'properties_last_updated_at', + 'properties_last_operation', + 'team_id', + 'is_user_id', + 'is_identified', + 'uuid', + 'version', + ] + const columns = forcedId ?
['id', ...baseColumns] : baseColumns + + const valuePlaceholders = columns.map((_, i) => `$${i + 1}`).join(', ') + + const baseParams = [ + createdAt.toISO(), + sanitizeJsonbValue(properties), + sanitizeJsonbValue(propertiesLastUpdatedAt), + sanitizeJsonbValue(propertiesLastOperation), + teamId, + isUserId, + isIdentified, + uuid, + personVersion, + ] + const personParams = forcedId ? [forcedId, ...baseParams] : baseParams + + // Find the actual index of team_id in the personParams array (1-indexed for SQL) + const teamIdParamIndex = personParams.indexOf(teamId) + 1 + const distinctIdVersionStartIndex = columns.length + 1 + const distinctIdStartIndex = distinctIdVersionStartIndex + distinctIds.length + + const query = `WITH inserted_person AS ( - INSERT INTO posthog_person ( - created_at, properties, properties_last_updated_at, - properties_last_operation, team_id, is_user_id, is_identified, uuid, version - ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) + INSERT INTO posthog_person (${columns.join(', ')}) + VALUES (${valuePlaceholders}) RETURNING * )` + - distinctIds - .map( - // NOTE: Keep this in sync with the posthog_persondistinctid INSERT in - // `addDistinctId` - (_, index) => `, distinct_id_${index} AS ( + distinctIds + .map( + // NOTE: Keep this in sync with the posthog_persondistinctid INSERT in + // `addDistinctId` + (_, index) => `, distinct_id_${index} AS ( INSERT INTO posthog_persondistinctid (distinct_id, person_id, team_id, version) VALUES ( - $${11 + index + distinctIds!.length - 1}, + $${distinctIdStartIndex + index}, (SELECT id FROM inserted_person), - $5, - $${10 + index}) + $${teamIdParamIndex}, + $${distinctIdVersionStartIndex + index}) )` - ) - .join('') + - `SELECT * FROM inserted_person;`, + ) + .join('') + + ` SELECT * FROM inserted_person;` + + const { rows } = await this.postgres.query( + tx ?? PostgresUse.PERSONS_WRITE, + query, [ - createdAt.toISO(), - sanitizeJsonbValue(properties), - sanitizeJsonbValue(propertiesLastUpdatedAt), - sanitizeJsonbValue(propertiesLastOperation), - teamId, - isUserId, - isIdentified, - uuid, - personVersion, - // The copy and reverse here is to maintain compatability with pre-existing code - // and tests. Postgres appears to assign IDs in reverse order of the INSERTs in the - // CTEs above, so we need to reverse the distinctIds to match the old behavior where - // we would do a round trip for each INSERT. We shouldn't actually depend on the - // `id` column of distinct_ids, so this is just a simple way to keeps tests exactly - // the same and prove behavior is the same as before. + ...personParams, ...distinctIds .slice() .reverse() @@ -353,6 +377,7 @@ export class PostgresPersonRepository } catch (error) { // Handle constraint violation - another process created the person concurrently if (error instanceof Error && error.message.includes('unique constraint')) { + // TODO: should this object literal be typed as CreatePersonResult? return { success: false, error: 'CreationConflict', @@ -587,9 +612,9 @@ export class PostgresPersonRepository return rows.map((row) => row.distinct_id) } - async addPersonlessDistinctId(teamId: number, distinctId: string): Promise<boolean> { + async addPersonlessDistinctId(teamId: number, distinctId: string, tx?: TransactionClient): Promise<boolean> { const result = await this.postgres.query( - PostgresUse.PERSONS_WRITE, + tx ??
PostgresUse.PERSONS_WRITE, ` INSERT INTO posthog_personlessdistinctid (team_id, distinct_id, is_merged, created_at) VALUES ($1, $2, false, now()) @@ -606,7 +631,7 @@ export class PostgresPersonRepository // ON CONFLICT ... DO NOTHING won't give us our RETURNING, so we have to do another SELECT const existingResult = await this.postgres.query( - PostgresUse.PERSONS_WRITE, + tx ?? PostgresUse.PERSONS_WRITE, ` SELECT is_merged FROM posthog_personlessdistinctid @@ -752,7 +777,7 @@ export class PostgresPersonRepository return [updatedPerson, [kafkaMessage], versionDisparity > 0] } catch (error) { if (this.isPropertiesSizeConstraintViolation(error) && tag !== 'oversized_properties_remediation') { - return await this.handleOversizedPersonProperties(person, update) + return await this.handleOversizedPersonProperties(person, update, tx) } // Re-throw other errors diff --git a/plugin-server/src/worker/ingestion/persons/repositories/raw-postgres-person-repository.ts b/plugin-server/src/worker/ingestion/persons/repositories/raw-postgres-person-repository.ts index ff9ab25b24..43c5bdb258 100644 --- a/plugin-server/src/worker/ingestion/persons/repositories/raw-postgres-person-repository.ts +++ b/plugin-server/src/worker/ingestion/persons/repositories/raw-postgres-person-repository.ts @@ -25,7 +25,8 @@ export interface RawPostgresPersonRepository { isIdentified: boolean, uuid: string, distinctIds?: { distinctId: string; version?: number }[], - tx?: TransactionClient + tx?: TransactionClient, + forcedId?: number ): Promise<CreatePersonResult> updatePerson( @@ -54,8 +55,7 @@ export interface RawPostgresPersonRepository { ): Promise fetchPersonDistinctIds(person: InternalPerson, limit?: number, tx?: TransactionClient): Promise<string[]> - - addPersonlessDistinctId(teamId: Team['id'], distinctId: string): Promise<boolean> + addPersonlessDistinctId(teamId: Team['id'], distinctId: string, tx?: TransactionClient): Promise<boolean> addPersonlessDistinctIdForMerge(teamId: Team['id'], distinctId: string, tx?: TransactionClient): Promise<boolean> personPropertiesSize(personId: string): Promise<number> diff --git a/plugin-server/src/worker/ingestion/persons/repositories/singlewrite-dualwrite-compat.test.ts b/plugin-server/src/worker/ingestion/persons/repositories/singlewrite-dualwrite-compat.test.ts new file mode 100644 index 0000000000..078414a0a7 --- /dev/null +++ b/plugin-server/src/worker/ingestion/persons/repositories/singlewrite-dualwrite-compat.test.ts @@ -0,0 +1,1257 @@ +// Tests scenarios to ensure that dual-write and single-write outcomes are the same. +// We should test: 1. the contract returned is the same, 2.
consistency across primary and secondary +import { DateTime } from 'luxon' + +import { TopicMessage } from '~/kafka/producer' +import { resetTestDatabase } from '~/tests/helpers/sql' +import { Hub, Team } from '~/types' +import { InternalPerson } from '~/types' +import { closeHub, createHub } from '~/utils/db/hub' +import { PostgresRouter, PostgresUse } from '~/utils/db/postgres' +import { UUIDT } from '~/utils/utils' + +import { uuidFromDistinctId } from '../../person-uuid' +import { PersonPropertiesSizeViolationError } from './person-repository' +import { PostgresDualWritePersonRepository } from './postgres-dualwrite-person-repository' +import { PostgresPersonRepository } from './postgres-person-repository' +import { + TEST_TIMESTAMP, + TEST_UUIDS, + assertConsistencyAcrossDatabases, + assertConsistentDatabaseErrorHandling, + assertCreatePersonConflictContractParity, + assertCreatePersonContractParity, + cleanupPrepared, + getFirstTeam, + mockDatabaseError, + setupMigrationDb, +} from './test-helpers' + +jest.mock('../../../../utils/logger') + +describe('Postgres Single Write - Postgres Dual Write Compatibility', () => { + let hub: Hub + let postgres: PostgresRouter + let migrationPostgres: PostgresRouter + let dualWriteRepository: PostgresDualWritePersonRepository + let singleWriteRepository: PostgresPersonRepository + + async function createPersonsInBothRepos( + team: Team, + properties: Record<string, any> = { name: 'A' }, + singleDistinctId: string = 'single-a', + dualDistinctId: string = 'dual-a', + createdAt: DateTime = TEST_TIMESTAMP, + singleUuid: string = TEST_UUIDS.single, + dualUuid: string = TEST_UUIDS.dual + ) { + const [singleResult, dualResult] = await Promise.all([ + singleWriteRepository.createPerson(createdAt, properties, {}, {}, team.id, null, false, singleUuid, [ + { distinctId: singleDistinctId, version: 0 }, + ]), + dualWriteRepository.createPerson(createdAt, properties, {}, {}, team.id, null, false, dualUuid, [ + { distinctId: dualDistinctId, version: 0 }, + ]), + ]) + + if (!singleResult.success || !dualResult.success) { + throw new Error('Failed to create test persons') + } + + return { + singleResult: { ...singleResult, person: singleResult.person }, + dualResult: { ...dualResult, person: dualResult.person }, + } + } + + beforeEach(async () => { + hub = await createHub() + await resetTestDatabase(undefined, {}, {}, { withExtendedTestData: false }) + postgres = hub.db.postgres + migrationPostgres = hub.db.postgresPersonMigration + await setupMigrationDb(migrationPostgres) + + dualWriteRepository = new PostgresDualWritePersonRepository(postgres, migrationPostgres) + singleWriteRepository = new PostgresPersonRepository(postgres) + + const redis = await hub.redisPool.acquire() + await redis.flushdb() + await hub.redisPool.release(redis) + }) + + afterEach(async () => { + await cleanupPrepared(hub) + await closeHub(hub) + jest.clearAllMocks() + }) + + describe('createPerson() is compatible between single and dual write and consistent between primary and secondary', () => { + it('happy path createPerson()', async () => { + const team = await getFirstTeam(postgres) + + const singleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Bob' }, + {}, + {}, + team.id, + null, + true, + TEST_UUIDS.single, + [{ distinctId: 'single-a', version: 0 }] + ) + + const dualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Bob' }, + {}, + {}, + team.id, + null, + true, + TEST_UUIDS.dual, + [{ distinctId: 'dual-a', version: 0 }] + ) + +
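// Both repositories must expose the same success contract to the caller. +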
assertCreatePersonContractParity(singleResult, dualResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-create', + 'verify-secondary-create' + ) + }) + + it('createPerson() PersonPropertiesSizeViolation', async () => { + const team = await getFirstTeam(postgres) + + const singleSpy = mockDatabaseError( + postgres, + new PersonPropertiesSizeViolationError('too big', team.id, undefined), + 'insertPerson' + ) + let singleError: any + try { + await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.single, + [{ distinctId: 'single-a', version: 0 }] + ) + } catch (e) { + singleError = e + } + singleSpy.mockRestore() + + const dualSpy = mockDatabaseError( + migrationPostgres, + new PersonPropertiesSizeViolationError('too big', team.id, undefined), + 'insertPerson' + ) + let dualError: any + try { + await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.dual, + [{ distinctId: 'dual-a', version: 0 }] + ) + } catch (e) { + dualError = e + } + dualSpy.mockRestore() + + expect(singleError).toEqual(dualError) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.single], + 'verify-primary-create', + 'verify-secondary-create' + ) + }) + + it('createPerson() CreationConflict', async () => { + const team = await getFirstTeam(postgres) + const distinctId = 'primary-conflict-distinct' + + const seedP = await postgres.query( + PostgresUse.PERSONS_WRITE, + ` + WITH p AS ( + INSERT INTO posthog_person ( + created_at, properties, properties_last_updated_at, properties_last_operation, + team_id, is_user_id, is_identified, uuid, version + ) + VALUES (now(), '{}'::jsonb, '{}'::jsonb, '{}'::jsonb, $1, NULL, false, $2, 0) + RETURNING id + ) + INSERT INTO posthog_persondistinctid (distinct_id, person_id, team_id, version) + SELECT $3, p.id, $1, 0 FROM p + RETURNING person_id`, + [team.id, new UUIDT().toString(), distinctId], + 'seed-primary-pdi-conflict' + ) + expect(seedP.rows.length).toBe(1) + + const singleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + uuidFromDistinctId(team.id, distinctId), + [{ distinctId, version: 0 }] + ) + const dualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + uuidFromDistinctId(team.id, distinctId), + [{ distinctId, version: 0 }] + ) + + assertCreatePersonConflictContractParity(singleResult, dualResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [uuidFromDistinctId(team.id, distinctId)], + 'verify-primary-create', + 'verify-secondary-create' + ) + }) + it('createPerson() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'insertPerson', + () => + singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.single, + [{ distinctId: 'single-a', version: 0 }] + ), + () => + dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.single, + 
[{ distinctId: 'dual-a', version: 0 }] + ), + 'unhandled database error' + ) + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.single], + 'verify-primary-create', + 'verify-secondary-create' + ) + }) + }) + describe('updatePerson() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertUpdatePersonContractParity( + singleResult: [InternalPerson, TopicMessage[], boolean], + dualResult: [InternalPerson, TopicMessage[], boolean] + ) { + expect(singleResult[0].properties).toEqual(dualResult[0].properties) + // make sure message exists and has same topic + expect(singleResult[1][0]?.topic).toEqual(dualResult[1][0]?.topic) + expect(singleResult[2]).toEqual(dualResult[2]) + } + it('happy path updatePerson()', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + const singleUpdateResult = await singleWriteRepository.updatePerson( + singleCreatePersonResult.person, + { properties: { name: 'B' } }, + 'single-update' + ) + const dualUpdateResult = await dualWriteRepository.updatePerson( + dualCreatePersonResult.person, + { properties: { name: 'B' } }, + 'dual-update' + ) + assertUpdatePersonContractParity(singleUpdateResult, dualUpdateResult) + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-update', + 'verify-secondary-update' + ) + }) + it('updatePerson() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'updatePerson', + () => + singleWriteRepository.updatePerson( + singleCreatePersonResult.person, + { properties: { name: 'B' } }, + 'single-update' + ), + () => + dualWriteRepository.updatePerson( + dualCreatePersonResult.person, + { properties: { name: 'B' } }, + 'dual-update' + ), + 'unhandled database error' + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-update', + 'verify-secondary-update' + ) + }) + it('updatePerson() PersonPropertiesSizeViolation update attempts to violate the limit', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + await assertConsistentDatabaseErrorHandling( + postgres, + { message: 'Check constraint violation', code: '23514', constraint: 'check_properties_size' }, + 'updatePerson', + () => + singleWriteRepository.updatePerson( + singleCreatePersonResult.person, + { properties: { name: 'B' } }, + 'single-update' + ), + () => + dualWriteRepository.updatePerson( + dualCreatePersonResult.person, + { properties: { name: 'B' } }, + 'dual-update' + ), + PersonPropertiesSizeViolationError as any + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-update', + 'verify-secondary-update' + ) + }) + it('updatePerson() PersonPropertiesSizeViolation existing 
properties trimmed', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + const spySizeSingle = jest + .spyOn(singleWriteRepository as any, 'personPropertiesSize') + .mockResolvedValue(70000000) + + const spySizeDual = jest + .spyOn((dualWriteRepository as any).primaryRepo, 'personPropertiesSize') + .mockResolvedValue(70000000) + + const originalQueryPrimary = postgres.query.bind(postgres) + let primaryUpdateCallCount = 0 + let sawRemediationTag = false + const mockQueryPrimary = jest + .spyOn(postgres, 'query') + .mockImplementation(async (use, query, values, tag) => { + if (typeof query === 'string' && query.includes('UPDATE posthog_person SET')) { + primaryUpdateCallCount++ + if (typeof tag === 'string' && tag.includes('oversized_properties_remediation')) { + sawRemediationTag = true + return originalQueryPrimary(use, query, values, tag) + } + const error: any = new Error('Check constraint violation') + error.code = '23514' + error.constraint = 'check_properties_size' + throw error + } + return originalQueryPrimary(use, query, values, tag) + }) + + const updateToApply = { + properties: { + $app_name: 'Application name with detailed information', + $app_version: 'Version 1.2.3 with build metadata', + }, + } + const singleUpdateResult = await singleWriteRepository.updatePerson( + singleCreatePersonResult.person, + updateToApply, + 'single-update' + ) + const dualUpdateResult = await dualWriteRepository.updatePerson( + dualCreatePersonResult.person, + updateToApply, + 'dual-update' + ) + + expect(primaryUpdateCallCount).toBe(4) + expect(sawRemediationTag).toBe(true) + + assertUpdatePersonContractParity(singleUpdateResult, dualUpdateResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-update', + 'verify-secondary-update' + ) + mockQueryPrimary.mockRestore() + spySizeSingle.mockRestore() + spySizeDual.mockRestore() + }) + }) + + describe('deletePerson() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertDeletePersonContractParity(singleResult: TopicMessage[], dualResult: TopicMessage[]) { + expect(singleResult.length).toEqual(dualResult.length) + expect(singleResult[0].topic).toEqual(dualResult[0].topic) + } + it('happy path deletePerson()', async () => { + const team = await getFirstTeam(postgres) + const [singleCreatePersonResult, dualCreatePersonResult] = await Promise.all([ + singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.single, + [] + ), + dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + TEST_UUIDS.dual, + [] + ), + ]) + + if (!singleCreatePersonResult.success || !dualCreatePersonResult.success) { + throw new Error('Failed to create test persons') + } + + const singleDeleteResult = await singleWriteRepository.deletePerson(singleCreatePersonResult.person) + const dualDeleteResult = await dualWriteRepository.deletePerson(dualCreatePersonResult.person) + + assertDeletePersonContractParity(singleDeleteResult, dualDeleteResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-delete', + 'verify-secondary-delete' + ) + }) + 
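// Inject the same driver error into both write paths and expect identical surfaced errors. +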
it('deletePerson() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + const [singleCreatePersonResult, dualCreatePersonResult] = await Promise.all([ + singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + '33333333-3333-3333-3333-333333333333', + [] + ), + dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + '44444444-4444-4444-4444-444444444444', + [] + ), + ]) + + if (!singleCreatePersonResult.success || !dualCreatePersonResult.success) { + throw new Error('Failed to create test persons') + } + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'deletePerson', + () => singleWriteRepository.deletePerson(singleCreatePersonResult.person), + () => dualWriteRepository.deletePerson(dualCreatePersonResult.person), + 'unhandled database error' + ) + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [TEST_UUIDS.dual], + 'verify-primary-delete', + 'verify-secondary-delete' + ) + }) + it('deletePerson() deadlock detected', async () => { + const team = await getFirstTeam(postgres) + const [singleCreatePersonResult, dualCreatePersonResult] = await Promise.all([ + singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + '55555555-5555-5555-5555-555555555555', + [] + ), + dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'A' }, + {}, + {}, + team.id, + null, + false, + '66666666-6666-6666-6666-666666666666', + [] + ), + ]) + + if (!singleCreatePersonResult.success || !dualCreatePersonResult.success) { + throw new Error('Failed to create test persons') + } + + await assertConsistentDatabaseErrorHandling( + postgres, + { message: 'deadlock detected', code: '40P01' }, + 'deletePerson', + () => singleWriteRepository.deletePerson(singleCreatePersonResult.person), + () => dualWriteRepository.deletePerson(dualCreatePersonResult.person), + 'deadlock detected' + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT * FROM posthog_person WHERE uuid = $1', + ['66666666-6666-6666-6666-666666666666'], + 'verify-primary-delete', + 'verify-secondary-delete' + ) + }) + }) + + describe('fetchPerson() is compatible between single and dual write', () => { + function assertFetchPersonContractParity( + singleResult: InternalPerson | undefined, + dualResult: InternalPerson | undefined + ) { + if (singleResult === undefined) { + expect(dualResult).toBeUndefined() + } else if (dualResult === undefined) { + expect(singleResult).toBeUndefined() + } else { + expect(singleResult.properties).toEqual(dualResult.properties) + expect(singleResult.team_id).toEqual(dualResult.team_id) + expect(singleResult.is_identified).toEqual(dualResult.is_identified) + } + } + + it('happy path fetchPerson()', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: _singleCreatePersonResult, dualResult: _dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + const singleFetchResult = await singleWriteRepository.fetchPerson(team.id, 'single-a') + const dualFetchResult = await dualWriteRepository.fetchPerson(team.id, 'dual-a') + + assertFetchPersonContractParity(singleFetchResult, dualFetchResult) + }) + + it('fetchPerson() returns undefined for non-existent person', async () => { + const team = await getFirstTeam(postgres) + + const 
singleFetchResult = await singleWriteRepository.fetchPerson(team.id, 'non-existent') + const dualFetchResult = await dualWriteRepository.fetchPerson(team.id, 'non-existent') + + assertFetchPersonContractParity(singleFetchResult, dualFetchResult) + expect(singleFetchResult).toBeUndefined() + expect(dualFetchResult).toBeUndefined() + }) + + it('fetchPerson() with forUpdate lock', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: _singleCreatePersonResult, dualResult: _dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + const singleFetchResult = await singleWriteRepository.fetchPerson(team.id, 'single-a', { forUpdate: true }) + const dualFetchResult = await dualWriteRepository.fetchPerson(team.id, 'dual-a', { forUpdate: true }) + + assertFetchPersonContractParity(singleFetchResult, dualFetchResult) + }) + + it('fetchPerson() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + await createPersonsInBothRepos(team) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'fetchPerson', + () => singleWriteRepository.fetchPerson(team.id, 'single-a'), + () => dualWriteRepository.fetchPerson(team.id, 'dual-a'), + 'unhandled database error' + ) + }) + }) + + describe('addDistinctId() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertAddDistinctIdContractParity(singleResult: TopicMessage[], dualResult: TopicMessage[]) { + expect(singleResult.length).toEqual(dualResult.length) + if (singleResult.length > 0) { + expect(singleResult[0].topic).toEqual(dualResult[0].topic) + } + } + + it('happy path addDistinctId()', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + const singleAddResult = await singleWriteRepository.addDistinctId( + singleCreatePersonResult.person, + 'single-b', + 1 + ) + const dualAddResult = await dualWriteRepository.addDistinctId(dualCreatePersonResult.person, 'dual-b', 1) + + assertAddDistinctIdContractParity(singleAddResult, dualAddResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT COUNT(*) as count FROM posthog_persondistinctid WHERE person_id = (SELECT id FROM posthog_person WHERE uuid = $1)', + [TEST_UUIDS.dual], + 'verify-primary-add-distinct-id', + 'verify-secondary-add-distinct-id' + ) + }) + + it('addDistinctId() duplicate distinct id conflict', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + let singleError: any + try { + await singleWriteRepository.addDistinctId(singleCreatePersonResult.person, 'single-a', 1) + } catch (e) { + singleError = e + } + + let dualError: any + try { + await dualWriteRepository.addDistinctId(dualCreatePersonResult.person, 'dual-a', 1) + } catch (e) { + dualError = e + } + + expect(singleError).toBeDefined() + expect(dualError).toBeDefined() + expect(singleError.message).toContain('unique') + expect(dualError.message).toContain('unique') + }) + + it('addDistinctId() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + await assertConsistentDatabaseErrorHandling( + postgres, 
+ new Error('unhandled database error'), + 'addDistinctId', + () => singleWriteRepository.addDistinctId(singleCreatePersonResult.person, 'single-c', 1), + () => dualWriteRepository.addDistinctId(dualCreatePersonResult.person, 'dual-c', 1), + 'unhandled database error' + ) + }) + }) + + describe('moveDistinctIds() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertMoveDistinctIdsContractParity( + singleResult: { success: boolean; messages?: TopicMessage[]; distinctIdsMoved?: string[]; error?: string }, + dualResult: { success: boolean; messages?: TopicMessage[]; distinctIdsMoved?: string[]; error?: string } + ) { + expect(singleResult.success).toEqual(dualResult.success) + if (singleResult.success) { + expect(singleResult.messages?.length).toEqual(dualResult.messages?.length) + expect(singleResult.distinctIdsMoved?.length).toEqual(dualResult.distinctIdsMoved?.length) + } else { + // When there's an error, check the error matches + expect(singleResult.error).toEqual(dualResult.error) + } + } + + it('happy path moveDistinctIds()', async () => { + const team = await getFirstTeam(postgres) + + // Create source persons with distinct IDs to move + const sourceSingleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source' }, + {}, + {}, + team.id, + null, + false, + '33333333-3333-3333-3333-333333333333', + [{ distinctId: 'single-source', version: 0 }] + ) + const sourceDualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source' }, + {}, + {}, + team.id, + null, + false, + '44444444-4444-4444-4444-444444444444', + [{ distinctId: 'dual-source', version: 0 }] + ) + + // Create target persons + const targetSingleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Target' }, + {}, + {}, + team.id, + null, + false, + '55555555-5555-5555-5555-555555555555', + [{ distinctId: 'single-target', version: 0 }] + ) + const targetDualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Target' }, + {}, + {}, + team.id, + null, + false, + '66666666-6666-6666-6666-666666666666', + [{ distinctId: 'dual-target', version: 0 }] + ) + + // Ensure all persons were created successfully + if ( + !sourceSingleResult.success || + !targetSingleResult.success || + !sourceDualResult.success || + !targetDualResult.success + ) { + throw new Error('Failed to create test persons') + } + + const singleMoveResult = await singleWriteRepository.moveDistinctIds( + sourceSingleResult.person, + targetSingleResult.person + ) + const dualMoveResult = await dualWriteRepository.moveDistinctIds( + sourceDualResult.person, + targetDualResult.person + ) + + assertMoveDistinctIdsContractParity(singleMoveResult, dualMoveResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT COUNT(*) as count FROM posthog_persondistinctid WHERE person_id = (SELECT id FROM posthog_person WHERE uuid = $1)', + ['66666666-6666-6666-6666-666666666666'], + 'verify-primary-move-distinct-ids', + 'verify-secondary-move-distinct-ids' + ) + }) + + it('moveDistinctIds() foreign key constraint error when target deleted', async () => { + const team = await getFirstTeam(postgres) + + // Create source persons + const sourceSingleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source' }, + {}, + {}, + team.id, + null, + false, + '77777777-7777-7777-7777-777777777777', + [{ distinctId: 'single-source-fk', version: 0 }] + ) + const 
sourceDualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source' }, + {}, + {}, + team.id, + null, + false, + '88888888-8888-8888-8888-888888888888', + [{ distinctId: 'dual-source-fk', version: 0 }] + ) + + // Ensure all persons were created successfully + if (!sourceSingleResult.success || !sourceDualResult.success) { + throw new Error('Failed to create test persons') + } + + // Create mock target persons (will simulate them being deleted) + const targetSingle = { + ...sourceSingleResult.person, + id: '999999', + uuid: '99999999-9999-9999-9999-999999999999', + } + const targetDual = { + ...sourceDualResult.person, + id: '999998', + uuid: '99999999-9999-9999-9999-999999999998', + } + + // moveDistinctIds catches foreign key errors and returns { success: false, error: 'TargetNotFound' } + // So we test that both repositories handle this case the same way + const singleMockQuery = mockDatabaseError( + postgres, + { message: 'insert or update on table "posthog_persondistinctid" violates foreign key constraint' }, + 'updateDistinctIdPerson' + ) + const singleResult = await singleWriteRepository.moveDistinctIds(sourceSingleResult.person, targetSingle) + singleMockQuery.mockRestore() + + // For dual-write, we need to mock both databases to fail to avoid prepared transaction issues + const dualMockPrimary = mockDatabaseError( + postgres, + { message: 'insert or update on table "posthog_persondistinctid" violates foreign key constraint' }, + 'updateDistinctIdPerson' + ) + const dualMockSecondary = mockDatabaseError( + migrationPostgres, + { message: 'insert or update on table "posthog_persondistinctid" violates foreign key constraint' }, + 'updateDistinctIdPerson' + ) + const dualResult = await dualWriteRepository.moveDistinctIds(sourceDualResult.person, targetDual) + dualMockPrimary.mockRestore() + dualMockSecondary.mockRestore() + + // Both should return the same error result + assertMoveDistinctIdsContractParity(singleResult, dualResult) + expect(singleResult.success).toBe(false) + expect(dualResult.success).toBe(false) + expect((singleResult as any).error).toBe('TargetNotFound') + expect((dualResult as any).error).toBe('TargetNotFound') + }) + + it('moveDistinctIds() with limit parameter', async () => { + const team = await getFirstTeam(postgres) + + const sourceSingleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source with many IDs' }, + {}, + {}, + team.id, + null, + false, + '77777777-7777-7777-7777-777777777777', + [{ distinctId: 'single-source-limit', version: 0 }] + ) + const sourceDualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Source with many IDs' }, + {}, + {}, + team.id, + null, + false, + '88888888-8888-8888-8888-888888888888', + [{ distinctId: 'dual-source-limit', version: 0 }] + ) + + const targetSingleResult = await singleWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Target for limit test' }, + {}, + {}, + team.id, + null, + false, + '99999999-9999-9999-9999-999999999999', + [{ distinctId: 'single-target-limit', version: 0 }] + ) + const targetDualResult = await dualWriteRepository.createPerson( + TEST_TIMESTAMP, + { name: 'Target for limit test' }, + {}, + {}, + team.id, + null, + false, + 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', + [{ distinctId: 'dual-target-limit', version: 0 }] + ) + + // Ensure all persons were created successfully + if ( + !sourceSingleResult.success || + !targetSingleResult.success || + !sourceDualResult.success || + !targetDualResult.success + 
) { + throw new Error('Failed to create test persons') + } + + await singleWriteRepository.addDistinctId(sourceSingleResult.person, 'single-extra-1', 1) + await singleWriteRepository.addDistinctId(sourceSingleResult.person, 'single-extra-2', 1) + await singleWriteRepository.addDistinctId(sourceSingleResult.person, 'single-extra-3', 1) + + await dualWriteRepository.addDistinctId(sourceDualResult.person, 'dual-extra-1', 1) + await dualWriteRepository.addDistinctId(sourceDualResult.person, 'dual-extra-2', 1) + await dualWriteRepository.addDistinctId(sourceDualResult.person, 'dual-extra-3', 1) + + const singleMoveResult = await singleWriteRepository.moveDistinctIds( + sourceSingleResult.person, + targetSingleResult.person, + 2 + ) + const dualMoveResult = await dualWriteRepository.moveDistinctIds( + sourceDualResult.person, + targetDualResult.person, + 2 + ) + + assertMoveDistinctIdsContractParity(singleMoveResult, dualMoveResult) + + expect(singleMoveResult.success).toBe(true) + expect(dualMoveResult.success).toBe(true) + + if (singleMoveResult.success && dualMoveResult.success) { + expect(singleMoveResult.distinctIdsMoved).toHaveLength(2) + expect(dualMoveResult.distinctIdsMoved).toHaveLength(2) + expect(singleMoveResult.messages).toHaveLength(2) + expect(dualMoveResult.messages).toHaveLength(2) + } + + const singleRemainingResult = await postgres.query( + PostgresUse.PERSONS_WRITE, + 'SELECT COUNT(*) as count FROM posthog_persondistinctid WHERE person_id = (SELECT id FROM posthog_person WHERE uuid = $1)', + ['77777777-7777-7777-7777-777777777777'], + 'countSingleRemaining' + ) + expect(parseInt(singleRemainingResult.rows[0].count)).toBe(2) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT COUNT(*) as count FROM posthog_persondistinctid WHERE person_id = (SELECT id FROM posthog_person WHERE uuid = $1)', + ['88888888-8888-8888-8888-888888888888'], + 'verify-primary-source-remaining', + 'verify-secondary-source-remaining' + ) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT COUNT(*) as count FROM posthog_persondistinctid WHERE person_id = (SELECT id FROM posthog_person WHERE uuid = $1)', + ['aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'], + 'verify-primary-target-received', + 'verify-secondary-target-received' + ) + }) + + it('moveDistinctIds() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'updateDistinctIdPerson', + () => + singleWriteRepository.moveDistinctIds( + singleCreatePersonResult.person, + singleCreatePersonResult.person + ), + () => dualWriteRepository.moveDistinctIds(dualCreatePersonResult.person, dualCreatePersonResult.person), + 'unhandled database error' + ) + }) + }) + + describe('addPersonlessDistinctId() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertAddPersonlessDistinctIdContractParity(singleResult: boolean, dualResult: boolean) { + expect(singleResult).toEqual(dualResult) + } + + it('happy path addPersonlessDistinctId()', async () => { + const team = await getFirstTeam(postgres) + + const singleResult = await singleWriteRepository.addPersonlessDistinctId(team.id, 'single-personless') + const dualResult = await dualWriteRepository.addPersonlessDistinctId(team.id, 
'dual-personless') + + assertAddPersonlessDistinctIdContractParity(singleResult, dualResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dual-personless'], + 'verify-primary-add-personless', + 'verify-secondary-add-personless' + ) + }) + + it('addPersonlessDistinctId() existing distinct id', async () => { + const team = await getFirstTeam(postgres) + + // Add the same distinct ID twice + await singleWriteRepository.addPersonlessDistinctId(team.id, 'single-personless-dup') + await dualWriteRepository.addPersonlessDistinctId(team.id, 'dual-personless-dup') + + const singleResult = await singleWriteRepository.addPersonlessDistinctId(team.id, 'single-personless-dup') + const dualResult = await dualWriteRepository.addPersonlessDistinctId(team.id, 'dual-personless-dup') + + assertAddPersonlessDistinctIdContractParity(singleResult, dualResult) + }) + + it('addPersonlessDistinctId() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'addPersonlessDistinctId', + () => singleWriteRepository.addPersonlessDistinctId(team.id, 'single-error'), + () => dualWriteRepository.addPersonlessDistinctId(team.id, 'dual-error'), + 'unhandled database error' + ) + }) + }) + + describe('addPersonlessDistinctIdForMerge() is compatible between single and dual write and consistent between primary and secondary', () => { + function assertAddPersonlessDistinctIdForMergeContractParity(singleResult: boolean, dualResult: boolean) { + expect(singleResult).toEqual(dualResult) + } + + it('happy path addPersonlessDistinctIdForMerge()', async () => { + const team = await getFirstTeam(postgres) + + const singleResult = await singleWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'single-merge') + const dualResult = await dualWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'dual-merge') + + assertAddPersonlessDistinctIdForMergeContractParity(singleResult, dualResult) + + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dual-merge'], + 'verify-primary-add-merge', + 'verify-secondary-add-merge' + ) + }) + + it('addPersonlessDistinctIdForMerge() updates existing non-merged', async () => { + const team = await getFirstTeam(postgres) + + // First add as non-merged + await singleWriteRepository.addPersonlessDistinctId(team.id, 'single-to-merge') + await dualWriteRepository.addPersonlessDistinctId(team.id, 'dual-to-merge') + + // Then update to merged + const singleResult = await singleWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'single-to-merge') + const dualResult = await dualWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'dual-to-merge') + + assertAddPersonlessDistinctIdForMergeContractParity(singleResult, dualResult) + expect(singleResult).toBe(false) // False because it was an update, not an insert + expect(dualResult).toBe(false) + + // Verify is_merged is now true + const singleCheck = await postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'single-to-merge'], + 'check-single-merge' + ) + const dualCheck = await migrationPostgres.query( + PostgresUse.PERSONS_READ, + 'SELECT is_merged 
FROM posthog_personlessdistinctid WHERE team_id = $1 AND distinct_id = $2', + [team.id, 'dual-to-merge'], + 'check-dual-merge' + ) + expect(singleCheck.rows[0].is_merged).toBe(true) + expect(dualCheck.rows[0].is_merged).toBe(true) + }) + + it('addPersonlessDistinctIdForMerge() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + + await assertConsistentDatabaseErrorHandling( + postgres, + new Error('unhandled database error'), + 'addPersonlessDistinctIdForMerge', + () => singleWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'single-merge-error'), + () => dualWriteRepository.addPersonlessDistinctIdForMerge(team.id, 'dual-merge-error'), + 'unhandled database error' + ) + }) + }) + + describe('updateCohortsAndFeatureFlagsForMerge() is compatible between single and dual write and consistent between primary and secondary', () => { + it('happy path updateCohortsAndFeatureFlagsForMerge()', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + // This method doesn't return anything, just verify it doesn't error + await expect( + singleWriteRepository.updateCohortsAndFeatureFlagsForMerge( + team.id, + singleCreatePersonResult.person.id, + singleCreatePersonResult.person.id + ) + ).resolves.not.toThrow() + + await expect( + dualWriteRepository.updateCohortsAndFeatureFlagsForMerge( + team.id, + dualCreatePersonResult.person.id, + dualCreatePersonResult.person.id + ) + ).resolves.not.toThrow() + + // Verify consistency in cohort tables + await assertConsistencyAcrossDatabases( + postgres, + migrationPostgres, + 'SELECT COUNT(*) as count FROM posthog_cohortpeople', + [], + 'verify-primary-cohort', + 'verify-secondary-cohort' + ) + }) + + it('updateCohortsAndFeatureFlagsForMerge() unhandled database error', async () => { + const team = await getFirstTeam(postgres) + const { singleResult: singleCreatePersonResult, dualResult: dualCreatePersonResult } = + await createPersonsInBothRepos(team) + + // This method doesn't have special error handling, it just throws + // We need to mock the specific query tag that this method uses: 'updateCohortAndFeatureFlagsPeople' + const singleMockQuery = mockDatabaseError( + postgres, + new Error('unhandled database error'), + 'updateCohortAndFeatureFlagsPeople' + ) + let singleError: any + try { + await singleWriteRepository.updateCohortsAndFeatureFlagsForMerge( + team.id, + singleCreatePersonResult.person.id, + singleCreatePersonResult.person.id + ) + } catch (e) { + singleError = e + } + singleMockQuery.mockRestore() + + const dualMockQuery = mockDatabaseError( + postgres, + new Error('unhandled database error'), + 'updateCohortAndFeatureFlagsPeople' + ) + let dualError: any + try { + await dualWriteRepository.updateCohortsAndFeatureFlagsForMerge( + team.id, + dualCreatePersonResult.person.id, + dualCreatePersonResult.person.id + ) + } catch (e) { + dualError = e + } + dualMockQuery.mockRestore() + + // Both should throw the same error + expect(singleError).toBeDefined() + expect(dualError).toBeDefined() + expect(singleError.message).toBe('unhandled database error') + expect(dualError.message).toBe('unhandled database error') + }) + }) +}) diff --git a/plugin-server/src/worker/ingestion/persons/repositories/test-helpers.ts b/plugin-server/src/worker/ingestion/persons/repositories/test-helpers.ts new file mode 100644 index 0000000000..85721aecbf --- /dev/null +++ 
b/plugin-server/src/worker/ingestion/persons/repositories/test-helpers.ts @@ -0,0 +1,184 @@ +import fs from 'fs' +import { DateTime } from 'luxon' +import path from 'path' + +import { Hub, Team } from '~/types' +import { PostgresRouter, PostgresUse } from '~/utils/db/postgres' + +import { CreatePersonResult } from '../../../../utils/db/db' + +export const TEST_UUIDS = { + single: '11111111-1111-1111-1111-111111111111', + dual: '22222222-2222-2222-2222-222222222222', +} + +export const TEST_TIMESTAMP = DateTime.fromISO('2024-01-15T10:30:00.000Z').toUTC() + +export async function setupMigrationDb(migrationPostgres: PostgresRouter): Promise<void> { + const drops = [ + 'posthog_featureflaghashkeyoverride', + 'posthog_cohortpeople', + 'posthog_persondistinctid', + 'posthog_personlessdistinctid', + 'posthog_person', + ] + for (const table of drops) { + await migrationPostgres.query( + PostgresUse.PERSONS_WRITE, + `DROP TABLE IF EXISTS ${table} CASCADE`, + [], + `drop-${table}` + ) + } + const sqlPath = path.resolve(__dirname, '../../../../../sql/create_persons_tables.sql') + const sql = fs.readFileSync(sqlPath, 'utf8') + await migrationPostgres.query(PostgresUse.PERSONS_WRITE, sql, [], 'create-persons-schema-secondary') +} + +export async function cleanupPrepared(hub: Hub) { + const routers = [hub.db.postgres, hub.db.postgresPersonMigration] + for (const r of routers) { + const res = await r.query( + PostgresUse.PERSONS_WRITE, + `SELECT gid FROM pg_prepared_xacts WHERE gid LIKE 'dualwrite:%'`, + [], + 'list-prepared' + ) + for (const row of res.rows) { + await r.query( + PostgresUse.PERSONS_WRITE, + `ROLLBACK PREPARED '${String(row.gid).replace(/'/g, "''")}'`, + [], + 'rollback-prepared' + ) + } + } +}
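// Editor's sketch of why cleanupPrepared() above is needed: the dual-write
// repository appears to commit via two-phase commit, preparing the same
// 'dualwrite:<id>' gid on both databases before committing either, so a test
// that dies between the two phases strands the gid in pg_prepared_xacts (and
// holds its locks) until it is rolled back. Hypothetical happy-path shape,
// assuming one pinned connection per database with an open transaction
// (illustration only, not the repository's actual code):
//
//     await primaryTx.query(`PREPARE TRANSACTION 'dualwrite:${id}'`)
//     await secondaryTx.query(`PREPARE TRANSACTION 'dualwrite:${id}'`)
//     await primaryTx.query(`COMMIT PREPARED 'dualwrite:${id}'`)
//     await secondaryTx.query(`COMMIT PREPARED 'dualwrite:${id}'`)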
+ +export async function getFirstTeam(postgres: PostgresRouter): Promise<Team> { + const teams = await postgres.query( + PostgresUse.COMMON_WRITE, + 'SELECT * FROM posthog_team LIMIT 1', + [], + 'getFirstTeam' + ) + return teams.rows[0] +} + +export async function assertConsistencyAcrossDatabases( + primaryRouter: PostgresRouter, + secondaryRouter: PostgresRouter, + query: string, + params: any[], + primaryTag: string, + secondaryTag: string +) { + const [primary, secondary] = await Promise.all([ + primaryRouter.query(PostgresUse.PERSONS_READ, query, params, primaryTag), + secondaryRouter.query(PostgresUse.PERSONS_READ, query, params, secondaryTag), + ]) + expect(primary.rows).toEqual(secondary.rows) +} + +export function mockDatabaseError( + router: PostgresRouter, + error: Error | { message: string; code?: string; constraint?: string }, + tagPattern: string | RegExp +) { + const originalQuery = router.query.bind(router) + return jest.spyOn(router, 'query').mockImplementation((use: any, text: any, params: any, tag: string) => { + const shouldThrow = + typeof tagPattern === 'string' ? tag && tag.startsWith(tagPattern) : tag && tagPattern.test(tag) + + if (shouldThrow) { + if (error instanceof Error) { + throw error + } else { + const e: any = new Error(error.message) + if (error.code) { + e.code = error.code + } + if ((error as any).constraint) { + e.constraint = (error as any).constraint + } + throw e + } + } + return originalQuery(use, text, params, tag) + }) +} + +export async function assertConsistentDatabaseErrorHandling( + postgres: PostgresRouter, + error: Error | { message: string; code?: string; constraint?: string }, + tagPattern: string | RegExp, + singleWriteOperation: () => Promise<any>, + dualWriteOperation: () => Promise<any>, + expectedError?: string | RegExp | ErrorConstructor +) { + const singleSpy = mockDatabaseError(postgres, error, tagPattern) + let singleError: any + try { + await singleWriteOperation() + } catch (e) { + singleError = e + } + singleSpy.mockRestore() + + const dualSpy = mockDatabaseError(postgres, error, tagPattern) + let dualError: any + try { + await dualWriteOperation() + } catch (e) { + dualError = e + } + dualSpy.mockRestore() + + if (expectedError) { + expect(singleError).toBeDefined() + expect(dualError).toBeDefined() + + if (typeof expectedError === 'string') { + expect(singleError.message).toContain(expectedError) + expect(dualError.message).toContain(expectedError) + } else if (expectedError instanceof RegExp) { + expect(singleError.message).toMatch(expectedError) + expect(dualError.message).toMatch(expectedError) + } else { + expect(singleError).toBeInstanceOf(expectedError) + expect(dualError).toBeInstanceOf(expectedError) + } + } else { + expect(singleError).toBeDefined() + expect(dualError).toBeDefined() + expect(singleError.message).toBe(dualError.message) + if ((error as any).code) { + expect(singleError.code).toBe((error as any).code) + expect(dualError.code).toBe((error as any).code) + } + if ((error as any).constraint) { + expect(singleError.constraint).toBe((error as any).constraint) + expect(dualError.constraint).toBe((error as any).constraint) + } + } +}
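// Minimal usage sketch for the two helpers above, reusing a tag the tests
// below actually pass ('addPersonlessDistinctId'); the error values here are
// illustrative. The idea: inject one driver-level error into both write paths
// by query tag, then require single- and dual-write to surface it identically.
//
//     await assertConsistentDatabaseErrorHandling(
//         postgres,
//         { message: 'deadlock detected', code: '40P01' },
//         'addPersonlessDistinctId',
//         () => singleWriteRepository.addPersonlessDistinctId(teamId, 'a'),
//         () => dualWriteRepository.addPersonlessDistinctId(teamId, 'b')
//     )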
+ +export function assertCreatePersonContractParity(singleResult: CreatePersonResult, dualResult: CreatePersonResult) { + expect(singleResult.success).toBe(true) + expect(dualResult.success).toBe(true) + if (singleResult.success && dualResult.success) { + expect(singleResult.person.properties).toEqual(dualResult.person.properties) + } +} + +export function assertCreatePersonConflictContractParity( + singleResult: CreatePersonResult, + dualResult: CreatePersonResult +) { + expect(singleResult.success).toBe(false) + expect(dualResult.success).toBe(false) + if (!singleResult.success && !dualResult.success) { + expect(singleResult.error).toBe(dualResult.error) + expect(singleResult.distinctIds).toEqual(dualResult.distinctIds) + } +} diff --git a/plugin-server/tests/worker/ingestion/person-state-batch.dualwrite.test.ts b/plugin-server/tests/worker/ingestion/person-state-batch.dualwrite.test.ts new file mode 100644 index 0000000000..98570ad751 --- /dev/null +++ b/plugin-server/tests/worker/ingestion/person-state-batch.dualwrite.test.ts @@ -0,0 +1,1102 @@ +import { KafkaProducerObserver } from '~/tests/helpers/mocks/producer.spy' + +import { DateTime } from 'luxon' + +import { PluginEvent } from '@posthog/plugin-scaffold' + +import { Clickhouse } from '~/tests/helpers/clickhouse' + +import { Hub, InternalPerson, Team } from '../../../src/types' +import { closeHub, createHub } from '../../../src/utils/db/hub' +import { PostgresUse } from '../../../src/utils/db/postgres' +import { defaultRetryConfig } from '../../../src/utils/retries' +import { UUIDT } from '../../../src/utils/utils' +import { uuidFromDistinctId } from '../../../src/worker/ingestion/person-uuid' +import { BatchWritingPersonsStoreForBatch } from '../../../src/worker/ingestion/persons/batch-writing-person-store' +import { PersonContext } from '../../../src/worker/ingestion/persons/person-context' +import { PersonEventProcessor } from '../../../src/worker/ingestion/persons/person-event-processor' +import { PersonMergeService } from '../../../src/worker/ingestion/persons/person-merge-service' +import { PersonPropertyService } from '../../../src/worker/ingestion/persons/person-property-service' +import { PostgresDualWritePersonRepository } from '../../../src/worker/ingestion/persons/repositories/postgres-dualwrite-person-repository' +import { PostgresPersonRepository } from '../../../src/worker/ingestion/persons/repositories/postgres-person-repository' +import { cleanupPrepared, setupMigrationDb } from '../../../src/worker/ingestion/persons/repositories/test-helpers' +import { createOrganization, createTeam, fetchPostgresPersons, getTeam, resetTestDatabase } from '../../helpers/sql' + +jest.setTimeout(30000) + +describe('PersonState dual-write compatibility', () => { + let hub: Hub + let clickhouse: Clickhouse + let singleWriteRepository: PostgresPersonRepository + let dualWriteRepository: PostgresDualWritePersonRepository + let mockProducerObserver: KafkaProducerObserver + + let teamId: number + let mainTeam: Team + let organizationId: string + + let timestamp: DateTime + + beforeAll(async () => { + hub = await createHub({}) + mockProducerObserver = new KafkaProducerObserver(hub.kafkaProducer) + mockProducerObserver.resetKafkaProducer() + + clickhouse = Clickhouse.create() + await clickhouse.exec('SYSTEM STOP MERGES') + }) + + beforeEach(async () => { + await resetTestDatabase(undefined, {}, {}, { withExtendedTestData: false }) + await setupMigrationDb(hub.db.postgresPersonMigration) + + organizationId = await createOrganization(hub.db.postgres) + teamId = await createTeam(hub.db.postgres, organizationId) + mainTeam = (await getTeam(hub, teamId))!
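// Two repositories over the same primary persons DB: the single-write one
// writes only to hub.db.postgres, while the dual-write one mirrors every
// write into hub.db.postgresPersonMigration (the secondary) as well.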
+ timestamp = DateTime.fromISO('2020-01-01T12:00:05.200Z').toUTC() + + singleWriteRepository = new PostgresPersonRepository(hub.db.postgres) + dualWriteRepository = new PostgresDualWritePersonRepository(hub.db.postgres, hub.db.postgresPersonMigration) + + defaultRetryConfig.RETRY_INTERVAL_DEFAULT = 0 + + const redis = await hub.redisPool.acquire() + await redis.flushdb() + await hub.redisPool.release(redis) + }) + + afterEach(async () => { + await cleanupPrepared(hub) + jest.clearAllTimers() + jest.useRealTimers() + jest.restoreAllMocks() + }) + + afterAll(async () => { + await closeHub(hub) + await clickhouse.exec('SYSTEM START MERGES') + clickhouse.close() + }) + + function createPersonProcessor( + repository: PostgresPersonRepository | PostgresDualWritePersonRepository, + event: Partial<PluginEvent>, + processPerson = true, + timestampParam = timestamp, + team = mainTeam + ) { + const fullEvent = { + team_id: teamId, + properties: {}, + ...event, + } + + const personsStore = new BatchWritingPersonsStoreForBatch(repository, hub.db.kafkaProducer) + + const context = new PersonContext( + fullEvent as PluginEvent, + team, + event.distinct_id!, + timestampParam, + processPerson, + hub.db.kafkaProducer, + personsStore, + 0 + ) + + const processor = new PersonEventProcessor( + context, + new PersonPropertyService(context), + new PersonMergeService(context) + ) + + return { processor, personsStore } + } + + async function fetchPostgresPersonsH() { + return await fetchPostgresPersons(hub.db, teamId) + }
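// Every test below follows the same distilled shape: build one processor per
// repository for an equivalent event, process it, flush the batch store, then
// compare what each repository persisted. In sketch form:
//
//     const { processor, personsStore } = createPersonProcessor(repo, { distinct_id: 'd1', properties: {} })
//     await processor.processEvent()
//     await personsStore.flush()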
+ + describe('Basic person creation', () => { + it('creates person identically with single-write and dual-write repositories', async () => { + const singleDistinctId = 'single-test-user-1' + const dualDistinctId = 'dual-test-user-1' + + const singleEvent: Partial<PluginEvent> = { + distinct_id: singleDistinctId, + properties: { + $set: { name: 'Test User', email: 'test@example.com' }, + }, + } + + const dualEvent: Partial<PluginEvent> = { + distinct_id: dualDistinctId, + properties: { + $set: { name: 'Test User', email: 'test@example.com' }, + }, + } + + const { processor: singleProcessor, personsStore: singleStore } = createPersonProcessor( + singleWriteRepository, + singleEvent + ) + const { processor: dualProcessor, personsStore: dualStore } = createPersonProcessor( + dualWriteRepository, + dualEvent + ) + + const [singleResult, dualResult] = await Promise.all([ + singleProcessor.processEvent(), + dualProcessor.processEvent(), + ]) + + await Promise.all([singleStore.flush(), dualStore.flush()]) + + expect(singleResult).toBeDefined() + expect(dualResult).toBeDefined() + + const postgresPersons = await fetchPostgresPersonsH() + expect(postgresPersons.length).toBe(2) + + const singlePerson = postgresPersons.find( + (p: InternalPerson) => p.uuid === uuidFromDistinctId(teamId, singleDistinctId) + ) + const dualPerson = postgresPersons.find( + (p: InternalPerson) => p.uuid === uuidFromDistinctId(teamId, dualDistinctId) + ) + + expect(singlePerson).toBeDefined() + expect(dualPerson).toBeDefined() + expect(singlePerson?.properties).toEqual({ name: 'Test User', email: 'test@example.com' }) + expect(dualPerson?.properties).toEqual({ name: 'Test User', email: 'test@example.com' }) + }) + + it('handles concurrent person creation without errors', async () => { + const distinctIds = ['user-1', 'user-2', 'user-3'] + + const createWithRepo = async (repo: any, distinctId: string) => { + const event: Partial<PluginEvent> = { + distinct_id: distinctId, + properties: { + $set: { id: distinctId }, + }, + } + + const { processor, personsStore } = createPersonProcessor(repo, event) + await processor.processEvent() + await personsStore.flush() + } + + const singlePromises = distinctIds.map((id) => createWithRepo(singleWriteRepository, `single-${id}`)) + const dualPromises = distinctIds.map((id) => createWithRepo(dualWriteRepository, `dual-${id}`)) + + await expect(Promise.all([...singlePromises, ...dualPromises])).resolves.not.toThrow() + + const postgresPersons = await fetchPostgresPersonsH() + expect(postgresPersons.length).toBe(6) + }) + }) + + describe('Person property updates', () => { + it('updates properties identically with single-write and dual-write repositories', async () => { + const singleDistinctId = 'single-update-test-user' + const dualDistinctId = 'dual-update-test-user' + const singleUuid = new UUIDT().toString() + const dualUuid = new UUIDT().toString() + + const createPerson = async (repo: any, distinctId: string, uuid: string) => { + const result = await repo.createPerson( + timestamp, + { initial: 'value' }, + {}, + {}, + teamId, + null, + false, + uuid, + [{ distinctId, version: 0 }] + ) + return result.person + } + + const [singlePerson, dualPerson] = await Promise.all([ + createPerson(singleWriteRepository, singleDistinctId, singleUuid), + createPerson(dualWriteRepository, dualDistinctId, dualUuid), + ]) + + const singleUpdateEvent: Partial<PluginEvent> = { + distinct_id: singleDistinctId, + properties: { + $set: { updated: 'newValue', initial: 'changed' }, + }, + } + + const dualUpdateEvent: Partial<PluginEvent> = { + distinct_id: dualDistinctId, + properties: { + $set: { updated: 'newValue', initial: 'changed' }, + }, + } + + const { processor: singleProcessor, personsStore: singleStore } = createPersonProcessor( + singleWriteRepository, + singleUpdateEvent + ) + const { processor: dualProcessor, personsStore: dualStore } = createPersonProcessor( + dualWriteRepository, + dualUpdateEvent + ) + + // Use type assertion to access private context for testing + ;(singleProcessor as any).context.person = singlePerson + ;(dualProcessor as any).context.person = dualPerson + + await Promise.all([singleProcessor.processEvent(), dualProcessor.processEvent()]) + + await Promise.all([singleStore.flush(), dualStore.flush()]) + + const updatedSingle = await singleWriteRepository.fetchPerson(teamId, singleDistinctId) + const updatedDual = await dualWriteRepository.fetchPerson(teamId, dualDistinctId) + + expect(updatedSingle?.properties).toEqual({ initial: 'changed', updated: 'newValue' }) + expect(updatedDual?.properties).toEqual({ initial: 'changed', updated: 'newValue' }) + }) + }) + + describe('Person identification ($identify)', () => { + it('handles $identify event identically with both repositories', async () => { + const singleAnonId = 'single-anon-user' + const singleUserId = 'single-identified-user' + const dualAnonId = 'dual-anon-user' + const dualUserId = 'dual-identified-user' + + const createAnonPerson = async (repo: any, anonId: string) => { + const uuid = uuidFromDistinctId(teamId, anonId) + const result = await repo.createPerson( + timestamp, + { anonymous: true }, + {}, + {}, + teamId, + null, + false, + uuid, + [{ distinctId: anonId, version: 0 }] + ) + return result.person + } + + await Promise.all([ + createAnonPerson(singleWriteRepository, singleAnonId), + createAnonPerson(dualWriteRepository, dualAnonId), + ]) + + const singleIdentifyEvent: Partial<PluginEvent> = { + distinct_id: singleUserId, + properties: { + $anon_distinct_id: singleAnonId, + $set: { email: 'user@example.com' }, + }, + }
+ + const dualIdentifyEvent: Partial<PluginEvent> = { + distinct_id: dualUserId, + properties: { + $anon_distinct_id: dualAnonId, + $set: { email: 'user@example.com' }, + }, + } + + const { processor: singleProcessor, personsStore: singleStore } = createPersonProcessor( + singleWriteRepository, + singleIdentifyEvent + ) + const { processor: dualProcessor, personsStore: dualStore } = createPersonProcessor( + dualWriteRepository, + dualIdentifyEvent + ) + + await Promise.all([singleProcessor.processEvent(), dualProcessor.processEvent()]) + + await Promise.all([singleStore.flush(), dualStore.flush()]) + + const singleIdentified = await singleWriteRepository.fetchPerson(teamId, singleUserId) + const dualIdentified = await dualWriteRepository.fetchPerson(teamId, dualUserId) + + expect(singleIdentified).toBeDefined() + expect(dualIdentified).toBeDefined() + expect(singleIdentified?.properties.email).toBe('user@example.com') + expect(dualIdentified?.properties.email).toBe('user@example.com') + }) + }) + + describe('Error handling', () => { + it('handles creation conflicts consistently between repositories', async () => { + const uuid = new UUIDT().toString() + const distinctId = 'conflict-test-user' + + await singleWriteRepository.createPerson(timestamp, { first: true }, {}, {}, teamId, null, false, uuid, [ + { distinctId: 'first-' + distinctId, version: 0 }, + ]) + + await dualWriteRepository.createPerson( + timestamp, + { first: true }, + {}, + {}, + teamId, + null, + false, + new UUIDT().toString(), + [{ distinctId: 'dual-first-' + distinctId, version: 0 }] + ) + + const singleResult = await singleWriteRepository.createPerson( + timestamp, + { second: true }, + {}, + {}, + teamId, + null, + false, + new UUIDT().toString(), + [{ distinctId: 'first-' + distinctId, version: 0 }] + ) + + const dualResult = await dualWriteRepository.createPerson( + timestamp, + { second: true }, + {}, + {}, + teamId, + null, + false, + new UUIDT().toString(), + [{ distinctId: 'dual-first-' + distinctId, version: 0 }] + ) + + expect(singleResult.success).toBe(false) + expect(dualResult.success).toBe(false) + if (!singleResult.success && !dualResult.success) { + expect(singleResult.error).toBe('CreationConflict') + expect(dualResult.error).toBe('CreationConflict') + } + }) + + it('handles person not found consistently', async () => { + const distinctId = 'non-existent-user' + + const singlePerson = await singleWriteRepository.fetchPerson(teamId, distinctId) + const dualPerson = await dualWriteRepository.fetchPerson(teamId, distinctId) + + expect(singlePerson).toBeUndefined() + expect(dualPerson).toBeUndefined() + }) + }) + + describe('Process person profile flag', () => { + it('respects $process_person_profile=false identically', async () => { + const distinctId = 'ephemeral-user' + const event: Partial<PluginEvent> = { + distinct_id: distinctId, + properties: { + $process_person_profile: false, + $set: { should_not_persist: true }, + }, + } + + const { processor: singleProcessor, personsStore: singleStore } = createPersonProcessor( + singleWriteRepository, + event, + false + ) + const { processor: dualProcessor, personsStore: dualStore } = createPersonProcessor( + dualWriteRepository, + event, + false + ) + + await Promise.all([singleProcessor.processEvent(), dualProcessor.processEvent()]) + + await Promise.all([singleStore.flush(), dualStore.flush()]) + + const postgresPersons = await fetchPostgresPersonsH() + expect(postgresPersons.length).toBe(0) + }) + }) + + describe('Batch operations', () => { + it('creates multiple persons in batch consistently', async () => {
+ const distinctIds = ['batch-1', 'batch-2', 'batch-3'] + + for (const id of distinctIds) { + const event: Partial<PluginEvent> = { + distinct_id: `single-${id}`, + properties: { + $set: { batch: 'single', index: id }, + }, + } + const { processor, personsStore } = createPersonProcessor(singleWriteRepository, event) + await processor.processEvent() + await personsStore.flush() + } + + for (const id of distinctIds) { + const event: Partial<PluginEvent> = { + distinct_id: `dual-${id}`, + properties: { + $set: { batch: 'dual', index: id }, + }, + } + const { processor, personsStore } = createPersonProcessor(dualWriteRepository, event) + await processor.processEvent() + await personsStore.flush() + } + + const postgresPersons = await fetchPostgresPersonsH() + const singlePersons = postgresPersons.filter((p: InternalPerson) => p.properties.batch === 'single') + const dualPersons = postgresPersons.filter((p: InternalPerson) => p.properties.batch === 'dual') + + expect(singlePersons.length).toBe(distinctIds.length) + expect(dualPersons.length).toBe(distinctIds.length) + + singlePersons.forEach((person) => { + expect(person.properties.batch).toBe('single') + expect(distinctIds).toContain(person.properties.index) + }) + + dualPersons.forEach((person) => { + expect(person.properties.batch).toBe('dual') + expect(distinctIds).toContain(person.properties.index) + }) + }) + }) + + describe('Complex PersonMergeService transaction scenarios with dual-write', () => { + describe('mergeDistinctIds-OneExists: one person exists, adding new distinct ID', () => { + it('merges distinct IDs atomically when one person exists using PersonMergeService', async () => { + // This test validates the real PersonMergeService.mergeDistinctIds behavior + // when one distinct ID has an existing person and the other doesn't. + // This is the most common merge scenario during $identify events.
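// In sketch form (editor's illustration of the scenario under test):
//   before: existingDistinctId -> person P,  newDistinctId -> (no person)
//   after:  existingDistinctId -> P,         newDistinctId -> P
// with the new posthog_persondistinctid row landing in BOTH databases atomically.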
+ + const existingDistinctId = 'existing-user-merge-svc' + const newDistinctId = 'new-distinct-id-merge-svc' + + // Create an existing person with one distinct ID + const existingPersonResult = await dualWriteRepository.createPerson( + timestamp, + { name: 'Existing User', email: 'user@example.com' }, + {}, + {}, + teamId, + null, + false, + uuidFromDistinctId(teamId, existingDistinctId), + [{ distinctId: existingDistinctId, version: 0 }] + ) + + expect(existingPersonResult.success).toBe(true) + if (!existingPersonResult.success) { + throw new Error('Expected person creation to succeed') + } + const existingPerson = existingPersonResult.person + + // Create a PersonMergeService with dual-write repository + // We need to set up the context similar to how it's done during event processing + const personsStore = new BatchWritingPersonsStoreForBatch(dualWriteRepository, hub.db.kafkaProducer) + + const mergeEvent: PluginEvent = { + team_id: teamId, + distinct_id: newDistinctId, + properties: { + $anon_distinct_id: existingDistinctId, + }, + timestamp: timestamp.toISO(), + now: timestamp.toISO(), + event: '$identify', + uuid: new UUIDT().toString(), + ip: '', + site_url: '', + } as PluginEvent + + const context = new PersonContext( + mergeEvent, + mainTeam, + newDistinctId, + timestamp, + true, // processPerson + hub.db.kafkaProducer, + personsStore, + 0 // deferredUpdatesStep + ) + + const mergeService = new PersonMergeService(context) + + // Call the private mergeDistinctIds method (we'll need to make it accessible for testing) + // For now, let's test through the public handleIdentifyOrAlias method + ;(context as any).anonDistinctId = existingDistinctId + const [mergedPerson, updatePromise] = await mergeService.handleIdentifyOrAlias() + await updatePromise + + // Flush any pending operations + await personsStore.flush() + + // Verify the merge was successful + expect(mergedPerson).toBeDefined() + expect(mergedPerson?.id).toBe(existingPerson.id) + + // Verify both distinct IDs now point to the same person + const personFromExisting = await dualWriteRepository.fetchPerson(teamId, existingDistinctId) + const personFromNew = await dualWriteRepository.fetchPerson(teamId, newDistinctId) + + expect(personFromExisting).toBeDefined() + expect(personFromNew).toBeDefined() + expect(personFromExisting?.id).toBe(personFromNew?.id) + expect(personFromExisting?.id).toBe(existingPerson.id) + + // Verify data consistency across both databases + const primaryDids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [existingPerson.id], + 'verify-primary-dids' + ) + const secondaryDids = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2) ORDER BY distinct_id', + [teamId, existingPerson.uuid], + 'verify-secondary-dids' + ) + + const expectedDids = [existingDistinctId, newDistinctId].sort() + expect(primaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + expect(secondaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + }) + }) + + describe('mergeDistinctIds-NeitherExist: neither distinct ID has a person', () => { + it('creates a new person and adds both distinct IDs atomically using PersonMergeService', async () => { + // This test validates the PersonMergeService.mergeDistinctIds behavior + // when neither distinct ID has an 
existing person yet. + // This happens during $identify events when both IDs are new. + + const firstDistinctId = 'new-user-1-neither' + const secondDistinctId = 'new-user-2-neither' + + // Verify neither distinct ID has a person yet + const person1Before = await dualWriteRepository.fetchPerson(teamId, firstDistinctId) + const person2Before = await dualWriteRepository.fetchPerson(teamId, secondDistinctId) + expect(person1Before).toBeUndefined() + expect(person2Before).toBeUndefined() + + // Create PersonMergeService context for this scenario + const personsStore = new BatchWritingPersonsStoreForBatch(dualWriteRepository, hub.db.kafkaProducer) + + const mergeEvent: PluginEvent = { + team_id: teamId, + distinct_id: firstDistinctId, + properties: { + $anon_distinct_id: secondDistinctId, + $set: { source: 'neither-exist-test' }, + }, + timestamp: timestamp.toISO(), + now: timestamp.toISO(), + event: '$identify', + uuid: new UUIDT().toString(), + ip: '', + site_url: '', + } as PluginEvent + + const context = new PersonContext( + mergeEvent, + mainTeam, + firstDistinctId, + timestamp, + true, // processPerson + hub.db.kafkaProducer, + personsStore, + 0 // deferredUpdatesStep + ) + + const mergeService = new PersonMergeService(context) + + // Set up the context for merging + ;(context as any).anonDistinctId = secondDistinctId + + // Execute the merge - this should create a new person with both distinct IDs + const [mergedPerson, updatePromise] = await mergeService.handleIdentifyOrAlias() + await updatePromise + + // Flush any pending operations + await personsStore.flush() + + // Verify a new person was created + expect(mergedPerson).toBeDefined() + expect(mergedPerson?.properties.source).toBe('neither-exist-test') + expect(mergedPerson?.properties.$creator_event_uuid).toBeDefined() + + // Verify both distinct IDs now point to the same person + const person1After = await dualWriteRepository.fetchPerson(teamId, firstDistinctId) + const person2After = await dualWriteRepository.fetchPerson(teamId, secondDistinctId) + + expect(person1After).toBeDefined() + expect(person2After).toBeDefined() + expect(person1After?.id).toBe(person2After?.id) + expect(person1After?.id).toBe(mergedPerson?.id) + + // Verify the person exists in both databases + const primaryPerson = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE id = $1', + [mergedPerson?.id], + 'verify-primary-person-neither' + ) + const secondaryPerson = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [mergedPerson?.uuid], + 'verify-secondary-person-neither' + ) + + expect(primaryPerson.rows.length).toBe(1) + expect(secondaryPerson.rows.length).toBe(1) + // PersonMergeService adds $creator_event_uuid when creating a new person + expect(primaryPerson.rows[0].properties.source).toBe('neither-exist-test') + expect(primaryPerson.rows[0].properties.$creator_event_uuid).toBeDefined() + expect(secondaryPerson.rows[0].properties.source).toBe('neither-exist-test') + expect(secondaryPerson.rows[0].properties.$creator_event_uuid).toBeDefined() + expect(primaryPerson.rows[0].properties).toEqual(secondaryPerson.rows[0].properties) + + // Verify both distinct IDs are in both databases + const primaryDids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [mergedPerson?.id], + 'verify-primary-dids-neither' + ) + const secondaryDids = await 
hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2) ORDER BY distinct_id', + [teamId, mergedPerson?.uuid], + 'verify-secondary-dids-neither' + ) + + const expectedDids = [firstDistinctId, secondDistinctId].sort() + expect(primaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + expect(secondaryDids.rows.map((r: any) => r.distinct_id)).toEqual(expectedDids) + }) + }) + + describe('mergePeople: both distinct IDs have different persons', () => { + it('merges two existing persons atomically with all their distinct IDs using PersonMergeService', async () => { + // This test validates the most complex merge scenario where both distinct IDs + // already have different persons. This requires: + // 1. Moving all distinct IDs from source person to target person + // 2. Updating the target person's properties + // 3. Deleting the source person + // 4. Updating cohorts and feature flags + // All within a single transaction to ensure atomicity + + const person1DistinctId = 'person1-main' + const person1ExtraId = 'person1-extra' + const person2DistinctId = 'person2-main' + const person2ExtraId = 'person2-extra' + + // Create first person with multiple distinct IDs + const person1Result = await dualWriteRepository.createPerson( + timestamp, + { name: 'Person 1', status: 'active', age: 25 }, + {}, + {}, + teamId, + null, + true, // is_identified + uuidFromDistinctId(teamId, person1DistinctId), + [{ distinctId: person1DistinctId, version: 0 }] + ) + expect(person1Result.success).toBe(true) + if (!person1Result.success) { + throw new Error('Expected person creation to succeed') + } + const person1 = person1Result.person + + // Add extra distinct ID to person1 + await dualWriteRepository.addDistinctId(person1, person1ExtraId, 0) + + // Create second person with multiple distinct IDs + const person2Result = await dualWriteRepository.createPerson( + timestamp.plus({ minutes: 1 }), // Created later + { name: 'Person 2', email: 'person2@test.com', age: 30 }, + {}, + {}, + teamId, + null, + false, // not identified + uuidFromDistinctId(teamId, person2DistinctId), + [{ distinctId: person2DistinctId, version: 0 }] + ) + expect(person2Result.success).toBe(true) + if (!person2Result.success) { + throw new Error('Expected person creation to succeed') + } + const person2 = person2Result.person + + // Add extra distinct ID to person2 + await dualWriteRepository.addDistinctId(person2, person2ExtraId, 0) + + // Now perform the merge using PersonMergeService + const personsStore = new BatchWritingPersonsStoreForBatch(dualWriteRepository, hub.db.kafkaProducer) + + // The merge event would have person2's distinct ID identifying with person1's + const mergeEvent: PluginEvent = { + team_id: teamId, + distinct_id: person1DistinctId, + properties: { + $anon_distinct_id: person2DistinctId, + $set: { merged: true }, + }, + timestamp: timestamp.plus({ minutes: 2 }).toISO(), + now: timestamp.plus({ minutes: 2 }).toISO(), + event: '$identify', + uuid: new UUIDT().toString(), + ip: '', + site_url: '', + } as PluginEvent + + const context = new PersonContext( + mergeEvent, + mainTeam, + person1DistinctId, + timestamp.plus({ minutes: 2 }), + true, // processPerson + hub.db.kafkaProducer, + personsStore, + 0 // deferredUpdatesStep + ) + + const mergeService = new PersonMergeService(context) + + // Set up the context for merging two existing persons + ;(context as 
any).anonDistinctId = person2DistinctId + + // Execute the merge + const [mergedPerson, updatePromise] = await mergeService.handleIdentifyOrAlias() + await updatePromise + + // Flush any pending operations + await personsStore.flush() + + // Verify the merge result + expect(mergedPerson).toBeDefined() + // Person1 was created first, so it should be the target + expect(mergedPerson?.id).toBe(person1.id) + // Properties are merged - person1's properties are kept, person2's new properties are added + // The merge adds new properties but doesn't override existing ones + expect(mergedPerson?.properties.name).toBe('Person 1') // person1's name is kept + expect(mergedPerson?.properties.status).toBe('active') // person1's status is kept + expect(mergedPerson?.properties.email).toBe('person2@test.com') // person2's email is added + expect(mergedPerson?.properties.age).toBe(25) // person1's age is kept + expect(mergedPerson?.properties.merged).toBe(true) // new property from event + expect(mergedPerson?.is_identified).toBe(true) // Should be identified after merge + + // Verify all distinct IDs now point to person1 + const distinctIds = [person1DistinctId, person1ExtraId, person2DistinctId, person2ExtraId] + for (const did of distinctIds) { + const person = await dualWriteRepository.fetchPerson(teamId, did) + expect(person).toBeDefined() + expect(person?.id).toBe(person1.id) + } + + // Verify person2 was deleted + const deletedPerson = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE id = $1', + [person2.id], + 'check-deleted-person' + ) + expect(deletedPerson.rows.length).toBe(0) + + // Verify consistency across both databases + const primaryDids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1 ORDER BY distinct_id', + [person1.id], + 'verify-primary-all-dids' + ) + const secondaryDids = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2) ORDER BY distinct_id', + [teamId, person1.uuid], + 'verify-secondary-all-dids' + ) + + expect(primaryDids.rows.map((r: any) => r.distinct_id).sort()).toEqual(distinctIds.sort()) + expect(secondaryDids.rows.map((r: any) => r.distinct_id).sort()).toEqual(distinctIds.sort()) + + // Verify the merged person's properties are consistent + const primaryPerson = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE id = $1', + [person1.id], + 'verify-primary-merged-person' + ) + const secondaryPerson = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [person1.uuid], + 'verify-secondary-merged-person' + ) + + expect(primaryPerson.rows.length).toBe(1) + expect(secondaryPerson.rows.length).toBe(1) + expect(primaryPerson.rows[0].is_identified).toBe(true) + expect(secondaryPerson.rows[0].is_identified).toBe(true) + }) + }) + + describe('Rollback behavior on transaction failures', () => { + it('rolls back entire merge operation when moveDistinctIds fails in dual-write', async () => { + // This test validates that when any operation within the complex merge transaction fails, + // the entire operation is rolled back atomically across both databases + + const person1Id = 'rollback-person1' + const person2Id = 'rollback-person2' + + // Create two persons + const person1Result = await 
dualWriteRepository.createPerson( + timestamp, + { name: 'Rollback Person 1' }, + {}, + {}, + teamId, + null, + false, + uuidFromDistinctId(teamId, person1Id), + [{ distinctId: person1Id, version: 0 }] + ) + + const person2Result = await dualWriteRepository.createPerson( + timestamp.plus({ minutes: 1 }), + { name: 'Rollback Person 2' }, + {}, + {}, + teamId, + null, + false, + uuidFromDistinctId(teamId, person2Id), + [{ distinctId: person2Id, version: 0 }] + ) + + expect(person1Result.success).toBe(true) + expect(person2Result.success).toBe(true) + if (!person1Result.success || !person2Result.success) { + throw new Error('Expected person creations to succeed') + } + const person1 = person1Result.person + const person2 = person2Result.person + + // Mock the secondary repository to fail during moveDistinctIds + // Need to fail 3 times to exhaust the default retry count + const spy = jest.spyOn((dualWriteRepository as any).secondaryRepo, 'moveDistinctIds') + spy.mockRejectedValueOnce(new Error('Simulated secondary database failure - attempt 1')) + .mockRejectedValueOnce(new Error('Simulated secondary database failure - attempt 2')) + .mockRejectedValueOnce(new Error('Simulated secondary database failure - attempt 3')) + + // Attempt the merge which should fail and rollback + const personsStore = new BatchWritingPersonsStoreForBatch(dualWriteRepository, hub.db.kafkaProducer) + + const mergeEvent: PluginEvent = { + team_id: teamId, + distinct_id: person1Id, + properties: { + $anon_distinct_id: person2Id, + }, + timestamp: timestamp.plus({ minutes: 2 }).toISO(), + now: timestamp.plus({ minutes: 2 }).toISO(), + event: '$identify', + uuid: new UUIDT().toString(), + ip: '', + site_url: '', + } as PluginEvent + + const context = new PersonContext( + mergeEvent, + mainTeam, + person1Id, + timestamp.plus({ minutes: 2 }), + true, + hub.db.kafkaProducer, + personsStore, + 0 + ) + + const mergeService = new PersonMergeService(context) + ;(context as any).anonDistinctId = person2Id + + // The merge should fail internally but PersonMergeService catches errors + // We need to check that the operation didn't succeed + try { + const [_, updatePromise] = await mergeService.handleIdentifyOrAlias() + await updatePromise + await personsStore.flush() + } catch (e: any) { + // Expected to catch error + } + + spy.mockRestore() + + // After exhausting retries, the merge should have failed + // PersonMergeService catches the error and returns undefined + + // Verify nothing changed - both persons should still exist independently + const person1After = await dualWriteRepository.fetchPerson(teamId, person1Id) + const person2After = await dualWriteRepository.fetchPerson(teamId, person2Id) + + expect(person1After).toBeDefined() + expect(person2After).toBeDefined() + expect(person1After?.id).toBe(person1.id) + expect(person2After?.id).toBe(person2.id) + expect(person1After?.id).not.toBe(person2After?.id) + + // Verify distinct IDs are still with their original persons + const primaryPerson1Dids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1', + [person1.id], + 'verify-rollback-person1-dids' + ) + const primaryPerson2Dids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE person_id = $1', + [person2.id], + 'verify-rollback-person2-dids' + ) + + expect(primaryPerson1Dids.rows.map((r: any) => r.distinct_id)).toEqual([person1Id]) + expect(primaryPerson2Dids.rows.map((r: any) => 
r.distinct_id)).toEqual([person2Id]) + + // Verify both persons still exist in both databases + const secondaryPerson1 = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [person1.uuid], + 'verify-secondary-person1-exists' + ) + const secondaryPerson2 = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT * FROM posthog_person WHERE uuid = $1', + [person2.uuid], + 'verify-secondary-person2-exists' + ) + + expect(secondaryPerson1.rows.length).toBe(1) + expect(secondaryPerson2.rows.length).toBe(1) + }) + + it('rolls back when database constraint violation occurs during merge', async () => { + // This test validates rollback behavior when a database constraint is violated + // (e.g., unique constraint on distinct_id) + + const existingId = 'constraint-existing' + const newPersonId = 'constraint-new' + + // Create an existing person + const existingResult = await dualWriteRepository.createPerson( + timestamp, + { name: 'Existing for Constraint Test' }, + {}, + {}, + teamId, + null, + false, + uuidFromDistinctId(teamId, existingId), + [{ distinctId: existingId, version: 0 }] + ) + + expect(existingResult.success).toBe(true) + if (!existingResult.success) { + throw new Error('Expected person creation to succeed') + } + const existingPerson = existingResult.person + + // Mock to simulate a constraint violation when adding distinct ID + // Need to fail 3 times to exhaust the default retry count + const spy = jest.spyOn((dualWriteRepository as any).primaryRepo, 'addDistinctId') + spy.mockRejectedValueOnce(new Error('duplicate key value violates unique constraint - attempt 1')) + .mockRejectedValueOnce(new Error('duplicate key value violates unique constraint - attempt 2')) + .mockRejectedValueOnce(new Error('duplicate key value violates unique constraint - attempt 3')) + + // Try to merge with a new distinct ID + const personsStore = new BatchWritingPersonsStoreForBatch(dualWriteRepository, hub.db.kafkaProducer) + + const mergeEvent: PluginEvent = { + team_id: teamId, + distinct_id: newPersonId, + properties: { + $anon_distinct_id: existingId, + $set: { should_not_persist: true }, + }, + timestamp: timestamp.toISO(), + now: timestamp.toISO(), + event: '$identify', + uuid: new UUIDT().toString(), + ip: '', + site_url: '', + } as PluginEvent + + const context = new PersonContext( + mergeEvent, + mainTeam, + newPersonId, + timestamp, + true, + hub.db.kafkaProducer, + personsStore, + 0 + ) + + const mergeService = new PersonMergeService(context) + ;(context as any).anonDistinctId = existingId + + // The operation should fail internally + try { + const [_, updatePromise] = await mergeService.handleIdentifyOrAlias() + await updatePromise + await personsStore.flush() + } catch (e: any) { + // Expected to catch error + } + + spy.mockRestore() + + // Verify the existing person is unchanged + const personAfter = await dualWriteRepository.fetchPerson(teamId, existingId) + expect(personAfter).toBeDefined() + expect(personAfter?.id).toBe(existingPerson.id) + expect(personAfter?.properties).toEqual({ name: 'Existing for Constraint Test' }) + expect(personAfter?.properties.should_not_persist).toBeUndefined() + + // Verify the new distinct ID was not added + const person2 = await dualWriteRepository.fetchPerson(teamId, newPersonId) + expect(person2).toBeUndefined() + + // Verify consistency across databases + const primaryDids = await hub.db.postgres.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM 
posthog_persondistinctid WHERE person_id = $1', + [existingPerson.id], + 'verify-constraint-primary-dids' + ) + const secondaryDids = await hub.db.postgresPersonMigration.query( + PostgresUse.PERSONS_READ, + 'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 AND person_id = (SELECT id FROM posthog_person WHERE uuid = $2)', + [teamId, existingPerson.uuid], + 'verify-constraint-secondary-dids' + ) + + expect(primaryDids.rows.map((r: any) => r.distinct_id)).toEqual([existingId]) + expect(secondaryDids.rows.map((r: any) => r.distinct_id)).toEqual([existingId]) + }) + }) + }) +})
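// The invariant the rollback tests above pin down, distilled (editor's sketch;
// assertConsistencyAcrossDatabases is the helper from test-helpers.ts, the
// query text is illustrative): after a failed dual write, primary and
// secondary must still agree row-for-row.
//
//     try {
//         await mergeService.handleIdentifyOrAlias() // secondary mocked to fail
//     } catch {
//         // PersonMergeService may also swallow the failure internally
//     }
//     await assertConsistencyAcrossDatabases(
//         hub.db.postgres,
//         hub.db.postgresPersonMigration,
//         'SELECT distinct_id FROM posthog_persondistinctid WHERE team_id = $1 ORDER BY distinct_id',
//         [teamId],
//         'verify-primary',
//         'verify-secondary'
//     )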