refactor: process cookieless through batch pipelines (#38463)

This commit is contained in:
Paweł Ledwoń
2025-09-25 12:41:30 +01:00
committed by GitHub
parent 9335998c53
commit e7bd7841f1
51 changed files with 4038 additions and 1486 deletions

View File

@@ -25,7 +25,7 @@ exports[`IngestionConsumer general overflow force overflow should force events w
{
"headers": {
"distinct_id": "team1-user",
"redirect-step": "async_pipeline_result_handler",
"redirect-step": "result_handler",
"redirect-timestamp": "2025-01-01T00:00:00.000Z",
"token": "THIS IS NOT A TOKEN FOR TEAM 2",
},
@@ -48,7 +48,7 @@ exports[`IngestionConsumer general overflow force overflow should handle multipl
{
"headers": {
"distinct_id": "user1",
"redirect-step": "async_pipeline_result_handler",
"redirect-step": "result_handler",
"redirect-timestamp": "2025-01-01T00:00:00.000Z",
"token": "THIS IS NOT A TOKEN FOR TEAM 2",
},
@@ -66,7 +66,7 @@ exports[`IngestionConsumer general overflow force overflow should handle multipl
{
"headers": {
"distinct_id": "user2",
"redirect-step": "async_pipeline_result_handler",
"redirect-step": "result_handler",
"redirect-timestamp": "2025-01-01T00:00:00.000Z",
"token": "<REPLACED-UUID-1>",
},

View File

@@ -13,6 +13,7 @@ import { closeHub, createHub } from '../../utils/db/hub'
import { PostgresUse } from '../../utils/db/postgres'
import { parseJSON } from '../../utils/json-parse'
import { UUID7 } from '../../utils/utils'
import { isOkResult } from '../pipelines/results'
import {
COOKIELESS_MODE_FLAG_PROPERTY,
COOKIELESS_SENTINEL_VALUE,
@@ -282,8 +283,9 @@ describe('CookielessManager', () => {
headers: { token?: string; distinct_id?: string; timestamp?: string } = {}
): Promise<PipelineEvent | undefined> {
const response = await hub.cookielessManager.doBatch([{ event, team, message, headers }])
expect(response.length).toBeLessThanOrEqual(1)
return response[0]?.event
expect(response.length).toBe(1)
const result = response[0]
return isOkResult(result) ? result.value.event : undefined
}
async function processEventWithHeaders(
@@ -294,10 +296,11 @@ describe('CookielessManager', () => {
headers: { token?: string; distinct_id?: string; timestamp?: string }
}> {
const response = await hub.cookielessManager.doBatch([{ event, team, message, headers }])
expect(response.length).toBeLessThanOrEqual(1)
expect(response.length).toBe(1)
const result = response[0]
return {
event: response[0]?.event,
headers: response[0]?.headers || {},
event: isOkResult(result) ? result.value.event : undefined,
headers: isOkResult(result) ? result.value.headers || {} : {},
}
}

View File

@@ -28,6 +28,7 @@ import { TeamManager } from '../../utils/team-manager'
import { UUID7, bufferToUint32ArrayLE, uint32ArrayLEToBuffer } from '../../utils/utils'
import { compareTimestamps } from '../../worker/ingestion/timestamp-comparison'
import { toStartOfDayInTimezone, toYearMonthDayInTimezone } from '../../worker/ingestion/timestamps'
import { PipelineResult, drop, ok } from '../pipelines/results'
import { RedisHelpers } from './redis-helpers'
/* ---------------------------------------------------------------------
@@ -275,7 +276,7 @@ export class CookielessManager {
return buf
}
async doBatch(events: IncomingEventWithTeam[]): Promise<IncomingEventWithTeam[]> {
async doBatch(events: IncomingEventWithTeam[]): Promise<PipelineResult<IncomingEventWithTeam>[]> {
if (this.config.disabled) {
// cookieless is globally disabled, don't do any processing just drop all cookieless events
return this.dropAllCookielessEvents(events, 'cookieless_globally_disabled')
@@ -296,15 +297,20 @@ export class CookielessManager {
}
}
private async doBatchInner(events: IncomingEventWithTeam[]): Promise<IncomingEventWithTeam[]> {
private async doBatchInner(events: IncomingEventWithTeam[]): Promise<PipelineResult<IncomingEventWithTeam>[]> {
const hashCache: Record<string, Buffer> = {}
// Track results for each input event - initialize all as success, will be overwritten if dropped
const results: PipelineResult<IncomingEventWithTeam>[] = events.map((event) => ok(event))
// do a first pass just to extract properties and compute the base hash for stateful cookieless events
const eventsWithStatus: EventWithStatus[] = []
for (const { event, team, message, headers } of events) {
for (let i = 0; i < events.length; i++) {
const { event, team, message, headers } = events[i]
if (!event.properties?.[COOKIELESS_MODE_FLAG_PROPERTY]) {
// push the event as is, we don't need to do anything with it, but preserve the ordering
eventsWithStatus.push({ event, team, message, headers })
eventsWithStatus.push({ event, team, message, headers, originalIndex: i })
continue
}
@@ -318,6 +324,7 @@ export class CookielessManager {
drop_cause: 'cookieless_disallowed_event',
})
.inc()
results[i] = drop('Event type not supported in cookieless mode')
continue
}
if (
@@ -331,6 +338,7 @@ export class CookielessManager {
drop_cause: 'cookieless_stateless_disallowed_identify',
})
.inc()
results[i] = drop('$identify not supported in stateless cookieless mode')
continue
}
@@ -345,6 +353,7 @@ export class CookielessManager {
drop_cause: 'cookieless_team_disabled',
})
.inc()
results[i] = drop('Cookieless disabled for team')
continue
}
const timestamp = event.timestamp ?? event.sent_at ?? event.now
@@ -356,6 +365,7 @@ export class CookielessManager {
drop_cause: 'cookieless_no_timestamp',
})
.inc()
results[i] = drop('Missing timestamp')
continue
}
@@ -388,6 +398,7 @@ export class CookielessManager {
: 'cookieless_missing_host',
})
.inc()
results[i] = drop(!userAgent ? 'Missing user agent' : !ip ? 'Missing IP' : 'Missing host')
continue
}
@@ -408,6 +419,7 @@ export class CookielessManager {
team,
message,
headers,
originalIndex: i,
firstPass: {
timestampMs,
eventTimeZone,
@@ -422,7 +434,7 @@ export class CookielessManager {
// early exit if we don't need to do anything
if (!eventsWithStatus.some((e) => e.firstPass)) {
return eventsWithStatus
return results
}
// Do a second pass to see what `identifiesRedisKey`s we need to load from redis for stateful events.
@@ -612,13 +624,19 @@ export class CookielessManager {
)
}
// remove the extra processing state from the returned object
return eventsWithStatus.map(({ event, team, message, headers }) => ({ event, team, message, headers }))
// Update results with successfully processed events
for (const { event, team, message, headers, originalIndex } of eventsWithStatus) {
results[originalIndex] = ok({ event, team, message, headers })
}
return results
}
dropAllCookielessEvents(events: IncomingEventWithTeam[], dropCause: string): IncomingEventWithTeam[] {
const nonCookielessEvents: IncomingEventWithTeam[] = []
for (const incomingEvent of events) {
dropAllCookielessEvents(
events: IncomingEventWithTeam[],
dropCause: string
): PipelineResult<IncomingEventWithTeam>[] {
return events.map((incomingEvent) => {
if (incomingEvent.event.properties?.[COOKIELESS_MODE_FLAG_PROPERTY]) {
eventDroppedCounter
.labels({
@@ -626,11 +644,11 @@ export class CookielessManager {
drop_cause: dropCause,
})
.inc()
return drop(dropCause)
} else {
nonCookielessEvents.push(incomingEvent)
return ok(incomingEvent)
}
}
return nonCookielessEvents
})
}
}
@@ -639,6 +657,7 @@ type EventWithStatus = {
event: PipelineEvent
team: Team
headers: EventHeaders
originalIndex: number
// Store temporary processing state. Nest the passes to make type-checking easier
firstPass?: {
timestampMs: number

View File

@@ -0,0 +1,22 @@
import { Hub, IncomingEventWithTeam } from '../../types'
import { PipelineResult, isOkResult, ok } from '../pipelines/results'
/**
 * Creates a batch pipeline step that runs cookieless processing over a whole
 * batch of events via `hub.cookielessManager.doBatch`.
 *
 * For each input element, the corresponding cookieless result (by index) is
 * inspected: successful results are merged back into the element (replacing
 * `eventWithTeam`), while non-ok results (drop/dlq/redirect) are forwarded
 * unchanged so downstream result handling can act on them.
 */
export function createApplyCookielessProcessingStep<T extends { eventWithTeam: IncomingEventWithTeam }>(hub: Hub) {
    return async function applyCookielessProcessingStep(events: T[]): Promise<PipelineResult<T>[]> {
        const incomingEvents = events.map((item) => item.eventWithTeam)
        const batchResults = await hub.cookielessManager.doBatch(incomingEvents)

        const stepResults: PipelineResult<T>[] = []
        for (let i = 0; i < events.length; i++) {
            const batchResult = batchResults[i]
            // Non-ok results carry their own disposition (drop/dlq/redirect)
            // and are passed through as-is.
            stepResults.push(
                isOkResult(batchResult) ? ok({ ...events[i], eventWithTeam: batchResult.value }) : batchResult
            )
        }
        return stepResults
    }
}

View File

@@ -1,8 +1,8 @@
import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
import { EventHeaders } from '../../types'
import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { SyncPreprocessingStep } from '../processing-pipeline'
import { drop, ok } from '../pipelines/results'
import { SyncProcessingStep } from '../pipelines/steps'
function applyDropEventsRestrictions(
eventIngestionRestrictionManager: EventIngestionRestrictionManager,
@@ -16,8 +16,8 @@ function applyDropEventsRestrictions(
export function createApplyDropRestrictionsStep<T extends { headers: EventHeaders }>(
eventIngestionRestrictionManager: EventIngestionRestrictionManager
): SyncPreprocessingStep<T, T> {
return (input) => {
): SyncProcessingStep<T, T> {
return function applyDropRestrictionsStep(input) {
const { headers } = input
if (applyDropEventsRestrictions(eventIngestionRestrictionManager, headers)) {
@@ -30,6 +30,6 @@ export function createApplyDropRestrictionsStep<T extends { headers: EventHeader
return drop('Event dropped due to token restrictions')
}
return success(input)
return ok(input)
}
}

View File

@@ -1,8 +1,8 @@
import { ingestionOverflowingMessagesTotal } from '../../main/ingestion-queues/batch-processing/metrics'
import { EventHeaders } from '../../types'
import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
import { redirect, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { SyncPreprocessingStep } from '../processing-pipeline'
import { ok, redirect } from '../pipelines/results'
import { SyncProcessingStep } from '../pipelines/steps'
export type ForceOverflowDecision = {
shouldRedirect: boolean
@@ -37,17 +37,17 @@ function applyForceOverflowRestrictions(
export function createApplyForceOverflowRestrictionsStep<T extends { headers: EventHeaders }>(
eventIngestionRestrictionManager: EventIngestionRestrictionManager,
overflowConfig: OverflowConfig
): SyncPreprocessingStep<T, T> {
return (input) => {
): SyncProcessingStep<T, T> {
return function applyForceOverflowRestrictionsStep(input) {
const { headers } = input
if (!overflowConfig.overflowEnabled) {
return success(input)
return ok(input)
}
const forceOverflowDecision = applyForceOverflowRestrictions(eventIngestionRestrictionManager, headers)
if (!forceOverflowDecision.shouldRedirect) {
return success(input)
return ok(input)
}
ingestionOverflowingMessagesTotal.inc()

View File

@@ -1,7 +1,7 @@
import { IncomingEventWithTeam } from '../../types'
import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
import { success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { SyncPreprocessingStep } from '../processing-pipeline'
import { ok } from '../pipelines/results'
import { SyncProcessingStep } from '../pipelines/steps'
function applyPersonProcessingRestrictions(
eventWithTeam: IncomingEventWithTeam,
@@ -28,10 +28,10 @@ function applyPersonProcessingRestrictions(
// TODO: Refactor this to use just headers and the team before parsing the event
export function createApplyPersonProcessingRestrictionsStep<T extends { eventWithTeam: IncomingEventWithTeam }>(
eventIngestionRestrictionManager: EventIngestionRestrictionManager
): SyncPreprocessingStep<T, T> {
return (input) => {
): SyncProcessingStep<T, T> {
return function applyPersonProcessingRestrictionsStep(input) {
const { eventWithTeam } = input
applyPersonProcessingRestrictions(eventWithTeam, eventIngestionRestrictionManager)
return success(input)
return ok(input)
}
}

View File

@@ -1,7 +1,8 @@
export { createApplyCookielessProcessingStep } from './apply-cookieless-processing'
export { createApplyDropRestrictionsStep } from './apply-drop-events-restrictions'
export { createApplyForceOverflowRestrictionsStep } from './apply-force-overflow-restrictions'
export { createApplyPersonProcessingRestrictionsStep } from './apply-person-processing-restrictions'
export { createParseHeadersStep } from './parse-headers'
export { createParseKafkaMessageStep } from './parse-kafka-message'
export { createResolveTeamStep } from './resolve-team'
export { createValidateEventUuidStep } from './validate-event-uuid'
export { createApplyForceOverflowRestrictionsStep } from './apply-force-overflow-restrictions'
export { createParseHeadersStep } from './parse-headers'

View File

@@ -2,16 +2,16 @@ import { Message } from 'node-rdkafka'
import { parseEventHeaders } from '../../kafka/consumer'
import { EventHeaders } from '../../types'
import { success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { SyncPreprocessingStep } from '../processing-pipeline'
import { ok } from '../pipelines/results'
import { SyncProcessingStep } from '../pipelines/steps'
export function createParseHeadersStep<T extends { message: Pick<Message, 'headers'> }>(): SyncPreprocessingStep<
export function createParseHeadersStep<T extends { message: Pick<Message, 'headers'> }>(): SyncProcessingStep<
T,
T & { headers: EventHeaders }
> {
return (input) => {
return function parseHeadersStep(input) {
const { message } = input
const parsedHeaders = parseEventHeaders(message.headers)
return success({ ...input, headers: parsedHeaders })
return ok({ ...input, headers: parsedHeaders })
}
}

View File

@@ -4,8 +4,8 @@ import { IncomingEvent, PipelineEvent } from '../../types'
import { normalizeEvent } from '../../utils/event'
import { parseJSON } from '../../utils/json-parse'
import { logger } from '../../utils/logger'
import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { SyncPreprocessingStep } from '../processing-pipeline'
import { drop, ok } from '../pipelines/results'
import { SyncProcessingStep } from '../pipelines/steps'
function parseKafkaMessage(message: Message): IncomingEvent | null {
try {
@@ -21,11 +21,11 @@ function parseKafkaMessage(message: Message): IncomingEvent | null {
}
}
export function createParseKafkaMessageStep<T extends { message: Message }>(): SyncPreprocessingStep<
export function createParseKafkaMessageStep<T extends { message: Message }>(): SyncProcessingStep<
T,
T & { event: IncomingEvent }
> {
return (input) => {
return function parseKafkaMessageStep(input) {
const { message } = input
const parsedEvent = parseKafkaMessage(message)
@@ -33,6 +33,6 @@ export function createParseKafkaMessageStep<T extends { message: Message }>(): S
return drop('Failed to parse Kafka message')
}
return success({ ...input, event: parsedEvent })
return ok({ ...input, event: parsedEvent })
}
}

View File

@@ -3,8 +3,8 @@ import { Message } from 'node-rdkafka'
import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
import { EventHeaders, Hub, IncomingEvent, IncomingEventWithTeam } from '../../types'
import { tokenOrTeamPresentCounter } from '../../worker/ingestion/event-pipeline/metrics'
import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { AsyncPreprocessingStep } from '../processing-pipeline'
import { drop, ok } from '../pipelines/results'
import { AsyncProcessingStep } from '../pipelines/steps'
async function resolveTeam(
hub: Pick<Hub, 'teamManager'>,
@@ -51,8 +51,8 @@ async function resolveTeam(
export function createResolveTeamStep<T extends { message: Message; headers: EventHeaders; event: IncomingEvent }>(
hub: Hub
): AsyncPreprocessingStep<T, T & { eventWithTeam: IncomingEventWithTeam }> {
return async (input) => {
): AsyncProcessingStep<T, T & { eventWithTeam: IncomingEventWithTeam }> {
return async function resolveTeamStep(input) {
const { message, headers, event } = input
const eventWithTeam = await resolveTeam(hub, message, headers, event.event)
@@ -61,6 +61,6 @@ export function createResolveTeamStep<T extends { message: Message; headers: Eve
return drop('Failed to resolve team')
}
return success({ ...input, eventWithTeam })
return ok({ ...input, eventWithTeam })
}
}

View File

@@ -1,9 +1,9 @@
import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
import { Hub, IncomingEventWithTeam } from '../../types'
import { UUID } from '../../utils/utils'
import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
import { captureIngestionWarning } from '../../worker/ingestion/utils'
import { AsyncPreprocessingStep } from '../processing-pipeline'
import { drop, ok } from '../pipelines/results'
import { AsyncProcessingStep } from '../pipelines/steps'
async function isEventUuidValid(eventWithTeam: IncomingEventWithTeam, hub: Pick<Hub, 'db'>): Promise<boolean> {
const { event, team } = eventWithTeam
@@ -39,13 +39,13 @@ async function isEventUuidValid(eventWithTeam: IncomingEventWithTeam, hub: Pick<
export function createValidateEventUuidStep<T extends { eventWithTeam: IncomingEventWithTeam }>(
hub: Hub
): AsyncPreprocessingStep<T, T> {
return async (input) => {
): AsyncProcessingStep<T, T> {
return async function validateEventUuidStep(input) {
const { eventWithTeam } = input
const isValid = await isEventUuidValid(eventWithTeam, hub)
if (!isValid) {
return drop('Event has invalid UUID')
}
return success(input)
return ok(input)
}
}

View File

@@ -31,10 +31,10 @@ import { BatchWritingGroupStore } from '../worker/ingestion/groups/batch-writing
import { GroupStoreForBatch } from '../worker/ingestion/groups/group-store-for-batch.interface'
import { BatchWritingPersonsStore } from '../worker/ingestion/persons/batch-writing-person-store'
import { FlushResult, PersonsStoreForBatch } from '../worker/ingestion/persons/persons-store-for-batch'
import { PipelineConfig, ResultHandlingPipeline } from '../worker/ingestion/result-handling-pipeline'
import { deduplicateEvents } from './deduplication/events'
import { DeduplicationRedis, createDeduplicationRedis } from './deduplication/redis-client'
import {
createApplyCookielessProcessingStep,
createApplyDropRestrictionsStep,
createApplyForceOverflowRestrictionsStep,
createApplyPersonProcessingRestrictionsStep,
@@ -43,6 +43,8 @@ import {
createResolveTeamStep,
createValidateEventUuidStep,
} from './event-preprocessing'
import { createBatch, createNewBatchPipeline, createNewPipeline } from './pipelines/helpers'
import { PipelineConfig, ResultHandlingPipeline } from './pipelines/result-handling-pipeline'
import { MemoryRateLimiter } from './utils/overflow-detector'
const ingestionEventOverflowed = new Counter({
@@ -113,7 +115,7 @@ export class IngestionConsumer {
private deduplicationRedis: DeduplicationRedis
public readonly promiseScheduler = new PromiseScheduler()
private preprocessingPipeline: (message: Message) => Promise<PreprocessedEvent | null>
private batchPreprocessingPipeline!: ResultHandlingPipeline<{ message: Message }, PreprocessedEvent>
constructor(
private hub: Hub,
@@ -174,37 +176,6 @@ export class IngestionConsumer {
groupId: this.groupId,
topic: this.topic,
})
// Initialize preprocessing pipeline
this.preprocessingPipeline = async (message: Message) => {
const pipelineConfig: PipelineConfig = {
kafkaProducer: this.kafkaProducer!,
dlqTopic: this.dlqTopic,
promiseScheduler: this.promiseScheduler,
}
try {
const pipeline = ResultHandlingPipeline.of({ message }, message, pipelineConfig)
.pipe(createParseHeadersStep())
.pipe(createApplyDropRestrictionsStep(this.eventIngestionRestrictionManager))
.pipe(
createApplyForceOverflowRestrictionsStep(this.eventIngestionRestrictionManager, {
overflowEnabled: this.overflowEnabled(),
overflowTopic: this.overflowTopic || '',
preservePartitionLocality: this.hub.INGESTION_OVERFLOW_PRESERVE_PARTITION_LOCALITY,
})
)
.pipe(createParseKafkaMessageStep())
.pipeAsync(createResolveTeamStep(this.hub))
.pipe(createApplyPersonProcessingRestrictionsStep(this.eventIngestionRestrictionManager))
.pipeAsync(createValidateEventUuidStep(this.hub))
return await pipeline.unwrap()
} catch (error) {
console.error('Error processing message in pipeline:', error)
throw error
}
}
}
public get service(): PluginServerService {
@@ -227,6 +198,9 @@ export class IngestionConsumer {
}),
])
// Initialize batch preprocessing pipeline after kafka producer is available
this.initializePipeline()
await this.kafkaConsumer.connect(async (messages) => {
return await instrumentFn(
{
@@ -238,6 +212,39 @@ export class IngestionConsumer {
})
}
private initializePipeline(): void {
const pipelineConfig: PipelineConfig = {
kafkaProducer: this.kafkaProducer!,
dlqTopic: this.dlqTopic,
promiseScheduler: this.promiseScheduler,
}
// Create preprocessing pipeline
const preprocessingPipeline = createNewPipeline()
.pipe(createParseHeadersStep())
.pipe(createApplyDropRestrictionsStep(this.eventIngestionRestrictionManager))
.pipe(
createApplyForceOverflowRestrictionsStep(this.eventIngestionRestrictionManager, {
overflowEnabled: this.overflowEnabled(),
overflowTopic: this.overflowTopic || '',
preservePartitionLocality: this.hub.INGESTION_OVERFLOW_PRESERVE_PARTITION_LOCALITY,
})
)
.pipe(createParseKafkaMessageStep())
.pipeAsync(createResolveTeamStep(this.hub))
.pipe(createApplyPersonProcessingRestrictionsStep(this.eventIngestionRestrictionManager))
.pipeAsync(createValidateEventUuidStep(this.hub))
// Create the batch processing pipeline with fluent API
const batchPipeline = createNewBatchPipeline()
.pipeConcurrently(preprocessingPipeline)
.gather()
.pipeBatch(createApplyCookielessProcessingStep(this.hub))
// Wrap it in the result handling pipeline
this.batchPreprocessingPipeline = ResultHandlingPipeline.of(batchPipeline, pipelineConfig)
}
public async stop(): Promise<void> {
logger.info('🔁', `${this.name} - stopping`)
this.isStopping = true
@@ -308,10 +315,7 @@ export class IngestionConsumer {
preprocessedEvents.map((x) => x.event)
)
)
const postCookielessMessages = await this.runInstrumented('cookielessProcessing', () =>
this.hub.cookielessManager.doBatch(preprocessedEvents.map((x) => x.eventWithTeam))
)
const eventsPerDistinctId = this.groupEventsByDistinctId(postCookielessMessages)
const eventsPerDistinctId = this.groupEventsByDistinctId(preprocessedEvents.map((x) => x.eventWithTeam))
// Check if hogwatcher should be used (using the same sampling logic as in the transformer)
const shouldRunHogWatcher = Math.random() < this.hub.CDP_HOG_WATCHER_SAMPLE_RATE
@@ -610,12 +614,21 @@ export class IngestionConsumer {
}
private async preprocessEvents(messages: Message[]): Promise<PreprocessedEvent[]> {
const pipelinePromises = messages.map(async (message) => {
return await this.preprocessingPipeline(message)
})
// Create batch using the helper function
const batch = createBatch(messages)
const results = await Promise.all(pipelinePromises)
return results.filter((result): result is PreprocessedEvent => result !== null)
// Feed batch to the pipeline
this.batchPreprocessingPipeline.feed(batch)
// Get all results from the gather pipeline (should return all results in one call)
const result = await this.batchPreprocessingPipeline.next()
if (result === null) {
return []
}
// Return the results (already filtered to successful ones by ResultHandlingPipeline)
return result
}
private groupEventsByDistinctId(messages: IncomingEventWithTeam[]): IncomingEventsByDistinctId {

View File

@@ -0,0 +1,14 @@
import { PipelineResultWithContext } from './pipeline.interface'
/**
 * A batch of per-element pipeline results, each paired with the context it
 * originated from (e.g. the source Kafka message). Order matches the order in
 * which elements were fed into the pipeline.
 */
export type BatchPipelineResultWithContext<T> = PipelineResultWithContext<T>[]

/**
 * Interface for batch processing pipelines.
 *
 * Implementations receive work via `feed()` and emit processed batches via
 * `next()`; `next()` resolves to null when no results are currently pending.
 */
export interface BatchPipeline<TInput, TIntermediate> {
    // Queue a batch of elements for processing; must not block on processing.
    feed(elements: BatchPipelineResultWithContext<TInput>): void
    // Produce the next processed batch, or null when nothing is pending.
    next(): Promise<BatchPipelineResultWithContext<TIntermediate> | null>
}

View File

@@ -0,0 +1,225 @@
import { Message } from 'node-rdkafka'
import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { BufferingBatchPipeline } from './buffering-batch-pipeline'
import { dlq, drop, ok, redirect } from './results'
// Unit tests for BufferingBatchPipeline: a root pipeline stage that buffers
// everything fed into it and drains the accumulated batch on next().
// Covers construction, buffering/accumulation, ordering, mixed result kinds
// (ok/drop/dlq/redirect), and resuming after the buffer has been drained.
describe('BufferingBatchPipeline', () => {
    let message1: Message
    let message2: Message
    let message3: Message
    let context1: { message: Message }
    let context2: { message: Message }
    let context3: { message: Message }

    beforeEach(() => {
        // Create different mock messages with unique properties
        message1 = {
            topic: 'test-topic',
            partition: 0,
            offset: 1,
            key: Buffer.from('key1'),
            value: Buffer.from('value1'),
            timestamp: Date.now(),
        } as Message
        message2 = {
            topic: 'test-topic',
            partition: 0,
            offset: 2,
            key: Buffer.from('key2'),
            value: Buffer.from('value2'),
            timestamp: Date.now() + 1,
        } as Message
        message3 = {
            topic: 'test-topic',
            partition: 0,
            offset: 3,
            key: Buffer.from('key3'),
            value: Buffer.from('value3'),
            timestamp: Date.now() + 2,
        } as Message
        context1 = { message: message1 }
        context2 = { message: message2 }
        context3 = { message: message3 }
    })

    describe('constructor', () => {
        it('should create instance with default type', () => {
            const pipeline = new BufferingBatchPipeline()
            expect(pipeline).toBeInstanceOf(BufferingBatchPipeline)
        })

        it('should create instance with custom type', () => {
            const pipeline = new BufferingBatchPipeline<string>()
            expect(pipeline).toBeInstanceOf(BufferingBatchPipeline)
        })
    })

    describe('feed', () => {
        it('should add elements to buffer', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const batch: BatchPipelineResultWithContext<string> = [
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ]

            pipeline.feed(batch)

            // Buffer is internal, so we test through next()
            const result = await pipeline.next()
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ])
        })

        it('should accumulate multiple feeds', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('hello'), context: context1 }]
            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('world'), context: context2 }]

            pipeline.feed(batch1)
            pipeline.feed(batch2)

            // Both feeds should be returned together in a single drained batch
            const result = await pipeline.next()
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ])
        })

        it('should handle empty batch', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const emptyBatch: BatchPipelineResultWithContext<string> = []

            pipeline.feed(emptyBatch)

            // Feeding an empty batch leaves the buffer empty, so next() yields null
            const result = await pipeline.next()
            expect(result).toEqual(null)
        })
    })

    describe('next', () => {
        it('should return null when buffer is empty', async () => {
            const pipeline = new BufferingBatchPipeline<string>()

            const result = await pipeline.next()
            expect(result).toBeNull()
        })

        it('should return all buffered elements and clear buffer', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const batch: BatchPipelineResultWithContext<string> = [
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ]

            pipeline.feed(batch)

            const result1 = await pipeline.next()
            const result2 = await pipeline.next()

            // First call drains everything; second call sees an empty buffer
            expect(result1).toEqual([
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ])
            expect(result2).toBeNull()
        })

        it('should handle mixed result types', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const dropResult = drop<string>('test drop')
            const dlqResult = dlq<string>('test dlq', new Error('test error'))
            const redirectResult = redirect<string>('test redirect', 'test-topic')

            const batch: BatchPipelineResultWithContext<string> = [
                { result: ok('hello'), context: context1 },
                { result: dropResult, context: context2 },
                { result: dlqResult, context: context3 },
                { result: redirectResult, context: context1 },
            ]

            pipeline.feed(batch)

            const result = await pipeline.next()
            const result2 = await pipeline.next()

            // Non-ok results are buffered and returned untouched, in order
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: dropResult, context: context2 },
                { result: dlqResult, context: context3 },
                { result: redirectResult, context: context1 },
            ])
            expect(result2).toBeNull()
        })

        it('should preserve order of fed elements', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('first'), context: context1 }]
            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('second'), context: context2 }]
            const batch3: BatchPipelineResultWithContext<string> = [{ result: ok('third'), context: context3 }]

            pipeline.feed(batch1)
            pipeline.feed(batch2)
            pipeline.feed(batch3)

            const result = await pipeline.next()
            const result2 = await pipeline.next()

            expect(result).toEqual([
                { result: ok('first'), context: context1 },
                { result: ok('second'), context: context2 },
                { result: ok('third'), context: context3 },
            ])
            expect(result2).toBeNull()
        })

        it('should handle large number of elements', async () => {
            const pipeline = new BufferingBatchPipeline<string>()
            const batch: BatchPipelineResultWithContext<string> = []

            for (let i = 0; i < 100; i++) {
                batch.push({ result: ok(`item${i}`), context: context1 })
            }

            pipeline.feed(batch)

            const result = await pipeline.next()
            const result2 = await pipeline.next()

            expect(result).toHaveLength(100)
            expect(result![0]).toEqual({ result: ok('item0'), context: context1 })
            expect(result![99]).toEqual({ result: ok('item99'), context: context1 })
            expect(result2).toBeNull()
        })

        it('should resume after returning null when more elements are fed', async () => {
            const pipeline = new BufferingBatchPipeline<string>()

            // First round: feed and process
            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('first'), context: context1 }]
            pipeline.feed(batch1)

            const result1 = await pipeline.next()
            expect(result1).toEqual([{ result: ok('first'), context: context1 }])

            // Should return null when buffer is empty
            const result2 = await pipeline.next()
            expect(result2).toBeNull()

            // Feed more elements
            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('second'), context: context2 }]
            pipeline.feed(batch2)

            // Should resume processing
            const result3 = await pipeline.next()
            expect(result3).toEqual([{ result: ok('second'), context: context2 }])

            // Should return null again
            const result4 = await pipeline.next()
            expect(result4).toBeNull()
        })
    })
})

View File

@@ -0,0 +1,26 @@
import { Message } from 'node-rdkafka'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { ConcurrentBatchProcessingPipeline } from './concurrent-batch-pipeline'
import { Pipeline } from './pipeline.interface'
export class BufferingBatchPipeline<T = { message: Message }> implements BatchPipeline<T, T> {
private buffer: BatchPipelineResultWithContext<T> = []
feed(elements: BatchPipelineResultWithContext<T>): void {
this.buffer.push(...elements)
}
async next(): Promise<BatchPipelineResultWithContext<T> | null> {
if (this.buffer.length === 0) {
return null
}
const results = this.buffer
this.buffer = []
return Promise.resolve(results)
}
pipeConcurrently<U>(processor: Pipeline<T, U>): ConcurrentBatchProcessingPipeline<T, T, U> {
return new ConcurrentBatchProcessingPipeline(processor, this)
}
}

View File

@@ -0,0 +1,313 @@
import { Message } from 'node-rdkafka'
import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { ConcurrentBatchProcessingPipeline } from './concurrent-batch-pipeline'
import { createNewBatchPipeline, createNewPipeline } from './helpers'
import { dlq, drop, ok, redirect } from './results'
// Unit tests for ConcurrentBatchProcessingPipeline: ok items in a batch are
// kicked off concurrently, while next() yields one result per call in feed order.
describe('ConcurrentBatchProcessingPipeline', () => {
    let message1: Message
    let message2: Message
    let message3: Message
    let context1: { message: Message }
    let context2: { message: Message }
    let context3: { message: Message }

    beforeEach(() => {
        // Create different mock messages with unique properties
        message1 = {
            topic: 'test-topic',
            partition: 0,
            offset: 1,
            key: Buffer.from('key1'),
            value: Buffer.from('value1'),
            timestamp: Date.now(),
        } as Message
        message2 = {
            topic: 'test-topic',
            partition: 0,
            offset: 2,
            key: Buffer.from('key2'),
            value: Buffer.from('value2'),
            timestamp: Date.now() + 1,
        } as Message
        message3 = {
            topic: 'test-topic',
            partition: 0,
            offset: 3,
            key: Buffer.from('key3'),
            value: Buffer.from('value3'),
            timestamp: Date.now() + 2,
        } as Message
        context1 = { message: message1 }
        context2 = { message: message2 }
        context3 = { message: message3 }
    })

    describe('constructor', () => {
        it('should create instance with processor and previous pipeline', () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
            const previousPipeline = createNewBatchPipeline<string>()
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            expect(pipeline).toBeInstanceOf(ConcurrentBatchProcessingPipeline)
        })
    })

    describe('feed', () => {
        it('should delegate to previous pipeline', () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
            const previousPipeline = createNewBatchPipeline<string>()
            const spy = jest.spyOn(previousPipeline, 'feed')
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
            pipeline.feed(testBatch)
            expect(spy).toHaveBeenCalledWith(testBatch)
        })
    })

    describe('next', () => {
        it('should return null when no results available', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
            const previousPipeline = createNewBatchPipeline<string>()
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const result = await pipeline.next()
            expect(result).toBeNull()
        })

        it('should process successful results concurrently', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
            const previousPipeline = createNewBatchPipeline<string>()
            // Feed some test data
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const result1 = await pipeline.next()
            const result2 = await pipeline.next()
            const result3 = await pipeline.next()
            // next() emits one single-element batch per call, in feed order
            expect(result1).toEqual([{ result: ok('HELLO'), context: context1 }])
            expect(result2).toEqual([{ result: ok('WORLD'), context: context2 }])
            expect(result3).toBeNull()
        })

        it('should preserve non-success results without processing', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
            const dropResult = drop<string>('test drop')
            const dlqResult = dlq<string>('test dlq', new Error('test error'))
            const redirectResult = redirect<string>('test redirect', 'test-topic')
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: dropResult, context: context1 },
                { result: dlqResult, context: context2 },
                { result: redirectResult, context: context3 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const result1 = await pipeline.next()
            const result2 = await pipeline.next()
            const result3 = await pipeline.next()
            const result4 = await pipeline.next()
            // Non-ok results must flow through untouched, with context intact
            expect(result1).toEqual([{ result: dropResult, context: context1 }])
            expect(result2).toEqual([{ result: dlqResult, context: context2 }])
            expect(result3).toEqual([{ result: redirectResult, context: context3 }])
            expect(result4).toBeNull()
        })

        it('should handle mixed success and non-success results', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
            const dropResult = drop<string>('test drop')
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: ok('hello'), context: context1 },
                { result: dropResult, context: context2 },
                { result: ok('world'), context: context3 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const result1 = await pipeline.next()
            const result2 = await pipeline.next()
            const result3 = await pipeline.next()
            const result4 = await pipeline.next()
            expect(result1).toEqual([{ result: ok('HELLO'), context: context1 }])
            expect(result2).toEqual([{ result: dropResult, context: context2 }])
            expect(result3).toEqual([{ result: ok('WORLD'), context: context3 }])
            expect(result4).toBeNull()
        })

        it('should handle async processing delays correctly', async () => {
            const processor = createNewPipeline<string>().pipeAsync(async (input: string) => {
                // Simulate async delay
                await new Promise((resolve) => setTimeout(resolve, 10))
                return ok(input.toUpperCase())
            })
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: ok('fast'), context: context1 },
                { result: ok('slow'), context: context2 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const startTime = Date.now()
            const result1 = await pipeline.next()
            const result2 = await pipeline.next()
            const endTime = Date.now()
            expect(result1).toEqual([{ result: ok('FAST'), context: context1 }])
            expect(result2).toEqual([{ result: ok('SLOW'), context: context2 }])
            // Both should complete around the same time due to concurrent processing
            expect(endTime - startTime).toBeLessThan(50) // concurrent ~10ms vs ~20ms sequential; 50ms bound absorbs timer jitter
        })

        it('should handle processor errors gracefully', async () => {
            const processor = createNewPipeline<string>().pipeAsync((_input: string) => {
                return Promise.reject(new Error('Processor error'))
            })
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            // Processor rejections propagate out of next() rather than being swallowed
            await expect(pipeline.next()).rejects.toThrow('Processor error')
        })

        it('should process multiple batches sequentially', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
            const previousPipeline = createNewBatchPipeline<string>()
            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('batch1'), context: context1 }]
            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('batch2'), context: context2 }]
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            // First batch: feed then next
            previousPipeline.feed(batch1)
            const result1 = await pipeline.next()
            expect(result1).toEqual([{ result: ok('BATCH1'), context: context1 }])
            // Second batch: feed then next
            previousPipeline.feed(batch2)
            const result2 = await pipeline.next()
            expect(result2).toEqual([{ result: ok('BATCH2'), context: context2 }])
            // Third call should return null
            const result3 = await pipeline.next()
            expect(result3).toBeNull()
        })

        it('should maintain promise queue state between calls', async () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: ok('item1'), context: context1 },
                { result: ok('item2'), context: context2 },
                { result: ok('item3'), context: context3 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            // First call should process first item
            const result1 = await pipeline.next()
            expect(result1).toEqual([{ result: ok('ITEM1'), context: context1 }])
            // Second call should process second item
            const result2 = await pipeline.next()
            expect(result2).toEqual([{ result: ok('ITEM2'), context: context2 }])
            // Third call should process third item
            const result3 = await pipeline.next()
            expect(result3).toEqual([{ result: ok('ITEM3'), context: context3 }])
            // Fourth call should return null
            const result4 = await pipeline.next()
            expect(result4).toBeNull()
        })
    })

    describe('gather', () => {
        it('should return GatheringBatchPipeline instance', () => {
            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
            const previousPipeline = createNewBatchPipeline<string>()
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            const gatherPipeline = pipeline.gather()
            expect(gatherPipeline).toBeDefined()
            // NOTE(review): asserted via constructor.name rather than toBeInstanceOf —
            // presumably to avoid importing GatheringBatchPipeline here; confirm.
            expect(gatherPipeline.constructor.name).toBe('GatheringBatchPipeline')
        })
    })

    describe('concurrent processing behavior', () => {
        it('should process items concurrently within a batch', async () => {
            const processingOrder: string[] = []
            const processor = createNewPipeline<string>().pipeAsync(async (input: string) => {
                processingOrder.push(`start-${input}`)
                // Simulate different processing times
                const delay = input === 'slow' ? 50 : 10
                await new Promise((resolve) => setTimeout(resolve, delay))
                processingOrder.push(`end-${input}`)
                return ok(input.toUpperCase())
            })
            const previousPipeline = createNewBatchPipeline<string>()
            const testBatch: BatchPipelineResultWithContext<string> = [
                { result: ok('fast'), context: context1 },
                { result: ok('slow'), context: context2 },
                { result: ok('medium'), context: context3 },
            ]
            previousPipeline.feed(testBatch)
            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
            // Process the batch
            const result1 = await pipeline.next()
            const result2 = await pipeline.next()
            const result3 = await pipeline.next()
            // Verify results
            expect(result1).toEqual([{ result: ok('FAST'), context: context1 }])
            expect(result2).toEqual([{ result: ok('SLOW'), context: context2 }])
            expect(result3).toEqual([{ result: ok('MEDIUM'), context: context3 }])
            // Verify concurrent processing (all starts before any end)
            expect(processingOrder).toEqual([
                'start-fast',
                'start-slow',
                'start-medium',
                'end-fast',
                'end-medium',
                'end-slow',
            ])
        })
    })
})

View File

@@ -0,0 +1,55 @@
import { instrumentFn } from '../../common/tracing/tracing-utils'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { GatheringBatchPipeline } from './gathering-batch-pipeline'
import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { isOkResult } from './results'
/**
 * Batch pipeline stage that applies a single-item processor to every ok item
 * of an incoming batch concurrently, while next() yields exactly one result
 * per call in feed order.
 */
export class ConcurrentBatchProcessingPipeline<TInput, TIntermediate, TOutput>
    implements BatchPipeline<TInput, TOutput>
{
    // In-flight per-item work, started eagerly and drained one promise per next() call.
    private promiseQueue: Promise<PipelineResultWithContext<TOutput>>[] = []

    constructor(
        private processor: Pipeline<TIntermediate, TOutput>,
        private previousPipeline: BatchPipeline<TInput, TIntermediate>
    ) {}

    /** Forwards incoming elements to the upstream pipeline. */
    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.previousPipeline.feed(elements)
    }

    /**
     * Pulls the next batch from upstream (if any), starts processing every ok
     * item immediately, and returns the oldest pending result as a
     * single-element batch. Non-ok results bypass the processor untouched.
     * Returns null once no work remains.
     */
    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
        const incoming = await this.previousPipeline.next()
        if (incoming !== null) {
            const stepName = this.processor.constructor.name || 'anonymousProcessor'
            for (const item of incoming) {
                if (isOkResult(item.result)) {
                    // Kick off processing now; emission order is fixed by queue position.
                    this.promiseQueue.push(instrumentFn(stepName, () => this.processor.process(item)))
                } else {
                    // Non-ok results (drop/dlq/redirect) are passed through as-is.
                    this.promiseQueue.push(
                        Promise.resolve({
                            result: item.result,
                            context: item.context,
                        })
                    )
                }
            }
        }
        const head = this.promiseQueue.shift()
        return head === undefined ? null : [await head]
    }

    /** Wraps this stage so all pending results are gathered into one batch. */
    gather(): GatheringBatchPipeline<TInput, TOutput> {
        return new GatheringBatchPipeline(this)
    }
}

View File

@@ -0,0 +1,268 @@
import { Message } from 'node-rdkafka'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { GatheringBatchPipeline } from './gathering-batch-pipeline'
import { createNewBatchPipeline } from './helpers'
import { dlq, drop, ok, redirect } from './results'
// Minimal scripted BatchPipeline for tests: replays pre-canned batches in
// order, and lets tests append further batches at runtime via feed().
class MockBatchProcessingPipeline<T> implements BatchPipeline<T, T> {
    // Kept by reference so batches pushed after construction are replayed too.
    private results: BatchPipelineResultWithContext<T>[]
    // Index of the next batch to hand out.
    private currentIndex = 0

    constructor(results: BatchPipelineResultWithContext<T>[]) {
        this.results = results
    }

    feed(elements: BatchPipelineResultWithContext<T>): void {
        this.results.push(elements)
    }

    async next(): Promise<BatchPipelineResultWithContext<T> | null> {
        if (this.currentIndex >= this.results.length) {
            return null
        }
        const batch = this.results[this.currentIndex]
        this.currentIndex += 1
        return batch
    }
}
// Unit tests for GatheringBatchPipeline: drains its sub-pipeline completely
// and re-emits everything it produced as one combined batch.
describe('GatheringBatchPipeline', () => {
    let message1: Message
    let message2: Message
    let message3: Message
    let context1: { message: Message }
    let context2: { message: Message }
    let context3: { message: Message }

    beforeEach(() => {
        // Create different mock messages with unique properties
        message1 = {
            topic: 'test-topic',
            partition: 0,
            offset: 1,
            key: Buffer.from('key1'),
            value: Buffer.from('value1'),
            timestamp: Date.now(),
        } as Message
        message2 = {
            topic: 'test-topic',
            partition: 0,
            offset: 2,
            key: Buffer.from('key2'),
            value: Buffer.from('value2'),
            timestamp: Date.now() + 1,
        } as Message
        message3 = {
            topic: 'test-topic',
            partition: 0,
            offset: 3,
            key: Buffer.from('key3'),
            value: Buffer.from('value3'),
            timestamp: Date.now() + 2,
        } as Message
        context1 = { message: message1 }
        context2 = { message: message2 }
        context3 = { message: message3 }
    })

    describe('constructor', () => {
        it('should create instance with sub-pipeline', () => {
            const subPipeline = createNewBatchPipeline<string>()
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            expect(gatherPipeline).toBeInstanceOf(GatheringBatchPipeline)
        })
    })

    describe('feed', () => {
        it('should delegate to sub-pipeline', () => {
            const subPipeline = createNewBatchPipeline<string>()
            const spy = jest.spyOn(subPipeline, 'feed')
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
            gatherPipeline.feed(testBatch)
            expect(spy).toHaveBeenCalledWith(testBatch)
        })
    })

    describe('next', () => {
        it('should return null when no results available', async () => {
            const subPipeline = createNewBatchPipeline<string>()
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            expect(result).toBeNull()
        })

        it('should gather all results from sub-pipeline in single call', async () => {
            const subPipeline = new MockBatchProcessingPipeline([
                [{ result: ok('hello'), context: context1 }],
                [{ result: ok('world'), context: context2 }],
                [{ result: ok('test'), context: context3 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            const result2 = await gatherPipeline.next()
            // Three upstream single-element batches collapse into one combined batch
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
                { result: ok('test'), context: context3 },
            ])
            expect(result2).toBeNull()
        })

        it('should preserve non-success results', async () => {
            const dropResult = drop<string>('test drop')
            const dlqResult = dlq<string>('test dlq', new Error('test error'))
            const redirectResult = redirect<string>('test redirect', 'test-topic')
            const subPipeline = new MockBatchProcessingPipeline([
                [{ result: dropResult, context: context1 }],
                [{ result: dlqResult, context: context2 }],
                [{ result: redirectResult, context: context3 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            const result2 = await gatherPipeline.next()
            expect(result).toEqual([
                { result: dropResult, context: context1 },
                { result: dlqResult, context: context2 },
                { result: redirectResult, context: context3 },
            ])
            expect(result2).toBeNull()
        })

        it('should handle mixed success and non-success results', async () => {
            const dropResult = drop<string>('test drop')
            const subPipeline = new MockBatchProcessingPipeline([
                [{ result: ok('hello'), context: context1 }],
                [{ result: dropResult, context: context2 }],
                [{ result: ok('world'), context: context3 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            const result2 = await gatherPipeline.next()
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: dropResult, context: context2 },
                { result: ok('world'), context: context3 },
            ])
            expect(result2).toBeNull()
        })

        it('should handle empty batches from sub-pipeline', async () => {
            const subPipeline = new MockBatchProcessingPipeline([
                [], // Empty batch
                [{ result: ok('hello'), context: context1 }],
                [], // Another empty batch
                [{ result: ok('world'), context: context2 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            const result2 = await gatherPipeline.next()
            // Empty upstream batches contribute nothing but do not stop the drain
            expect(result).toEqual([
                { result: ok('hello'), context: context1 },
                { result: ok('world'), context: context2 },
            ])
            expect(result2).toBeNull()
        })

        it('should return null when all batches are empty', async () => {
            const subPipeline = new MockBatchProcessingPipeline([
                [], // Empty batch
                [], // Another empty batch
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            expect(result).toBeNull()
        })

        it('should preserve order of results from sub-pipeline', async () => {
            const subPipeline = new MockBatchProcessingPipeline([
                [{ result: ok('first'), context: context1 }],
                [{ result: ok('second'), context: context2 }],
                [{ result: ok('third'), context: context3 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            expect(result).toEqual([
                { result: ok('first'), context: context1 },
                { result: ok('second'), context: context2 },
                { result: ok('third'), context: context3 },
            ])
        })

        it('should handle large number of batches', async () => {
            const batches: BatchPipelineResultWithContext<string>[] = []
            for (let i = 0; i < 10; i++) {
                batches.push([{ result: ok(`item${i}`), context: context1 }])
            }
            const subPipeline = new MockBatchProcessingPipeline(batches)
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            const result = await gatherPipeline.next()
            const result2 = await gatherPipeline.next()
            expect(result).toHaveLength(10)
            expect(result![0]).toEqual({ result: ok('item0'), context: context1 })
            expect(result![9]).toEqual({ result: ok('item9'), context: context1 })
            expect(result2).toBeNull()
        })

        it('should resume after returning null when more batches are fed', async () => {
            const subPipeline = new MockBatchProcessingPipeline([
                [{ result: ok('first'), context: context1 }],
                [{ result: ok('second'), context: context2 }],
            ])
            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
            // First round: process initial batches
            const result1 = await gatherPipeline.next()
            expect(result1).toEqual([
                { result: ok('first'), context: context1 },
                { result: ok('second'), context: context2 },
            ])
            // Should return null when exhausted
            const result2 = await gatherPipeline.next()
            expect(result2).toBeNull()
            // Feed more batches
            subPipeline.feed([{ result: ok('third'), context: context3 }])
            // Should resume processing
            const result3 = await gatherPipeline.next()
            expect(result3).toEqual([{ result: ok('third'), context: context3 }])
            // Should return null again
            const result4 = await gatherPipeline.next()
            expect(result4).toBeNull()
        })
    })
})

View File

@@ -0,0 +1,45 @@
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { isOkResult, ok } from './results'
import { BatchProcessingStep, SequentialBatchPipeline } from './sequential-batch-pipeline'
/**
 * Batch pipeline stage that drains its wrapped sub-pipeline to exhaustion and
 * emits everything it produced as one combined batch.
 */
export class GatheringBatchPipeline<TInput, TOutput> implements BatchPipeline<TInput, TOutput> {
    constructor(private subPipeline: BatchPipeline<TInput, TOutput>) {}

    /** Forwards incoming elements to the wrapped sub-pipeline. */
    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.subPipeline.feed(elements)
    }

    /**
     * Repeatedly calls the sub-pipeline's next() until it returns null, then
     * returns all collected results as a single batch (or null if nothing was
     * produced). Order and per-item context are preserved; non-ok results are
     * carried through unchanged.
     */
    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
        const gathered: BatchPipelineResultWithContext<TOutput> = []
        for (let batch = await this.subPipeline.next(); batch !== null; batch = await this.subPipeline.next()) {
            for (const item of batch) {
                // Ok values are re-wrapped; everything else is kept as-is.
                gathered.push(
                    isOkResult(item.result) ? { result: ok(item.result.value), context: item.context } : item
                )
            }
        }
        return gathered.length === 0 ? null : gathered
    }

    /** Chains a batch-level processing step after the gather point. */
    pipeBatch<U>(step: BatchProcessingStep<TOutput, U>): SequentialBatchPipeline<TInput, TOutput, U> {
        return new SequentialBatchPipeline(step, this)
    }
}

View File

@@ -0,0 +1,30 @@
import { Message } from 'node-rdkafka'
import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { BufferingBatchPipeline } from './buffering-batch-pipeline'
import { ok } from './results'
import { StartPipeline } from './start-pipeline'
/**
 * Helper function to create a new processing pipeline for single items.
 * The item type defaults to a wrapped Kafka message.
 */
export function createNewPipeline<T = { message: Message }>(): StartPipeline<T> {
    return new StartPipeline<T>()
}
/**
 * Helper function to create a new batch processing pipeline starting with a
 * buffering root pipeline: batches passed to feed() are drained by next().
 */
export function createNewBatchPipeline<T = { message: Message }>(): BufferingBatchPipeline<T> {
    return new BufferingBatchPipeline<T>()
}
/**
 * Wraps raw Kafka messages into a batch of ok results, each carrying its own
 * message as the processing context.
 */
export function createBatch(messages: Message[]): BatchPipelineResultWithContext<{ message: Message }> {
    const batch: BatchPipelineResultWithContext<{ message: Message }> = []
    for (const message of messages) {
        batch.push({
            result: ok({ message }),
            context: { message },
        })
    }
    return batch
}

View File

@@ -0,0 +1,25 @@
import { Message } from 'node-rdkafka'
import { PipelineResult } from './results'
/**
 * Processing context that carries the originating Kafka message through
 * pipeline transformations, so downstream result handling can reference it
 * (e.g. for DLQ or redirect).
 */
export interface PipelineContext {
    message: Message
}
/**
 * Result with context wrapper that carries both the pipeline result and the
 * processing context for a single item.
 */
export interface PipelineResultWithContext<T> {
    result: PipelineResult<T>
    context: PipelineContext
}
/**
 * Interface for single-item processors: transforms one result-with-context
 * into another, possibly changing the payload type.
 */
export interface Pipeline<TInput, TOutput> {
    process(input: PipelineResultWithContext<TInput>): Promise<PipelineResultWithContext<TOutput>>
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,369 @@
import { Message } from 'node-rdkafka'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from '../../worker/ingestion/pipeline-helpers'
import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { createNewBatchPipeline } from './helpers'
import { PipelineConfig, ResultHandlingPipeline } from './result-handling-pipeline'
import { dlq, drop, ok, redirect } from './results'
// Mock the pipeline helpers so the tests can assert how non-ok results are
// routed. jest.mock calls are hoisted above imports, so the imported helpers
// resolve to these mocks.
jest.mock('../../worker/ingestion/pipeline-helpers', () => ({
    logDroppedMessage: jest.fn(),
    redirectMessageToTopic: jest.fn(),
    sendMessageToDLQ: jest.fn(),
}))

// Typed handles to the mocked helpers for call assertions below.
const mockLogDroppedMessage = logDroppedMessage as jest.MockedFunction<typeof logDroppedMessage>
const mockRedirectMessageToTopic = redirectMessageToTopic as jest.MockedFunction<typeof redirectMessageToTopic>
const mockSendMessageToDLQ = sendMessageToDLQ as jest.MockedFunction<typeof sendMessageToDLQ>
describe('ResultHandlingPipeline', () => {
let mockKafkaProducer: KafkaProducerWrapper
let mockPromiseScheduler: PromiseScheduler
let config: PipelineConfig
beforeEach(() => {
jest.clearAllMocks()
mockKafkaProducer = {
producer: {} as any,
queueMessages: jest.fn(),
} as unknown as KafkaProducerWrapper
mockPromiseScheduler = {
schedule: jest.fn(),
} as unknown as PromiseScheduler
config = {
kafkaProducer: mockKafkaProducer,
dlqTopic: 'test-dlq',
promiseScheduler: mockPromiseScheduler,
}
})
describe('basic functionality', () => {
it('should process successful results and return values', async () => {
const messages: Message[] = [
{ value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
{ value: Buffer.from('test2'), topic: 'test', partition: 0, offset: 2 } as Message,
]
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: ok({ processed: 'test1' }), context: { message: messages[0] } },
{ result: ok({ processed: 'test2' }), context: { message: messages[1] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([{ processed: 'test1' }, { processed: 'test2' }])
})
it('should handle empty batch', async () => {
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed([])
const results = await resultPipeline.next()
expect(results).toBeNull()
})
})
describe('result handling', () => {
it('should filter out dropped results and log them', async () => {
const messages: Message[] = [
{ value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
{ value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
{ value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
]
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: ok({ processed: 'test1' }), context: { message: messages[0] } },
{ result: drop('test drop reason'), context: { message: messages[1] } },
{ result: ok({ processed: 'test3' }), context: { message: messages[2] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
expect(mockLogDroppedMessage).toHaveBeenCalledWith(messages[1], 'test drop reason', 'result_handler')
})
it('should filter out redirected results and redirect them', async () => {
const messages: Message[] = [
{ value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
{ value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 2 } as Message,
{ value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
]
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: ok({ processed: 'test1' }), context: { message: messages[0] } },
{ result: redirect('test redirect', 'overflow-topic', true, false), context: { message: messages[1] } },
{ result: ok({ processed: 'test3' }), context: { message: messages[2] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
mockKafkaProducer,
mockPromiseScheduler,
messages[1],
'overflow-topic',
'result_handler',
true,
false
)
})
it('should filter out dlq results and send to DLQ', async () => {
const messages: Message[] = [
{ value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
{ value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 2 } as Message,
{ value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
]
const testError = new Error('test error')
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: ok({ processed: 'test1' }), context: { message: messages[0] } },
{ result: dlq('test dlq reason', testError), context: { message: messages[1] } },
{ result: ok({ processed: 'test3' }), context: { message: messages[2] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
messages[1],
testError,
'result_handler',
'test-dlq'
)
})
it('should handle dlq result without error and create default error', async () => {
const messages: Message[] = [
{ value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 1 } as Message,
]
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: dlq('test dlq reason'), context: { message: messages[0] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([])
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
messages[0],
expect.any(Error),
'result_handler',
'test-dlq'
)
const errorArg = (mockSendMessageToDLQ as jest.Mock).mock.calls[0][2]
expect(errorArg.message).toBe('test dlq reason')
})
it('should handle mixed results correctly', async () => {
const messages: Message[] = [
{ value: Buffer.from('success1'), topic: 'test', partition: 0, offset: 1 } as Message,
{ value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
{ value: Buffer.from('success2'), topic: 'test', partition: 0, offset: 3 } as Message,
{ value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 4 } as Message,
{ value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 5 } as Message,
]
// Create batch results directly
const batchResults: BatchPipelineResultWithContext<any> = [
{ result: ok({ processed: 'success1' }), context: { message: messages[0] } },
{ result: drop('dropped item'), context: { message: messages[1] } },
{ result: ok({ processed: 'success2' }), context: { message: messages[2] } },
{ result: redirect('redirected item', 'overflow-topic'), context: { message: messages[3] } },
{ result: dlq('dlq item', new Error('processing error')), context: { message: messages[4] } },
]
const pipeline = createNewBatchPipeline()
const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
resultPipeline.feed(batchResults)
const results = await resultPipeline.next()
expect(results).toEqual([{ processed: 'success1' }, { processed: 'success2' }])
// Verify all non-success results were handled
expect(mockLogDroppedMessage).toHaveBeenCalledWith(messages[1], 'dropped item', 'result_handler')
expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
mockKafkaProducer,
mockPromiseScheduler,
messages[3],
'overflow-topic',
'result_handler',
true,
true
)
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
messages[4],
expect.any(Error),
'result_handler',
'test-dlq'
)
})
})
// A batch made entirely of OK results should pass straight through: every
// value is surfaced and none of the side-effect helpers (DLQ/drop/redirect) fire.
describe('concurrent processing', () => {
    it('should handle concurrent processing results', async () => {
        const messages: Message[] = [
            { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
            { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
            { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
        ]
        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            { result: ok({ count: 2 }), context: { message: messages[0] } },
            { result: ok({ count: 4 }), context: { message: messages[1] } },
            { result: ok({ count: 6 }), context: { message: messages[2] } },
        ]
        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()
        expect(results).toEqual([{ count: 2 }, { count: 4 }, { count: 6 }])
    })
})
// redirect() created without explicit flags must be forwarded with the
// defaults preserveKey=true and awaitAck=true, and not surfaced as a success.
describe('redirect result with default parameters', () => {
    it('should use default preserveKey and awaitAck when not specified', async () => {
        const messages: Message[] = [
            { value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 1 } as Message,
        ]
        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            { result: redirect('test redirect', 'overflow-topic'), context: { message: messages[0] } },
        ]
        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()
        // Redirected messages are diverted, so nothing reaches the output batch.
        expect(results).toEqual([])
        expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
            mockKafkaProducer,
            mockPromiseScheduler,
            messages[0],
            'overflow-topic',
            'result_handler',
            true, // default preserveKey
            true // default awaitAck
        )
    })
})
})
// End-to-end checks of ResultHandlingPipeline with mocked Kafka producer and
// promise scheduler: OK results flow through unchanged, DLQ results are
// handed to sendMessageToDLQ with the configured DLQ topic.
describe('Integration tests', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let config: PipelineConfig
    beforeEach(() => {
        jest.clearAllMocks()
        // Minimal stand-ins; only the members touched by the pipeline are mocked.
        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper
        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler
        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })
    it('should handle realistic event processing pipeline', async () => {
        const messages: Message[] = [
            { value: Buffer.from('test-event'), topic: 'test', partition: 0, offset: 1 } as Message,
        ]
        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            {
                result: ok({
                    eventType: 'pageview',
                    userId: 'user123',
                    isValid: true,
                    timestamp: '2023-01-01T00:00:00Z',
                }),
                context: { message: messages[0] },
            },
        ]
        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()
        expect(results).toEqual([
            {
                eventType: 'pageview',
                userId: 'user123',
                isValid: true,
                timestamp: '2023-01-01T00:00:00Z',
            },
        ])
    })
    it('should handle pipeline failure at different stages', async () => {
        const messages: Message[] = [{ value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message]
        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            { result: dlq('Validation failed', new Error('Invalid data')), context: { message: messages[0] } },
        ]
        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()
        // The failed message is dead-lettered, never returned as a success.
        expect(results).toEqual([])
        expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
            mockKafkaProducer,
            messages[0],
            expect.any(Error),
            'result_handler',
            'test-dlq'
        )
    })
})

View File

@@ -0,0 +1,88 @@
import { Message } from 'node-rdkafka'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from '../../worker/ingestion/pipeline-helpers'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { PipelineResult, isDlqResult, isDropResult, isOkResult, isRedirectResult } from './results'
/**
 * Dependencies ResultHandlingPipeline needs to act on non-success results.
 */
export type PipelineConfig = {
    kafkaProducer: KafkaProducerWrapper // used for both DLQ and redirect publishes
    dlqTopic: string // destination topic for dead-lettered messages
    promiseScheduler: PromiseScheduler // passed through to redirectMessageToTopic
}
/**
 * Wraps any BatchPipeline and takes care of every non-OK outcome — DLQ, DROP
 * and REDIRECT — via the shared pipeline helpers, while yielding only the
 * successful payloads to the caller.
 */
export class ResultHandlingPipeline<TInput, TOutput> {
    static of<TInput, TOutput>(
        pipeline: BatchPipeline<TInput, TOutput>,
        config: PipelineConfig
    ): ResultHandlingPipeline<TInput, TOutput> {
        return new ResultHandlingPipeline(pipeline, config)
    }

    constructor(
        private pipeline: BatchPipeline<TInput, TOutput>,
        private config: PipelineConfig
    ) {}

    /** Forward a batch of inputs to the wrapped pipeline. */
    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.pipeline.feed(elements)
    }

    /**
     * Pulls the next batch from the wrapped pipeline. Successful values are
     * collected and returned; each non-OK result triggers its side effect
     * (DLQ publish, drop log, or redirect) keyed on the original Kafka message
     * carried in its context. Returns null when the wrapped pipeline is done.
     */
    async next(): Promise<TOutput[] | null> {
        const batch = await this.pipeline.next()
        if (batch === null) {
            return null
        }
        const successes: TOutput[] = []
        for (const { result, context } of batch) {
            if (isOkResult(result)) {
                successes.push(result.value as TOutput)
            } else {
                await this.handleNonSuccessResult(result, context.message, 'result_handler')
            }
        }
        return successes
    }

    /** Dispatch a single non-OK result to the matching side-effect helper. */
    private async handleNonSuccessResult(
        result: PipelineResult<TOutput>,
        originalMessage: Message,
        stepName: string
    ): Promise<void> {
        if (isDlqResult(result)) {
            await sendMessageToDLQ(
                this.config.kafkaProducer,
                originalMessage,
                result.error || new Error(result.reason),
                stepName,
                this.config.dlqTopic
            )
        } else if (isDropResult(result)) {
            logDroppedMessage(originalMessage, result.reason, stepName)
        } else if (isRedirectResult(result)) {
            await redirectMessageToTopic(
                this.config.kafkaProducer,
                this.config.promiseScheduler,
                originalMessage,
                result.topic,
                stepName,
                result.preserveKey ?? true,
                result.awaitAck ?? true
            )
        }
    }
}

View File

@@ -0,0 +1,70 @@
/** Discriminant tag shared by every PipelineResult variant. */
export enum PipelineResultType {
    OK,
    DLQ,
    DROP,
    REDIRECT,
}
/**
 * Generic result type for pipeline steps that can succeed, be dropped, be
 * redirected to another topic, or be sent to the dead-letter queue.
 */
export type PipelineResultOk<T> = { type: PipelineResultType.OK; value: T }
export type PipelineResultDlq = { type: PipelineResultType.DLQ; reason: string; error: unknown }
export type PipelineResultDrop = { type: PipelineResultType.DROP; reason: string }
export type PipelineResultRedirect = {
    type: PipelineResultType.REDIRECT
    reason: string
    topic: string
    preserveKey?: boolean
    awaitAck?: boolean
}
export type PipelineResult<T> = PipelineResultOk<T> | PipelineResultDlq | PipelineResultDrop | PipelineResultRedirect
/**
 * Helper constructors for pipeline step results.
 */
/** Successful result carrying the step's output value. */
export function ok<T>(value: T): PipelineResult<T> {
    return { type: PipelineResultType.OK, value }
}
/**
 * Dead-letter result. `error` is typed `unknown` (not `any`) to match the
 * declared PipelineResultDlq shape; consumers must narrow before use.
 */
export function dlq<T>(reason: string, error?: unknown): PipelineResult<T> {
    return { type: PipelineResultType.DLQ, reason, error }
}
/** Drop result: the message is discarded with the given reason. */
export function drop<T>(reason: string): PipelineResult<T> {
    return { type: PipelineResultType.DROP, reason }
}
/**
 * Redirect result: the message is re-published to `topic`. Key preservation
 * and ack awaiting both default to true.
 */
export function redirect<T>(
    reason: string,
    topic: string,
    preserveKey: boolean = true,
    awaitAck: boolean = true
): PipelineResult<T> {
    return {
        type: PipelineResultType.REDIRECT,
        reason,
        topic,
        preserveKey,
        awaitAck,
    }
}
/**
 * Type guard functions narrowing a PipelineResult to one variant.
 */
export function isOkResult<T>(result: PipelineResult<T>): result is PipelineResultOk<T> {
    return result.type === PipelineResultType.OK
}
export function isDlqResult<T>(result: PipelineResult<T>): result is PipelineResultDlq {
    return result.type === PipelineResultType.DLQ
}
export function isDropResult<T>(result: PipelineResult<T>): result is PipelineResultDrop {
    return result.type === PipelineResultType.DROP
}
export function isRedirectResult<T>(result: PipelineResult<T>): result is PipelineResultRedirect {
    return result.type === PipelineResultType.REDIRECT
}

View File

@@ -0,0 +1,214 @@
import { Message } from 'node-rdkafka'
import { createBatch, createNewBatchPipeline } from './helpers'
import { dlq, drop, ok } from './results'
import { SequentialBatchPipeline } from './sequential-batch-pipeline'
// Unit tests for SequentialBatchPipeline: batch step application, propagation
// of non-success results, concurrent per-item processing, and error paths.
describe('SequentialBatchPipeline', () => {
    describe('basic functionality', () => {
        it('should process batch through pipeline', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('test2'), topic: 'test', partition: 0, offset: 2 } as Message,
            ]
            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(items.map((item: any) => ok({ processed: item.message.value?.toString() })))
            }, rootPipeline)
            pipeline.feed(batch)
            const results = await pipeline.next()
            // Each result keeps its originating message in context.
            expect(results).toEqual([
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: ok({ processed: 'test2' }), context: { message: messages[1] } },
            ])
        })
        it('should handle empty batch', async () => {
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(items.map((item: any) => ok(item)))
            }, rootPipeline)
            pipeline.feed([])
            // An exhausted/empty feed yields null, signalling end of input.
            const results = await pipeline.next()
            expect(results).toEqual(null)
        })
    })
    describe('pipe() - batch operations', () => {
        it('should execute batch step on all successful values', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]
            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(
                    items.map((item: any) => ok({ count: parseInt(item.message.value?.toString() || '0') * 2 }))
                )
            }, rootPipeline)
            pipeline.feed(batch)
            const results = await pipeline.next()
            expect(results).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: ok({ count: 4 }), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
            ])
        })
        it('should preserve non-success results and only process successful ones', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
                { value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 4 } as Message,
            ]
            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            // First stage drops/dead-letters selected messages.
            const firstPipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(
                    items.map((item: any) => {
                        const value = item.message.value?.toString() || ''
                        if (value === 'drop') {
                            return drop('dropped item')
                        }
                        if (value === 'dlq') {
                            return dlq('dlq item', new Error('test error'))
                        }
                        return ok({ count: parseInt(value) })
                    })
                )
            }, rootPipeline)
            const secondPipeline = new SequentialBatchPipeline((items: any[]) => {
                // Should only receive successful items
                expect(items).toEqual([{ count: 1 }, { count: 3 }])
                return Promise.resolve(items.map((item: any) => ok({ count: item.count * 2 })))
            }, firstPipeline)
            secondPipeline.feed(batch)
            const results = await secondPipeline.next()
            // Non-success results pass through untouched, in their original positions.
            expect(results).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: drop('dropped item'), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
                { result: dlq('dlq item', new Error('test error')), context: { message: messages[3] } },
            ])
        })
    })
    describe('pipeConcurrently() - concurrent individual processing', () => {
        it('should process each item concurrently', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]
            const batch = createBatch(messages)
            const processor = {
                async process(input: any) {
                    await new Promise((resolve) => setTimeout(resolve, 1))
                    const count = parseInt(input.result.value.message.value?.toString() || '0')
                    return { result: ok({ count: count * 2 }), context: input.context }
                },
            }
            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)
            pipeline.feed(batch)
            // Collect all results by calling next() until it returns null
            const allResults = []
            let result = await pipeline.next()
            while (result !== null) {
                allResults.push(...result) // Flatten the array
                result = await pipeline.next()
            }
            expect(allResults).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: ok({ count: 4 }), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
            ])
        })
        it('should preserve order despite concurrent execution', async () => {
            // Delays are deliberately out of order (30, 10, 20 ms) to prove
            // output ordering follows input order, not completion order.
            const messages: Message[] = [
                { value: Buffer.from('30'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('10'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('20'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]
            const batch = createBatch(messages)
            const processor = {
                async process(input: any) {
                    const delay = parseInt(input.result.value.message.value?.toString() || '0')
                    await new Promise((resolve) => setTimeout(resolve, delay))
                    return { result: ok({ processed: delay }), context: input.context }
                },
            }
            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)
            pipeline.feed(batch)
            // Collect all results by calling next() until it returns null
            const allResults = []
            let result = await pipeline.next()
            while (result !== null) {
                allResults.push(...result) // Flatten the array
                result = await pipeline.next()
            }
            expect(allResults).toEqual([
                { result: ok({ processed: 30 }), context: { message: messages[0] } },
                { result: ok({ processed: 10 }), context: { message: messages[1] } },
                { result: ok({ processed: 20 }), context: { message: messages[2] } },
            ])
        })
    })
    describe('error handling', () => {
        it('should propagate errors from batch operations', async () => {
            const messages: Message[] = [{ value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message]
            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline(() => {
                return Promise.reject(new Error('Batch step failed'))
            }, rootPipeline)
            pipeline.feed(batch)
            await expect(pipeline.next()).rejects.toThrow('Batch step failed')
        })
        it('should propagate errors from concurrent operations', async () => {
            const messages: Message[] = [{ value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message]
            const batch = createBatch(messages)
            const processor = {
                process() {
                    return Promise.reject(new Error('Concurrent step failed'))
                },
            }
            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)
            pipeline.feed(batch)
            await expect(pipeline.next()).rejects.toThrow('Concurrent step failed')
        })
    })
})

View File

@@ -0,0 +1,61 @@
import { instrumentFn } from '../../common/tracing/tracing-utils'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { PipelineResultWithContext } from './pipeline.interface'
import { PipelineResult, PipelineResultOk, isOkResult } from './results'
/**
 * Type guard for a PipelineResultWithContext whose `result` is the OK variant,
 * allowing callers to read `result.value` without further narrowing.
 */
function isSuccessResultWithContext<T>(
    resultWithContext: PipelineResultWithContext<T>
): resultWithContext is PipelineResultWithContext<T> & { result: PipelineResultOk<T> } {
    return isOkResult(resultWithContext.result)
}
// A batch step must return exactly one PipelineResult per input value, in the
// same order — SequentialBatchPipeline maps results back to contexts by index.
export type BatchProcessingStep<T, U> = (values: T[]) => Promise<PipelineResult<U>[]>
/**
 * A batch pipeline stage that applies `currentStep` to the successful values
 * produced by `previousPipeline`, leaving non-success results (and every
 * result's context) untouched and in their original batch positions.
 */
export class SequentialBatchPipeline<TInput, TIntermediate, TOutput> implements BatchPipeline<TInput, TOutput> {
    constructor(
        private currentStep: BatchProcessingStep<TIntermediate, TOutput>,
        private previousPipeline: BatchPipeline<TInput, TIntermediate>
    ) {}

    /** Feeding goes straight to the head of the chain. */
    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.previousPipeline.feed(elements)
    }

    /**
     * Pulls the next batch upstream, runs the step over the OK values only,
     * then splices the step's outputs back into the batch by position.
     * Relies on the step returning one result per input, in order.
     */
    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
        const upstream = await this.previousPipeline.next()
        if (upstream === null) {
            return null
        }
        // Only OK values are handed to the step.
        const okValues: TIntermediate[] = []
        for (const item of upstream) {
            if (isSuccessResultWithContext(item)) {
                okValues.push(item.result.value)
            }
        }
        // Skip instrumentation entirely when there is nothing to process.
        const stepName = this.currentStep.name || 'anonymousBatchStep'
        const stepResults: PipelineResult<TOutput>[] =
            okValues.length > 0 ? await instrumentFn(stepName, () => this.currentStep(okValues)) : []
        // Re-assemble: OK slots consume step results in order; everything else
        // passes through unchanged with its context.
        let cursor = 0
        return upstream.map(({ result, context }) => {
            if (isOkResult(result)) {
                return { result: stepResults[cursor++], context }
            }
            return { result, context }
        })
    }
}

View File

@@ -0,0 +1,87 @@
import { Message } from 'node-rdkafka'
import { dlq, drop, ok, redirect } from './results'
import { StartPipeline } from './start-pipeline'
import { StepPipeline } from './step-pipeline'
// Unit tests for StartPipeline: identity pass-through of each result variant
// and step wiring via pipe()/pipeAsync().
describe('StartPipeline', () => {
    describe('basic functionality', () => {
        it('should process single item through pipeline with success result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const pipeline = new StartPipeline<{ data: string }>().pipe((input) => {
                return ok({ processed: input.data })
            })
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
        it('should process single item through pipeline with drop result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return drop('dropped item')
            })
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: drop('dropped item'), context: { message } })
        })
        it('should process single item through pipeline with dlq result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return dlq('dlq item', new Error('test error'))
            })
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: dlq('dlq item', new Error('test error')), context: { message } })
        })
        it('should process single item through pipeline with redirect result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return redirect('redirect item', 'retry-topic')
            })
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: redirect('redirect item', 'retry-topic'), context: { message } })
        })
    })
    describe('pipe() - synchronous steps', () => {
        it('should return StepPipeline instance and call the step', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step = jest.fn().mockReturnValue(ok({ processed: 'test' }))
            const pipeline = new StartPipeline<{ data: string }>()
            const stepPipeline = pipeline.pipe(step)
            expect(stepPipeline).toBeInstanceOf(StepPipeline)
            const result = await stepPipeline.process({ result: ok({ data: 'test' }), context: { message } })
            // The step receives the unwrapped value, not the result wrapper.
            expect(step).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
    })
    describe('pipeAsync() - async steps', () => {
        it('should return StepPipeline instance and call the async step', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const asyncStep = jest.fn().mockResolvedValue(ok({ processed: 'test' }))
            const pipeline = new StartPipeline<{ data: string }>()
            const stepPipeline = pipeline.pipeAsync(asyncStep)
            expect(stepPipeline).toBeInstanceOf(StepPipeline)
            const result = await stepPipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(asyncStep).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
    })
})

View File

@@ -0,0 +1,27 @@
import { instrumentFn } from '~/common/tracing/tracing-utils'
import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { StepPipeline } from './step-pipeline'
import { AsyncProcessingStep, SyncProcessingStep } from './steps'
/**
 * Identity head of a pipeline chain: process() passes results through
 * unchanged, and pipe()/pipeAsync() attach instrumented steps as new
 * StepPipeline stages.
 */
export class StartPipeline<T> implements Pipeline<T, T> {
    async process(input: PipelineResultWithContext<T>): Promise<PipelineResultWithContext<T>> {
        return input
    }

    /** Attach a synchronous step; it is wrapped so instrumentFn can trace it by name. */
    pipe<U>(step: SyncProcessingStep<T, U>): StepPipeline<T, T, U> {
        const stepName = step.name || 'anonymousStep'
        const wrapped = async (value: T) => instrumentFn(stepName, () => Promise.resolve(step(value)))
        return new StepPipeline<T, T, U>(wrapped, this)
    }

    /** Attach an asynchronous step, instrumented the same way. */
    pipeAsync<U>(step: AsyncProcessingStep<T, U>): StepPipeline<T, T, U> {
        const stepName = step.name || 'anonymousAsyncStep'
        const wrapped = async (value: T) => instrumentFn(stepName, () => step(value))
        return new StepPipeline<T, T, U>(wrapped, this)
    }
}

View File

@@ -0,0 +1,140 @@
import { Message } from 'node-rdkafka'
import { drop, isOkResult, ok } from './results'
import { StartPipeline } from './start-pipeline'
import { StepPipeline } from './step-pipeline'
// Unit tests for StepPipeline: step execution on success, short-circuit on
// non-success, error propagation, and chaining via pipe()/pipeAsync().
describe('StepPipeline', () => {
    describe('constructor', () => {
        it('should create instance with step and previous pipeline', () => {
            const mockStep = jest.fn()
            const mockPrevious = {} as any
            const pipeline = new StepPipeline(mockStep, mockPrevious)
            expect(pipeline).toBeInstanceOf(StepPipeline)
        })
    })
    describe('process', () => {
        it('should execute step when previous result is success', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step = jest.fn().mockResolvedValue(ok({ processed: 'test' }))
            const previous = new StartPipeline<{ data: string }>()
            const pipeline = new StepPipeline(step, previous)
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(step).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
        it('should skip step when previous result is not success', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step = jest.fn()
            const previous = new StartPipeline<{ data: string }>()
            const pipeline = new StepPipeline(step, previous)
            // A non-OK upstream result short-circuits: the step never runs.
            const result = await pipeline.process({ result: drop('dropped'), context: { message } })
            expect(step).not.toHaveBeenCalled()
            expect(result).toEqual({ result: drop('dropped'), context: { message } })
        })
        it('should handle step errors', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step = jest.fn().mockRejectedValue(new Error('Step failed'))
            const previous = new StartPipeline<{ data: string }>()
            const pipeline = new StepPipeline(step, previous)
            await expect(pipeline.process({ result: ok({ data: 'test' }), context: { message } })).rejects.toThrow(
                'Step failed'
            )
        })
    })
    describe('pipe', () => {
        it('should create new StepPipeline with additional step', () => {
            const step1 = jest.fn()
            const step2 = jest.fn()
            const previous = {} as any
            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipe(step2)
            expect(pipeline2).toBeInstanceOf(StepPipeline)
        })
        it('should execute steps in order when processing through chained pipeline', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step1 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value + 5 })) // Add 5
            })
            const step2 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value * 2 })) // Multiply by 2
            })
            const previous = new StartPipeline<{ value: number }>()
            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipe(step2)
            const result = await pipeline2.process({ result: ok({ value: 10 }), context: { message } })
            expect(step1).toHaveBeenCalledWith({ value: 10 })
            expect(step2).toHaveBeenCalledWith({ value: 15 }) // 10 + 5
            const pipelineResult = result.result
            expect(isOkResult(pipelineResult)).toBe(true)
            if (isOkResult(pipelineResult)) {
                expect(pipelineResult.value).toEqual({ value: 30 }) // (10 + 5) * 2 = 30
            }
            expect(step1).toHaveBeenCalledTimes(1)
            expect(step2).toHaveBeenCalledTimes(1)
        })
    })
    describe('pipeAsync', () => {
        it('should create new StepPipeline with async step', () => {
            const step1 = jest.fn()
            const asyncStep = jest.fn()
            const previous = {} as any
            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipeAsync(asyncStep)
            expect(pipeline2).toBeInstanceOf(StepPipeline)
        })
        it('should execute steps in order when processing through chained async pipeline', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step1 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value * 3 })) // Multiply by 3
            })
            const asyncStep = jest.fn().mockImplementation(async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return ok({ value: input.value - 2 }) // Subtract 2
            })
            const previous = new StartPipeline<{ value: number }>()
            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipeAsync(asyncStep)
            const result = await pipeline2.process({ result: ok({ value: 4 }), context: { message } })
            expect(step1).toHaveBeenCalledWith({ value: 4 })
            expect(asyncStep).toHaveBeenCalledWith({ value: 12 }) // 4 * 3
            const pipelineResult = result.result
            expect(isOkResult(pipelineResult)).toBe(true)
            if (isOkResult(pipelineResult)) {
                expect(pipelineResult.value).toEqual({ value: 10 }) // (4 * 3) - 2 = 10
            }
            expect(step1).toHaveBeenCalledTimes(1)
            expect(asyncStep).toHaveBeenCalledTimes(1)
        })
    })
})

View File

@@ -0,0 +1,49 @@
import { instrumentFn } from '../../common/tracing/tracing-utils'
import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { PipelineResult, isOkResult } from './results'
import { AsyncProcessingStep, SyncProcessingStep } from './steps'
/**
 * A single-item pipeline stage: runs `previousPipeline` first, then applies
 * `currentStep` to its successful value, short-circuiting (context preserved)
 * on any non-OK upstream result.
 */
export class StepPipeline<TInput, TIntermediate, TOutput> implements Pipeline<TInput, TOutput> {
    constructor(
        private currentStep: (value: TIntermediate) => Promise<PipelineResult<TOutput>>,
        private previousPipeline: Pipeline<TInput, TIntermediate>
    ) {}

    /** Chain a synchronous step, wrapped so instrumentFn can trace it by name. */
    pipe<U>(step: SyncProcessingStep<TOutput, U>): StepPipeline<TInput, TOutput, U> {
        const stepName = step.name || 'anonymousStep'
        const wrapped = async (value: TOutput) => instrumentFn(stepName, () => Promise.resolve(step(value)))
        return new StepPipeline<TInput, TOutput, U>(wrapped, this)
    }

    /** Chain an asynchronous step, instrumented the same way. */
    pipeAsync<U>(step: AsyncProcessingStep<TOutput, U>): StepPipeline<TInput, TOutput, U> {
        const stepName = step.name || 'anonymousAsyncStep'
        const wrapped = async (value: TOutput) => instrumentFn(stepName, () => step(value))
        return new StepPipeline<TInput, TOutput, U>(wrapped, this)
    }

    async process(input: PipelineResultWithContext<TInput>): Promise<PipelineResultWithContext<TOutput>> {
        // Everything upstream runs first; its context travels with the result.
        const upstream = await this.previousPipeline.process(input)
        const { result, context } = upstream
        if (!isOkResult(result)) {
            // Short-circuit: DLQ/DROP/REDIRECT results skip this step entirely.
            return { result, context }
        }
        // Apply this stage's step to the unwrapped successful value.
        const currentResult = await this.currentStep(result.value)
        return { result: currentResult, context }
    }
}

View File

@@ -0,0 +1,11 @@
import { PipelineResult } from './results'
/**
 * Synchronous processing step: maps a value to a PipelineResult (OK to
 * continue down the chain, or DROP/DLQ/REDIRECT to divert the message).
 */
export type SyncProcessingStep<T, U> = (value: T) => PipelineResult<U>
/**
 * Asynchronous variant of SyncProcessingStep, returning the result in a promise.
 */
export type AsyncProcessingStep<T, U> = (value: T) => Promise<PipelineResult<U>>

View File

@@ -1,358 +0,0 @@
import { dlq, drop, redirect, success } from '../worker/ingestion/event-pipeline/pipeline-step-result'
import {
AsyncPreprocessingStep,
AsyncProcessingPipeline,
ProcessingPipeline,
SyncPreprocessingStep,
} from './processing-pipeline'
describe('ProcessingPipeline', () => {
describe('static methods', () => {
it('should create pipeline with success result using of()', () => {
const value = { test: 'data' }
const pipeline = ProcessingPipeline.of(value)
const result = pipeline.unwrap()
expect(result).toEqual(success(value))
})
})
describe('pipe() - synchronous steps', () => {
it('should execute step when result is success', () => {
const initialValue = { count: 1 }
const step: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
return success({ count: input.count + 1 })
}
const result = ProcessingPipeline.of(initialValue).pipe(step).unwrap()
expect(result).toEqual(success({ count: 2 }))
})
it('should skip step when result is drop', () => {
const initialValue = { count: 1 }
const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return drop('dropped by first step')
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = ProcessingPipeline.of(initialValue).pipe(dropStep).pipe(secondStep).unwrap()
expect(result).toEqual(drop('dropped by first step'))
expect(secondStep).not.toHaveBeenCalled()
})
it('should skip step when result is redirect', () => {
const initialValue = { count: 1 }
const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return redirect('test redirect', 'overflow-topic', true, false)
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = ProcessingPipeline.of(initialValue).pipe(redirectStep).pipe(secondStep).unwrap()
expect(result).toEqual(redirect('test redirect', 'overflow-topic', true, false))
expect(secondStep).not.toHaveBeenCalled()
})
it('should skip step when result is dlq', () => {
const initialValue = { count: 1 }
const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return dlq('test dlq', new Error('test error'))
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = ProcessingPipeline.of(initialValue).pipe(dlqStep).pipe(secondStep).unwrap()
expect(result).toEqual(dlq('test dlq', new Error('test error')))
expect(secondStep).not.toHaveBeenCalled()
})
// Happy path: each sync step receives the previous step's unwrapped value,
// and the output type changes at every link of the chain.
it('should chain multiple synchronous steps', () => {
const initialValue = { count: 0 }
const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
return success({ count: input.count + 1 })
}
const step2: SyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = (input) => {
return success({ count: input.count, doubled: input.count * 2 })
}
const step3: SyncPreprocessingStep<{ count: number; doubled: number }, { final: string }> = (input) => {
return success({ final: `count: ${input.count}, doubled: ${input.doubled}` })
}
const result = ProcessingPipeline.of(initialValue).pipe(step1).pipe(step2).pipe(step3).unwrap()
expect(result).toEqual(success({ final: 'count: 1, doubled: 2' }))
})
// A drop in the middle of a chain stops it there: step3 is declared but never
// executed, and the drop reason from step2 is what unwrap() returns.
it('should stop chain when step returns drop', () => {
const initialValue = { count: 0 }
const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
return success({ count: input.count + 1 })
}
const step2: SyncPreprocessingStep<{ count: number }, { count: number }> = () => {
return drop('step2 dropped')
}
const step3: SyncPreprocessingStep<{ count: number }, { final: string }> = (input) => {
return success({ final: `count: ${input.count}` })
}
const result = ProcessingPipeline.of(initialValue).pipe(step1).pipe(step2).pipe(step3).unwrap()
expect(result).toEqual(drop('step2 dropped'))
})
})
// pipeAsync() is the bridge from the eager sync pipeline to the promise-based
// one: it must return an AsyncProcessingPipeline and still honour
// short-circuiting on a prior failure.
describe('pipeAsync() - mixed sync/async steps', () => {
it('should transition to AsyncProcessingPipeline', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const asyncPipeline = ProcessingPipeline.of(initialValue).pipeAsync(asyncStep)
expect(asyncPipeline).toBeInstanceOf(AsyncProcessingPipeline)
const result = await asyncPipeline.unwrap()
expect(result).toEqual(success({ count: 2 }))
})
it('should not execute async step when result is failure', async () => {
const initialValue = { count: 1 }
const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return drop('initial drop')
}
const asyncStep: AsyncPreprocessingStep<{ count: number }, { executed: boolean }> = jest.fn(async () => {
await Promise.resolve()
return success({ executed: true })
})
const result = await ProcessingPipeline.of(initialValue).pipe(dropStep).pipeAsync(asyncStep).unwrap()
expect(result).toEqual(drop('initial drop'))
expect(asyncStep).not.toHaveBeenCalled()
})
})
})
// Behaviour of the promise-backed pipeline: sync/async steps can be freely
// interleaved, any non-OK result short-circuits the remainder of the chain,
// and thrown errors (not modelled as results) reject the unwrap() promise.
describe('AsyncProcessingPipeline', () => {
describe('pipe() - synchronous steps on async pipeline', () => {
it('should execute sync step after async step', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const syncStep: SyncPreprocessingStep<{ count: number }, { count: number; final: boolean }> = (input) => {
return success({ count: input.count, final: true })
}
const result = await ProcessingPipeline.of(initialValue).pipeAsync(asyncStep).pipe(syncStep).unwrap()
expect(result).toEqual(success({ count: 2, final: true }))
})
it('should skip sync step when async result is failure', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
await new Promise((resolve) => setTimeout(resolve, 1))
return drop('async drop')
}
const syncStep: SyncPreprocessingStep<{ count: number }, { final: boolean }> = jest.fn((_input) => {
return success({ final: true })
})
const result = await ProcessingPipeline.of(initialValue).pipeAsync(asyncStep).pipe(syncStep).unwrap()
expect(result).toEqual(drop('async drop'))
expect(syncStep).not.toHaveBeenCalled()
})
})
describe('pipeAsync() - chaining async steps', () => {
it('should chain multiple async steps', async () => {
const initialValue = { count: 0 }
const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const step2: AsyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = async (
input
) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count, doubled: input.count * 2 })
}
const result = await ProcessingPipeline.of(initialValue).pipeAsync(step1).pipeAsync(step2).unwrap()
expect(result).toEqual(success({ count: 1, doubled: 2 }))
})
it('should stop chain when async step returns failure', async () => {
const initialValue = { count: 0 }
const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const step2: AsyncPreprocessingStep<{ count: number }, { count: number }> = async () => {
await new Promise((resolve) => setTimeout(resolve, 1))
return redirect('async redirect', 'overflow-topic', false, true)
}
const step3: AsyncPreprocessingStep<{ count: number }, { final: string }> = jest.fn(async (input) => {
await Promise.resolve()
return success({ final: `count: ${input.count}` })
})
const result = await ProcessingPipeline.of(initialValue)
.pipeAsync(step1)
.pipeAsync(step2)
.pipeAsync(step3)
.unwrap()
expect(result).toEqual(redirect('async redirect', 'overflow-topic', false, true))
expect(step3).not.toHaveBeenCalled()
})
})
describe('mixed sync and async steps', () => {
it('should handle complex pipeline with mixed step types', async () => {
const initialValue = { value: 'start' }
const syncStep1: SyncPreprocessingStep<typeof initialValue, { value: string; step1: boolean }> = (
input
) => {
return success({ value: input.value + '-sync1', step1: true })
}
const asyncStep1: AsyncPreprocessingStep<
{ value: string; step1: boolean },
{ value: string; step1: boolean; async1: boolean }
> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ ...input, value: input.value + '-async1', async1: true })
}
const syncStep2: SyncPreprocessingStep<
{ value: string; step1: boolean; async1: boolean },
{ final: string }
> = (input) => {
return success({ final: `${input.value}-sync2` })
}
const result = await ProcessingPipeline.of(initialValue)
.pipe(syncStep1)
.pipeAsync(asyncStep1)
.pipe(syncStep2)
.unwrap()
expect(result).toEqual(success({ final: 'start-sync1-async1-sync2' }))
})
})
// Exceptions thrown inside a step are NOT converted into drop/dlq results;
// they propagate (rejecting the promise for async, throwing for sync).
describe('error handling', () => {
it('should handle async step that throws an error', async () => {
const initialValue = { count: 1 }
const errorStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
await Promise.resolve()
throw new Error('Step failed')
}
await expect(ProcessingPipeline.of(initialValue).pipeAsync(errorStep).unwrap()).rejects.toThrow(
'Step failed'
)
})
it('should handle sync step that throws an error', () => {
const initialValue = { count: 1 }
const errorStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
throw new Error('Sync step failed')
}
expect(() => {
ProcessingPipeline.of(initialValue).pipe(errorStep).unwrap()
}).toThrow('Sync step failed')
})
})
})
// Compile-time-oriented tests: these mostly exist so the file fails to build
// if the pipeline's generic parameters stop flowing between steps; the
// runtime asserts double-check the values actually passed through.
describe('Type safety and generics', () => {
it('should maintain type safety through pipeline transformations', () => {
interface Input1 {
a: number
}
interface Input2 {
b: string
}
interface Input3 {
c: boolean
}
const step1: SyncPreprocessingStep<Input1, Input2> = (input) => {
expect(typeof input.a).toBe('number')
return success({ b: input.a.toString() })
}
const step2: SyncPreprocessingStep<Input2, Input3> = (input) => {
expect(typeof input.b).toBe('string')
return success({ c: input.b === '42' })
}
const result = ProcessingPipeline.of({ a: 42 }).pipe(step1).pipe(step2).unwrap()
expect(result).toEqual(success({ c: true }))
})
it('should work with complex nested types', () => {
interface ComplexInput {
user: { id: string; name: string }
metadata: { timestamp: number; source: string }
}
interface ProcessedOutput {
userId: string
displayName: string
processedAt: string
}
const processStep: SyncPreprocessingStep<ComplexInput, ProcessedOutput> = (input) => {
return success({
userId: input.user.id,
displayName: input.user.name.toUpperCase(),
processedAt: new Date(input.metadata.timestamp).toISOString(),
})
}
const complexInput: ComplexInput = {
user: { id: 'user123', name: 'John Doe' },
metadata: { timestamp: 1640995200000, source: 'api' },
}
const result = ProcessingPipeline.of(complexInput).pipe(processStep).unwrap()
expect(result).toEqual(
success({
userId: 'user123',
displayName: 'JOHN DOE',
processedAt: '2022-01-01T00:00:00.000Z',
})
)
})
})

View File

@@ -1,76 +0,0 @@
import {
PipelineStepResult,
PipelineStepResultType,
isSuccessResult,
} from '../worker/ingestion/event-pipeline/pipeline-step-result'
export type ProcessingResult<T> = PipelineStepResult<T>

/**
 * Promise-backed processing pipeline. Every pipe()/pipeAsync() call chains
 * onto the underlying promise; as soon as a non-OK result is observed, all
 * subsequent steps are bypassed and that result is what unwrap() resolves to.
 */
export class AsyncProcessingPipeline<T> {
    constructor(private resultPromise: Promise<ProcessingResult<T>>) {}

    /** Chains a synchronous step; skipped when the current result is not OK. */
    pipe<U>(step: (value: T) => ProcessingResult<U>): AsyncProcessingPipeline<U> {
        const chained = this.resultPromise.then((current) =>
            isSuccessResult(current) ? step(current.value) : current
        )
        return new AsyncProcessingPipeline(chained)
    }

    /** Chains an asynchronous step; skipped when the current result is not OK. */
    pipeAsync<U>(step: (value: T) => Promise<ProcessingResult<U>>): AsyncProcessingPipeline<U> {
        const chained = this.resultPromise.then((current) =>
            isSuccessResult(current) ? step(current.value) : current
        )
        return new AsyncProcessingPipeline(chained)
    }

    /** Resolves to the final result of the chain. */
    async unwrap(): Promise<ProcessingResult<T>> {
        return await this.resultPromise
    }
}
/**
 * Eagerly-evaluated processing pipeline holding an in-hand result.
 * Synchronous steps run immediately via pipe(); pipeAsync() hands off to
 * AsyncProcessingPipeline. Any non-OK result short-circuits later steps.
 */
export class ProcessingPipeline<T> {
    constructor(private result: ProcessingResult<T>) {}

    /** Starts a pipeline from a plain value, wrapping it as an OK result. */
    static of<T>(value: T): ProcessingPipeline<T> {
        return new ProcessingPipeline({ type: PipelineStepResultType.OK, value })
    }

    /** Runs a synchronous step, unless the current result is already non-OK. */
    pipe<U>(step: (value: T) => ProcessingResult<U>): ProcessingPipeline<U> {
        if (isSuccessResult(this.result)) {
            return new ProcessingPipeline(step(this.result.value))
        }
        return new ProcessingPipeline(this.result)
    }

    /** Runs an async step (or propagates the failure), switching to the async pipeline. */
    pipeAsync<U>(step: (value: T) => Promise<ProcessingResult<U>>): AsyncProcessingPipeline<U> {
        if (isSuccessResult(this.result)) {
            return new AsyncProcessingPipeline(step(this.result.value))
        }
        return new AsyncProcessingPipeline(Promise.resolve(this.result))
    }

    /** Returns the current result without further processing. */
    unwrap(): ProcessingResult<T> {
        return this.result
    }
}
// Step signatures: a step transforms a T into a ProcessingResult<U>, either
// synchronously or as a promise; PreprocessingStep accepts either form.
export type SyncPreprocessingStep<T, U> = (value: T) => ProcessingResult<U>
export type AsyncPreprocessingStep<T, U> = (value: T) => Promise<ProcessingResult<U>>
export type PreprocessingStep<T, U> = SyncPreprocessingStep<T, U> | AsyncPreprocessingStep<T, U>

View File

@@ -1,74 +0,0 @@
export enum PipelineStepResultType {
    OK,
    DLQ,
    DROP,
    REDIRECT,
}

/**
 * Generic result type for pipeline steps. A step either succeeds with a value
 * (OK) or terminates the pipeline for its event by dropping it (DROP),
 * sending it to the dead-letter queue (DLQ), or redirecting it to another
 * topic (REDIRECT). The `type` tag makes this a discriminated union.
 */
export type PipelineStepResultOk<T> = { type: PipelineStepResultType.OK; value: T }
export type PipelineStepResultDlq = { type: PipelineStepResultType.DLQ; reason: string; error: unknown }
export type PipelineStepResultDrop = { type: PipelineStepResultType.DROP; reason: string }
export type PipelineStepResultRedirect = {
    type: PipelineStepResultType.REDIRECT
    reason: string
    topic: string
    // Whether the original message key should be kept when producing to `topic`.
    preserveKey?: boolean
    // Whether the produce should be awaited before continuing.
    awaitAck?: boolean
}
export type PipelineStepResult<T> =
    | PipelineStepResultOk<T>
    | PipelineStepResultDlq
    | PipelineStepResultDrop
    | PipelineStepResultRedirect

/**
 * Helper constructors for pipeline step results.
 */

/** Wraps a step's output value in an OK result. */
export function success<T>(value: T): PipelineStepResult<T> {
    return { type: PipelineStepResultType.OK, value }
}

/**
 * Marks the event for the dead-letter queue. `error` is optional; consumers
 * synthesize an Error from `reason` when it is absent. Typed as `unknown`
 * (matching PipelineStepResultDlq.error) rather than `any`, so consumers must
 * narrow it before use; callers are unaffected.
 */
export function dlq<T>(reason: string, error?: unknown): PipelineStepResult<T> {
    return { type: PipelineStepResultType.DLQ, reason, error }
}

/** Drops the event with the given reason. */
export function drop<T>(reason: string): PipelineStepResult<T> {
    return { type: PipelineStepResultType.DROP, reason }
}

/**
 * Redirects the event to another topic. By default the message key is
 * preserved and the produce is awaited.
 */
export function redirect<T>(
    reason: string,
    topic: string,
    preserveKey: boolean = true,
    awaitAck: boolean = true
): PipelineStepResult<T> {
    return {
        type: PipelineStepResultType.REDIRECT,
        reason,
        topic,
        preserveKey,
        awaitAck,
    }
}

/**
 * Type guards for narrowing a PipelineStepResult to a specific variant.
 */
export function isSuccessResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultOk<T> {
    return result.type === PipelineStepResultType.OK
}
export function isDlqResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultDlq {
    return result.type === PipelineStepResultType.DLQ
}
export function isDropResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultDrop {
    return result.type === PipelineStepResultType.DROP
}
export function isRedirectResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultRedirect {
    return result.type === PipelineStepResultType.REDIRECT
}

View File

@@ -4,12 +4,12 @@ import { PluginEvent } from '@posthog/plugin-scaffold'
import { Person, Team } from '~/types'
import { PipelineResult, isOkResult, ok } from '../../../ingestion/pipelines/results'
import { PersonContext } from '../persons/person-context'
import { PersonEventProcessor } from '../persons/person-event-processor'
import { PersonMergeService } from '../persons/person-merge-service'
import { PersonPropertyService } from '../persons/person-property-service'
import { PersonsStoreForBatch } from '../persons/persons-store-for-batch'
import { PipelineStepResult, isSuccessResult, success } from './pipeline-step-result'
import { EventPipelineRunner } from './runner'
export async function processPersonsStep(
@@ -19,7 +19,7 @@ export async function processPersonsStep(
timestamp: DateTime,
processPerson: boolean,
personStoreBatch: PersonsStoreForBatch
): Promise<PipelineStepResult<[PluginEvent, Person, Promise<void>]>> {
): Promise<PipelineResult<[PluginEvent, Person, Promise<void>]>> {
const context = new PersonContext(
event,
team,
@@ -39,8 +39,8 @@ export async function processPersonsStep(
)
const [result, kafkaAck] = await processor.processEvent()
if (isSuccessResult(result)) {
return success([event, result.value, kafkaAck])
if (isOkResult(result)) {
return ok([event, result.value, kafkaAck])
} else {
return result
}

View File

@@ -1,6 +1,7 @@
import { PluginEvent } from '@posthog/plugin-scaffold'
import { HogTransformerService } from '../../../cdp/hog-transformations/hog-transformer.service'
import { isDlqResult, isDropResult, isOkResult, isRedirectResult } from '../../../ingestion/pipelines/results'
import { eventDroppedCounter } from '../../../main/ingestion-queues/metrics'
import { EventHeaders, Hub, PipelineEvent, Team } from '../../../types'
import { DependencyUnavailableError } from '../../../utils/db/error'
@@ -29,7 +30,6 @@ import {
pipelineStepThrowCounter,
} from './metrics'
import { normalizeEventStep } from './normalizeEventStep'
import { isDlqResult, isDropResult, isRedirectResult, isSuccessResult } from './pipeline-step-result'
import { prepareEventStep } from './prepareEventStep'
import { processPersonsStep } from './processPersonsStep'
import { transformEventStep } from './transformEventStep'
@@ -314,7 +314,7 @@ export class EventPipelineRunner {
event.team_id
)
if (!isSuccessResult(personStepResult)) {
if (!isOkResult(personStepResult)) {
// Handle DLQ/drop/redirect cases - return early from pipeline
if (isDlqResult(personStepResult)) {
await this.sendToDLQ(event, personStepResult.error, 'processPersonsStep')

View File

@@ -4,9 +4,9 @@ import { DateTime } from 'luxon'
import { PluginEvent } from '@posthog/plugin-scaffold'
import { ONE_HOUR } from '../../../config/constants'
import { PipelineResult, dlq, ok, redirect } from '../../../ingestion/pipelines/results'
import { InternalPerson, Person } from '../../../types'
import { logger } from '../../../utils/logger'
import { PipelineStepResult, dlq, redirect, success } from '../event-pipeline/pipeline-step-result'
import { uuidFromDistinctId } from '../person-uuid'
import { PersonContext } from './person-context'
import { PersonMergeService } from './person-merge-service'
@@ -35,7 +35,7 @@ export class PersonEventProcessor {
private mergeService: PersonMergeService
) {}
async processEvent(): Promise<[PipelineStepResult<Person>, Promise<void>]> {
async processEvent(): Promise<[PipelineResult<Person>, Promise<void>]> {
if (!this.context.processPerson) {
return await this.handlePersonlessMode()
}
@@ -65,10 +65,7 @@ export class PersonEventProcessor {
try {
const [updatedPerson, updateKafkaAck] =
await this.propertyService.updatePersonProperties(personFromMerge)
return [
success(updatedPerson),
Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined),
]
return [ok(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
} catch (error) {
// Shortcut didn't work, swallow the error and try normal retry loop below
logger.debug('🔁', `failed update after adding distinct IDs, retrying`, { error })
@@ -77,10 +74,10 @@ export class PersonEventProcessor {
// Handle regular property updates
const [updatedPerson, updateKafkaAck] = await this.propertyService.handleUpdate()
return [success(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
return [ok(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
}
private async handlePersonlessMode(): Promise<[PipelineStepResult<Person>, Promise<void>]> {
private async handlePersonlessMode(): Promise<[PipelineResult<Person>, Promise<void>]> {
let existingPerson = await this.context.personStore.fetchForChecking(
this.context.team.id,
this.context.distinctId
@@ -135,7 +132,7 @@ export class PersonEventProcessor {
person.force_upgrade = true
}
return [success(person), Promise.resolve()]
return [ok(person), Promise.resolve()]
}
// We need a value from the `person_created_column` in ClickHouse. This should be
@@ -150,14 +147,14 @@ export class PersonEventProcessor {
uuid: uuidFromDistinctId(this.context.team.id, this.context.distinctId),
created_at: createdAt,
}
return [success(fakePerson), Promise.resolve()]
return [ok(fakePerson), Promise.resolve()]
}
getContext(): PersonContext {
return this.context
}
private handleMergeError(error: unknown, event: PluginEvent): PipelineStepResult<Person> | null {
private handleMergeError(error: unknown, event: PluginEvent): PipelineResult<Person> | null {
const mergeMode = this.context.mergeMode
if (error instanceof PersonMergeLimitExceededError) {

View File

@@ -1,613 +0,0 @@
import { Message } from 'node-rdkafka'
import { AsyncPreprocessingStep, SyncPreprocessingStep } from '../../ingestion/processing-pipeline'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { dlq, drop, redirect, success } from './event-pipeline/pipeline-step-result'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from './pipeline-helpers'
import { AsyncResultHandlingPipeline, PipelineConfig, ResultHandlingPipeline } from './result-handling-pipeline'
// Mock the pipeline helpers so tests can assert that drop/redirect/DLQ
// outcomes invoke the right side effect without touching Kafka.
jest.mock('./pipeline-helpers', () => ({
logDroppedMessage: jest.fn(),
redirectMessageToTopic: jest.fn(),
sendMessageToDLQ: jest.fn(),
}))
// Typed handles to the auto-mocked helpers, so call-argument assertions stay type-checked.
const mockLogDroppedMessage = logDroppedMessage as jest.MockedFunction<typeof logDroppedMessage>
const mockRedirectMessageToTopic = redirectMessageToTopic as jest.MockedFunction<typeof redirectMessageToTopic>
const mockSendMessageToDLQ = sendMessageToDLQ as jest.MockedFunction<typeof sendMessageToDLQ>
// ResultHandlingPipeline differs from ProcessingPipeline in that unwrap()
// resolves to the bare value on success and to null on any failure, after
// performing the side effect for that failure kind (log drop, redirect to a
// topic, or send to the DLQ) via the mocked pipeline helpers.
describe('ResultHandlingPipeline', () => {
let mockKafkaProducer: KafkaProducerWrapper
let mockPromiseScheduler: PromiseScheduler
let mockMessage: Message
let config: PipelineConfig
beforeEach(() => {
jest.clearAllMocks()
mockKafkaProducer = {
producer: {} as any,
queueMessages: jest.fn(),
} as unknown as KafkaProducerWrapper
mockPromiseScheduler = {
schedule: jest.fn(),
} as unknown as PromiseScheduler
mockMessage = {
value: Buffer.from('test message'),
topic: 'test-topic',
partition: 0,
offset: 123,
key: 'test-key',
headers: [],
size: 12,
} as Message
config = {
kafkaProducer: mockKafkaProducer,
dlqTopic: 'test-dlq',
promiseScheduler: mockPromiseScheduler,
}
})
describe('static methods', () => {
it('should create pipeline with success result using of()', async () => {
const value = { test: 'data' }
const pipeline = ResultHandlingPipeline.of(value, mockMessage, config)
const result = await pipeline.unwrap()
expect(result).toEqual(value)
})
})
describe('pipe() - synchronous steps', () => {
it('should execute step when result is success', async () => {
const initialValue = { count: 1 }
const step: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
return success({ count: input.count + 1 })
}
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config).pipe(step).unwrap()
expect(result).toEqual({ count: 2 })
})
// Drop: later steps skipped, unwrap() yields null, and the drop is logged
// with the 'pipeline_result_handler' step tag.
it('should handle drop result and return null', async () => {
const initialValue = { count: 1 }
const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return drop('test drop reason')
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(dropStep)
.pipe(secondStep)
.unwrap()
expect(result).toBeNull()
expect(secondStep).not.toHaveBeenCalled()
expect(mockLogDroppedMessage).toHaveBeenCalledWith(
mockMessage,
'test drop reason',
'pipeline_result_handler'
)
})
// Redirect: the original Kafka message is re-produced to the requested topic
// with the redirect's preserveKey/awaitAck flags.
it('should handle redirect result and return null', async () => {
const initialValue = { count: 1 }
const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return redirect('test redirect', 'overflow-topic', true, false)
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(redirectStep)
.pipe(secondStep)
.unwrap()
expect(result).toBeNull()
expect(secondStep).not.toHaveBeenCalled()
expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
mockKafkaProducer,
mockPromiseScheduler,
mockMessage,
'overflow-topic',
'pipeline_result_handler',
true,
false
)
})
it('should handle dlq result and return null', async () => {
const initialValue = { count: 1 }
const testError = new Error('test error')
const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return dlq('test dlq reason', testError)
}
const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
return success({ count: input.count + 1 })
})
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(dlqStep)
.pipe(secondStep)
.unwrap()
expect(result).toBeNull()
expect(secondStep).not.toHaveBeenCalled()
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
mockMessage,
testError,
'pipeline_result_handler',
'test-dlq'
)
})
// When dlq() is called without an error object, the handler synthesizes an
// Error whose message is the dlq reason.
it('should handle dlq result without error and create default error', async () => {
const initialValue = { count: 1 }
const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return dlq('test dlq reason')
}
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config).pipe(dlqStep).unwrap()
expect(result).toBeNull()
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
mockMessage,
expect.any(Error),
'pipeline_result_handler',
'test-dlq'
)
const errorArg = (mockSendMessageToDLQ as jest.Mock).mock.calls[0][2]
expect(errorArg.message).toBe('test dlq reason')
})
it('should chain multiple synchronous steps successfully', async () => {
const initialValue = { count: 0 }
const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
return success({ count: input.count + 1 })
}
const step2: SyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = (input) => {
return success({ count: input.count, doubled: input.count * 2 })
}
const step3: SyncPreprocessingStep<{ count: number; doubled: number }, { final: string }> = (input) => {
return success({ final: `count: ${input.count}, doubled: ${input.doubled}` })
}
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(step1)
.pipe(step2)
.pipe(step3)
.unwrap()
expect(result).toEqual({ final: 'count: 1, doubled: 2' })
})
})
describe('pipeAsync() - mixed sync/async steps', () => {
it('should transition to AsyncResultHandlingPipeline', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const asyncPipeline = ResultHandlingPipeline.of(initialValue, mockMessage, config).pipeAsync(asyncStep)
expect(asyncPipeline).toBeInstanceOf(AsyncResultHandlingPipeline)
const result = await asyncPipeline.unwrap()
expect(result).toEqual({ count: 2 })
})
// Once transitioned to the async pipeline, failure handling is tagged with
// the 'async_pipeline_result_handler' step name instead.
it('should not execute async step when sync result is failure', async () => {
const initialValue = { count: 1 }
const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return drop('initial drop')
}
const asyncStep: AsyncPreprocessingStep<{ count: number }, { executed: boolean }> = jest.fn(async () => {
await Promise.resolve()
return success({ executed: true })
})
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(dropStep)
.pipeAsync(asyncStep)
.unwrap()
expect(result).toBeNull()
expect(asyncStep).not.toHaveBeenCalled()
expect(mockLogDroppedMessage).toHaveBeenCalledWith(
mockMessage,
'initial drop',
'async_pipeline_result_handler'
)
})
})
describe('redirect result with default parameters', () => {
it('should use default preserveKey and awaitAck when not specified', async () => {
const initialValue = { count: 1 }
const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
return redirect('test redirect', 'overflow-topic')
}
const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(redirectStep)
.unwrap()
expect(result).toBeNull()
expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
mockKafkaProducer,
mockPromiseScheduler,
mockMessage,
'overflow-topic',
'pipeline_result_handler',
true, // default preserveKey
true // default awaitAck
)
})
})
})
// Same contract as ResultHandlingPipeline but promise-backed from the start:
// every failure side effect is reported under the 'async_pipeline_result_handler'
// step tag, unwrap() resolves to the value or null, and thrown step errors
// reject rather than being converted into results.
describe('AsyncResultHandlingPipeline', () => {
let mockKafkaProducer: KafkaProducerWrapper
let mockPromiseScheduler: PromiseScheduler
let mockMessage: Message
let config: PipelineConfig
beforeEach(() => {
jest.clearAllMocks()
mockKafkaProducer = {
producer: {} as any,
queueMessages: jest.fn(),
} as unknown as KafkaProducerWrapper
mockPromiseScheduler = {
schedule: jest.fn(),
} as unknown as PromiseScheduler
mockMessage = {
value: Buffer.from('test message'),
topic: 'test-topic',
partition: 0,
offset: 123,
key: 'test-key',
headers: [],
size: 12,
} as Message
config = {
kafkaProducer: mockKafkaProducer,
dlqTopic: 'test-dlq',
promiseScheduler: mockPromiseScheduler,
}
})
describe('static methods', () => {
it('should create async pipeline using of()', async () => {
const value = { test: 'data' }
const pipeline = AsyncResultHandlingPipeline.of(value, mockMessage, config)
const result = await pipeline.unwrap()
expect(result).toEqual(value)
})
})
describe('pipe() - synchronous steps on async pipeline', () => {
it('should execute sync step after async step', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const syncStep: SyncPreprocessingStep<{ count: number }, { count: number; final: boolean }> = (input) => {
return success({ count: input.count, final: true })
}
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipeAsync(asyncStep)
.pipe(syncStep)
.unwrap()
expect(result).toEqual({ count: 2, final: true })
})
it('should skip sync step when async result is failure', async () => {
const initialValue = { count: 1 }
const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
await new Promise((resolve) => setTimeout(resolve, 1))
return drop('async drop')
}
const syncStep: SyncPreprocessingStep<{ count: number }, { final: boolean }> = jest.fn((_input) => {
return success({ final: true })
})
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipeAsync(asyncStep)
.pipe(syncStep)
.unwrap()
expect(result).toBeNull()
expect(syncStep).not.toHaveBeenCalled()
expect(mockLogDroppedMessage).toHaveBeenCalledWith(
mockMessage,
'async drop',
'async_pipeline_result_handler'
)
})
})
describe('pipeAsync() - chaining async steps', () => {
it('should chain multiple async steps', async () => {
const initialValue = { count: 0 }
const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const step2: AsyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = async (
input
) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count, doubled: input.count * 2 })
}
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipeAsync(step1)
.pipeAsync(step2)
.unwrap()
expect(result).toEqual({ count: 1, doubled: 2 })
})
it('should stop chain when async step returns failure', async () => {
const initialValue = { count: 0 }
const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ count: input.count + 1 })
}
const step2: AsyncPreprocessingStep<{ count: number }, { count: number }> = async () => {
await new Promise((resolve) => setTimeout(resolve, 1))
return redirect('async redirect', 'overflow-topic', false, true)
}
const step3: AsyncPreprocessingStep<{ count: number }, { final: string }> = jest.fn(async (input) => {
await Promise.resolve()
return success({ final: `count: ${input.count}` })
})
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipeAsync(step1)
.pipeAsync(step2)
.pipeAsync(step3)
.unwrap()
expect(result).toBeNull()
expect(step3).not.toHaveBeenCalled()
expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
mockKafkaProducer,
mockPromiseScheduler,
mockMessage,
'overflow-topic',
'async_pipeline_result_handler',
false,
true
)
})
it('should handle async dlq result', async () => {
const initialValue = { count: 1 }
const testError = new Error('async error')
const dlqStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
await Promise.resolve()
return dlq('async dlq reason', testError)
}
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipeAsync(dlqStep)
.unwrap()
expect(result).toBeNull()
expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
mockKafkaProducer,
mockMessage,
testError,
'async_pipeline_result_handler',
'test-dlq'
)
})
})
describe('mixed sync and async steps', () => {
it('should handle complex pipeline with mixed step types', async () => {
const initialValue = { value: 'start' }
const syncStep1: SyncPreprocessingStep<typeof initialValue, { value: string; step1: boolean }> = (
input
) => {
return success({ value: input.value + '-sync1', step1: true })
}
const asyncStep1: AsyncPreprocessingStep<
{ value: string; step1: boolean },
{ value: string; step1: boolean; async1: boolean }
> = async (input) => {
await new Promise((resolve) => setTimeout(resolve, 1))
return success({ ...input, value: input.value + '-async1', async1: true })
}
const syncStep2: SyncPreprocessingStep<
{ value: string; step1: boolean; async1: boolean },
{ final: string }
> = (input) => {
return success({ final: `${input.value}-sync2` })
}
const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
.pipe(syncStep1)
.pipeAsync(asyncStep1)
.pipe(syncStep2)
.unwrap()
expect(result).toEqual({ final: 'start-sync1-async1-sync2' })
})
})
describe('error handling', () => {
it('should propagate async step errors', async () => {
const initialValue = { count: 1 }
const errorStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
await Promise.resolve()
throw new Error('Async step failed')
}
await expect(
AsyncResultHandlingPipeline.of(initialValue, mockMessage, config).pipeAsync(errorStep).unwrap()
).rejects.toThrow('Async step failed')
})
})
})
// End-to-end tests that run realistic multi-step pipelines against mocked
// Kafka infrastructure, covering the happy path and DLQ short-circuiting.
describe('Integration tests', () => {
    let producer: KafkaProducerWrapper
    let scheduler: PromiseScheduler
    let message: Message
    let pipelineConfig: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        producer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        scheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        message = {
            value: Buffer.from('test message'),
            topic: 'test-topic',
            partition: 0,
            offset: 123,
            key: 'test-key',
            headers: [],
            size: 12,
        } as Message

        pipelineConfig = {
            kafkaProducer: producer,
            dlqTopic: 'test-dlq',
            promiseScheduler: scheduler,
        }
    })

    it('should handle realistic event processing pipeline', async () => {
        interface EventInput {
            rawEvent: string
        }
        interface ParsedEvent {
            eventType: string
            userId: string
        }
        interface ValidatedEvent extends ParsedEvent {
            isValid: boolean
        }
        interface ProcessedEvent extends ValidatedEvent {
            timestamp: string
        }

        // Parse: reject malformed payloads, otherwise emit a canned parsed event.
        const parseRaw: SyncPreprocessingStep<EventInput, ParsedEvent> = (input) =>
            input.rawEvent === 'invalid'
                ? drop('Invalid event format')
                : success({ eventType: 'pageview', userId: 'user123' })

        // Validate: asynchronously redirect blocked users, pass everyone else through.
        const checkUser: AsyncPreprocessingStep<ParsedEvent, ValidatedEvent> = async (input) => {
            await new Promise((resolve) => setTimeout(resolve, 1))
            if (input.userId === 'blocked') {
                return redirect('User blocked', 'blocked-events-topic')
            }
            return success({ ...input, isValid: true })
        }

        // Process: stamp the validated event with a fixed timestamp.
        const addTimestamp: SyncPreprocessingStep<ValidatedEvent, ProcessedEvent> = (input) =>
            success({ ...input, timestamp: '2023-01-01T00:00:00Z' })

        const outcome = await ResultHandlingPipeline.of({ rawEvent: 'test-event' }, message, pipelineConfig)
            .pipe(parseRaw)
            .pipeAsync(checkUser)
            .pipe(addTimestamp)
            .unwrap()

        expect(outcome).toEqual({
            eventType: 'pageview',
            userId: 'user123',
            isValid: true,
            timestamp: '2023-01-01T00:00:00Z',
        })
    })

    it('should handle pipeline failure at different stages', async () => {
        const alwaysParse: SyncPreprocessingStep<{ rawEvent: string }, { parsed: boolean }> = () =>
            success({ parsed: true })

        // Validation sends the message to the DLQ, which must stop the pipeline.
        const failValidation: AsyncPreprocessingStep<{ parsed: boolean }, { validated: boolean }> = async () => {
            await Promise.resolve()
            return dlq('Validation failed', new Error('Invalid data'))
        }

        // Spy so we can prove the step after the DLQ result never executes.
        const finalStep: SyncPreprocessingStep<{ validated: boolean }, { processed: boolean }> = jest.fn(() =>
            success({ processed: true })
        )

        const outcome = await ResultHandlingPipeline.of({ rawEvent: 'test' }, message, pipelineConfig)
            .pipe(alwaysParse)
            .pipeAsync(failValidation)
            .pipe(finalStep)
            .unwrap()

        expect(outcome).toBeNull()
        expect(finalStep).not.toHaveBeenCalled()
        expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
            producer,
            message,
            expect.any(Error),
            'async_pipeline_result_handler',
            'test-dlq'
        )
    })
})

View File

@@ -1,168 +0,0 @@
import { Message } from 'node-rdkafka'
import {
AsyncPreprocessingStep,
AsyncProcessingPipeline,
ProcessingPipeline,
ProcessingResult,
SyncPreprocessingStep,
} from '../../ingestion/processing-pipeline'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import {
PipelineStepResultType,
isDlqResult,
isDropResult,
isRedirectResult,
isSuccessResult,
} from './event-pipeline/pipeline-step-result'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from './pipeline-helpers'
/**
 * Dependencies the result-handling pipelines need to route non-success
 * results: a producer for DLQ/redirect writes, the DLQ topic name, and a
 * scheduler for fire-and-forget produce acknowledgements.
 */
export type PipelineConfig = {
    kafkaProducer: KafkaProducerWrapper
    dlqTopic: string
    promiseScheduler: PromiseScheduler
}
/**
 * Base class for handling pipeline results (DLQ, DROP, REDIRECT).
 * Contains common logic for processing non-success results.
 */
abstract class BaseResultHandlingPipeline<T> {
    protected constructor(
        protected originalMessage: Message,
        protected config: PipelineConfig
    ) {}
    /**
     * Handles a pipeline result, processing non-success results appropriately.
     *
     * @param result   outcome produced by the wrapped pipeline
     * @param stepName label attached to DLQ/drop/redirect bookkeeping
     * @returns the value for success results, null for non-success results
     */
    protected async handleResult(result: ProcessingResult<T>, stepName: string): Promise<T | null> {
        if (isSuccessResult(result)) {
            return result.value
        }
        // Non-success results terminate the pipeline; route them to their sink.
        await this.handleNonSuccessResult(result, stepName)
        return null
    }
    private async handleNonSuccessResult(result: ProcessingResult<T>, stepName: string): Promise<void> {
        if (isDlqResult(result)) {
            await this.handleDlqResult(result, stepName)
        } else if (isDropResult(result)) {
            this.handleDropResult(result, stepName)
        } else if (isRedirectResult(result)) {
            await this.handleRedirectResult(result, stepName)
        }
        // Any result type not matched above is intentionally ignored.
    }
    /** Forwards the original message to the configured DLQ topic. */
    private async handleDlqResult(result: { reason: string; error?: unknown }, stepName: string): Promise<void> {
        await sendMessageToDLQ(
            this.config.kafkaProducer,
            this.originalMessage,
            // `??` (not `||`) so a falsy-but-present error payload ('', 0, false)
            // is preserved; only a missing error falls back to a synthesized one.
            result.error ?? new Error(result.reason),
            stepName,
            this.config.dlqTopic
        )
    }
    /** Drops are terminal but benign: just log them for observability. */
    private handleDropResult(result: { reason: string }, stepName: string): void {
        logDroppedMessage(this.originalMessage, result.reason, stepName)
    }
    /** Re-produces the original message onto the redirect target topic. */
    private async handleRedirectResult(
        result: {
            reason: string
            topic: string
            preserveKey?: boolean
            awaitAck?: boolean
        },
        stepName: string
    ): Promise<void> {
        await redirectMessageToTopic(
            this.config.kafkaProducer,
            this.config.promiseScheduler,
            this.originalMessage,
            result.topic,
            stepName,
            // Redirects keep the partition key and await the ack by default.
            result.preserveKey ?? true,
            result.awaitAck ?? true
        )
    }
}
/**
* Wrapper around ProcessingPipeline that automatically handles result types (DLQ, DROP, REDIRECT)
* and cuts execution short when encountering non-success results.
*
* Requires a KafkaProducerWrapper for DLQ and redirect functionality.
*/
export class ResultHandlingPipeline<T> extends BaseResultHandlingPipeline<T> {
private constructor(
private pipeline: ProcessingPipeline<T>,
originalMessage: Message,
config: PipelineConfig
) {
super(originalMessage, config)
}
pipe<U>(step: SyncPreprocessingStep<T, U>, _stepName?: string): ResultHandlingPipeline<U> {
const newPipeline = this.pipeline.pipe(step)
return new ResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
}
pipeAsync<U>(step: AsyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
const newPipeline = this.pipeline.pipeAsync(step)
return new AsyncResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
}
async unwrap(): Promise<T | null> {
const result = this.pipeline.unwrap()
return this.handleResult(result, 'pipeline_result_handler')
}
static of<T>(value: T, originalMessage: Message, config: PipelineConfig): ResultHandlingPipeline<T> {
const pipeline = ProcessingPipeline.of(value)
return new ResultHandlingPipeline(pipeline, originalMessage, config)
}
}
/**
 * Wrapper around AsyncProcessingPipeline that automatically handles result types (DLQ, DROP, REDIRECT)
 * and cuts execution short when encountering non-success results.
 *
 * Requires a KafkaProducerWrapper for DLQ and redirect functionality.
 */
export class AsyncResultHandlingPipeline<T> extends BaseResultHandlingPipeline<T> {
    /** Entry point: lift a plain value into an async result-handling pipeline. */
    static of<T>(value: T, originalMessage: Message, config: PipelineConfig): AsyncResultHandlingPipeline<T> {
        // Seed with a trivial async step so the chain is async from the start.
        const seeded = ProcessingPipeline.of(value).pipeAsync((v) =>
            Promise.resolve({ type: PipelineStepResultType.OK, value: v })
        )
        return new AsyncResultHandlingPipeline(seeded, originalMessage, config)
    }
    constructor(
        private inner: AsyncProcessingPipeline<T>,
        originalMessage: Message,
        config: PipelineConfig
    ) {
        super(originalMessage, config)
    }
    /** Append a synchronous step to the async chain. */
    pipe<U>(step: SyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
        return new AsyncResultHandlingPipeline(this.inner.pipe(step), this.originalMessage, this.config)
    }
    /** Append an asynchronous step to the async chain. */
    pipeAsync<U>(step: AsyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
        return new AsyncResultHandlingPipeline(this.inner.pipeAsync(step), this.originalMessage, this.config)
    }
    /** Await the pipeline and route any non-success result; null means "handled elsewhere". */
    async unwrap(): Promise<T | null> {
        const outcome = await this.inner.unwrap()
        return this.handleResult(outcome, 'async_pipeline_result_handler')
    }
}

View File

@@ -1,7 +1,7 @@
import { createApplyDropRestrictionsStep } from '../../../src/ingestion/event-preprocessing/apply-drop-events-restrictions'
import { drop, ok } from '../../../src/ingestion/pipelines/results'
import { EventHeaders } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
describe('createApplyDropRestrictionsStep', () => {
let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -28,7 +28,7 @@ describe('createApplyDropRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith('valid-token-123', 'user-456')
})
@@ -60,7 +60,7 @@ describe('createApplyDropRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith(undefined, undefined)
})
@@ -73,7 +73,7 @@ describe('createApplyDropRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith(undefined, undefined)
})
})

View File

@@ -2,9 +2,9 @@ import {
OverflowConfig,
createApplyForceOverflowRestrictionsStep,
} from '../../../src/ingestion/event-preprocessing/apply-force-overflow-restrictions'
import { ok, redirect } from '../../../src/ingestion/pipelines/results'
import { EventHeaders } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { redirect, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
describe('createApplyForceOverflowRestrictionsStep', () => {
let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -39,7 +39,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith('valid-token-123', 'user-456')
// shouldSkipPerson should not be called if not forcing overflow
expect(eventIngestionRestrictionManager.shouldSkipPerson).not.toHaveBeenCalled()
@@ -101,7 +101,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith(undefined, undefined)
})
@@ -114,7 +114,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith(undefined, undefined)
})
@@ -138,7 +138,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {
const result = disabledStep(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(eventIngestionRestrictionManager.shouldForceOverflow).not.toHaveBeenCalled()
expect(eventIngestionRestrictionManager.shouldSkipPerson).not.toHaveBeenCalled()
})

View File

@@ -1,7 +1,7 @@
import { createApplyPersonProcessingRestrictionsStep } from '../../../src/ingestion/event-preprocessing/apply-person-processing-restrictions'
import { ok } from '../../../src/ingestion/pipelines/results'
import { IncomingEventWithTeam } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
describe('createApplyPersonProcessingRestrictionsStep', () => {
let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -50,7 +50,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties).toEqual({ defaultProp: 'defaultValue' })
expect(input.eventWithTeam.event.token).toBe('valid-token-abc')
expect(input.eventWithTeam.event.distinct_id).toBe('user-123')
@@ -67,7 +67,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties?.$process_person_profile).toBe(false)
expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
'restricted-token-def',
@@ -85,7 +85,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties?.$process_person_profile).toBe(false)
expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
'opt-out-token-ghi',
@@ -106,7 +106,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties).toMatchObject({
customProp: 'customValue',
$set: { a: 1, b: 2 },
@@ -133,7 +133,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties).toEqual({ customProp: 'customValue' })
expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
undefined,
@@ -154,7 +154,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {
const result = step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(input.eventWithTeam.event.properties).toMatchObject({
customProp: 'customValue',
$process_person_profile: false,

View File

@@ -1,9 +1,9 @@
import { Message } from 'node-rdkafka'
import { createParseHeadersStep } from '../../../src/ingestion/event-preprocessing/parse-headers'
import { ok } from '../../../src/ingestion/pipelines/results'
import { parseEventHeaders } from '../../../src/kafka/consumer'
import { EventHeaders } from '../../../src/types'
import { success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
// Mock dependencies
jest.mock('../../../src/kafka/consumer', () => ({
@@ -35,7 +35,7 @@ describe('createParseHeadersStep', () => {
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -56,7 +56,7 @@ describe('createParseHeadersStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
...input,
headers: expectedHeaders,
})
@@ -76,7 +76,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -92,7 +92,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -116,7 +116,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -140,7 +140,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -164,7 +164,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -188,7 +188,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
@@ -214,7 +214,7 @@ describe('createParseHeadersStep', () => {
}
const result = step(input)
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
})
})

View File

@@ -1,8 +1,8 @@
import { Message } from 'node-rdkafka'
import { createParseKafkaMessageStep } from '../../../src/ingestion/event-preprocessing/parse-kafka-message'
import { drop, ok } from '../../../src/ingestion/pipelines/results'
import { logger } from '../../../src/utils/logger'
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
// Mock dependencies
jest.mock('../../../src/utils/logger')
@@ -42,7 +42,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -83,7 +83,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -115,7 +115,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -150,7 +150,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -187,7 +187,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -325,7 +325,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {
@@ -357,7 +357,7 @@ describe('createParseKafkaMessageStep', () => {
const result = step(input)
expect(result).toEqual(
success({
ok({
message: mockMessage,
event: {
event: {

View File

@@ -4,8 +4,8 @@ import { DB } from '~/utils/db/db'
import { TeamManager } from '~/utils/team-manager'
import { createResolveTeamStep } from '../../../src/ingestion/event-preprocessing/resolve-team'
import { drop, ok } from '../../../src/ingestion/pipelines/results'
import { EventHeaders, Hub, IncomingEvent, Team } from '../../../src/types'
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
import { getMetricValues, resetMetrics } from '../../helpers/metrics'
const pipelineEvent = {
@@ -116,7 +116,7 @@ describe('createResolveTeamStep()', () => {
}
const response = await step(input)
expect(response).toEqual(
success({
ok({
...input,
eventWithTeam: {
event: { ...pipelineEvent, token: teamTwoToken },
@@ -162,7 +162,7 @@ describe('createResolveTeamStep()', () => {
}
const response = await step(input)
expect(response).toEqual(
success({
ok({
...input,
eventWithTeam: {
event: { ...pipelineEvent, team_id: 3, token: teamTwoToken },

View File

@@ -1,10 +1,6 @@
import { createValidateEventUuidStep } from '../../../src/ingestion/event-preprocessing/validate-event-uuid'
import { PipelineResultType, drop, ok } from '../../../src/ingestion/pipelines/results'
import { Hub, IncomingEventWithTeam } from '../../../src/types'
import {
PipelineStepResultType,
drop,
success,
} from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
import { captureIngestionWarning } from '../../../src/worker/ingestion/utils'
import { getMetricValues, resetMetrics } from '../../helpers/metrics'
@@ -53,7 +49,7 @@ describe('createValidateEventUuidStep', () => {
const input = { eventWithTeam: mockEventWithTeam }
const result = await step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
expect(mockCaptureIngestionWarning).not.toHaveBeenCalled()
})
@@ -172,9 +168,9 @@ describe('createValidateEventUuidStep', () => {
const input = { eventWithTeam: mockEventWithTeam }
const result = await step(input)
expect(result).toEqual(success(input))
expect(result).toEqual(ok(input))
if (result.type === PipelineStepResultType.OK) {
if (result.type === PipelineResultType.OK) {
expect(result.value.eventWithTeam.event.token).toBe('test-token-123')
expect(result.value.eventWithTeam.event.distinct_id).toBe('test-user-456')
expect(result.value.eventWithTeam.event.event).toBe('test-event')

View File

@@ -2,7 +2,7 @@ import { DateTime } from 'luxon'
import { PluginEvent } from '@posthog/plugin-scaffold'
import { PipelineStepResultType, isSuccessResult } from '~/worker/ingestion/event-pipeline/pipeline-step-result'
import { PipelineResultType, isOkResult } from '~/ingestion/pipelines/results'
import { BatchWritingPersonsStoreForBatch } from '~/worker/ingestion/persons/batch-writing-person-store'
import { Hub, Team } from '../../../../src/types'
@@ -72,8 +72,8 @@ describe('processPersonsStep()', () => {
)
)
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const [resEvent, resPerson, kafkaAck] = result.value
expect(resEvent).toEqual(pluginEvent)
expect(resPerson).toEqual(
@@ -121,8 +121,8 @@ describe('processPersonsStep()', () => {
)
)
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const [resEvent, resPerson, kafkaAck] = result.value
expect(resEvent).toEqual({
...event,

View File

@@ -3,8 +3,8 @@ import { v4 } from 'uuid'
import { PluginEvent } from '@posthog/plugin-scaffold'
import { dlq, ok, redirect } from '~/ingestion/pipelines/results'
import { forSnapshot } from '~/tests/helpers/snapshots'
import { dlq, redirect, success } from '~/worker/ingestion/event-pipeline/pipeline-step-result'
import { BatchWritingGroupStoreForBatch } from '~/worker/ingestion/groups/batch-writing-group-store'
import { BatchWritingPersonsStoreForBatch } from '~/worker/ingestion/persons/batch-writing-person-store'
@@ -188,7 +188,7 @@ describe('EventPipelineRunner', () => {
)
jest.mocked(processPersonsStep).mockResolvedValue(
success([
ok([
pluginEvent,
{ person, personUpdateProperties: {}, get: () => Promise.resolve(person) } as any,
Promise.resolve(),

View File

@@ -5,13 +5,8 @@ import { DateTime } from 'luxon'
import { PluginEvent, Properties } from '@posthog/plugin-scaffold'
import { KAFKA_INGESTION_WARNINGS, KAFKA_PERSON, KAFKA_PERSON_DISTINCT_ID } from '~/config/kafka-topics'
import { PipelineResultType, isDlqResult, isOkResult, isRedirectResult } from '~/ingestion/pipelines/results'
import { Clickhouse } from '~/tests/helpers/clickhouse'
import {
PipelineStepResultType,
isDlqResult,
isRedirectResult,
isSuccessResult,
} from '~/worker/ingestion/event-pipeline/pipeline-step-result'
import { fromInternalPerson } from '~/worker/ingestion/persons/person-update-batch'
import { TopicMessage } from '../../../src/kafka/producer'
@@ -363,8 +358,8 @@ describe('PersonState.processEvent()', () => {
await hub.db.kafkaProducer.flush()
await kafkaAcks
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const fakePerson = result.value
expect(fakePerson).toEqual(
expect.objectContaining({
@@ -384,7 +379,7 @@ describe('PersonState.processEvent()', () => {
// verify there are no Postgres distinct_ids
const distinctIds = await fetchDistinctIdValues(
hub.db.postgres,
isSuccessResult(result) ? (result.value as InternalPerson) : ({} as InternalPerson)
isOkResult(result) ? (result.value as InternalPerson) : ({} as InternalPerson)
)
expect(distinctIds).toEqual(expect.arrayContaining([]))
})
@@ -500,8 +495,8 @@ describe('PersonState.processEvent()', () => {
await hub.db.kafkaProducer.flush()
await kafkaAcks2
expect(result2.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result2)) {
expect(result2.type).toBe(PipelineResultType.OK)
if (isOkResult(result2)) {
const fakePerson = result2.value
expect(fakePerson).toEqual(
expect.objectContaining({
@@ -560,8 +555,8 @@ describe('PersonState.processEvent()', () => {
await hub.db.kafkaProducer.flush()
await kafkaAcks2
expect(result2.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result2)) {
expect(result2.type).toBe(PipelineResultType.OK)
if (isOkResult(result2)) {
const fakePerson = result2.value
expect(fakePerson.force_upgrade).toBeUndefined()
}
@@ -614,8 +609,8 @@ describe('PersonState.processEvent()', () => {
await hub.db.kafkaProducer.flush()
await kafkaAcks
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const person = result.value
expect(person).toEqual(
expect.objectContaining({
@@ -646,8 +641,8 @@ describe('PersonState.processEvent()', () => {
uuid: new UUIDT().toString(),
properties: {},
}).processEvent()
expect(personVerifyResult.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(personVerifyResult)) {
expect(personVerifyResult.type).toBe(PipelineResultType.OK)
if (isOkResult(personVerifyResult)) {
expect(personVerifyResult.value.properties).toEqual({ $creator_event_uuid: originalEventUuid, c: 420 })
}
@@ -664,8 +659,8 @@ describe('PersonState.processEvent()', () => {
hub,
false
).processEvent()
expect(processPersonFalseResult.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(processPersonFalseResult)) {
expect(processPersonFalseResult.type).toBe(PipelineResultType.OK)
if (isOkResult(processPersonFalseResult)) {
expect(processPersonFalseResult.value.properties).toEqual({})
}
})
@@ -1095,8 +1090,8 @@ describe('PersonState.processEvent()', () => {
const context = personS.getContext()
await flushPersonStoreToKafka(hub, context.personStore, kafkaAcks)
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const person = result.value
expect(person).toEqual(
expect.objectContaining({
@@ -1251,8 +1246,8 @@ describe('PersonState.processEvent()', () => {
await flushPersonStoreToKafka(hub, context.personStore, kafkaAcks)
// Return logic is still unaware that merge happened
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
const person = result.value
expect(person).toMatchObject({
id: expect.any(String),
@@ -4041,7 +4036,7 @@ describe('PersonState.processEvent()', () => {
const [result] = await processor.processEvent()
expect(result.type).toBe(PipelineStepResultType.DLQ)
expect(result.type).toBe(PipelineResultType.DLQ)
if (isDlqResult(result)) {
expect(result.reason).toBe('Merge limit exceeded')
expect((result.error as any).message).toContain('person_merge_move_limit_hit')
@@ -4070,7 +4065,7 @@ describe('PersonState.processEvent()', () => {
const [result] = await processor.processEvent()
expect(result.type).toBe(PipelineStepResultType.REDIRECT)
expect(result.type).toBe(PipelineResultType.REDIRECT)
if (isRedirectResult(result)) {
expect(result.reason).toBe('Event redirected to async merge topic')
expect(result.topic).toBe('async-merge-topic')
@@ -4115,8 +4110,8 @@ describe('PersonState.processEvent()', () => {
const [result] = await processor.processEvent()
expect(result.type).toBe(PipelineStepResultType.OK)
if (isSuccessResult(result)) {
expect(result.type).toBe(PipelineResultType.OK)
if (isOkResult(result)) {
expect(result.value).toEqual(mockPerson)
}
}