Mirror of https://github.com/BillyOutlast/posthog.git (synced 2026-02-04 03:01:23 +01:00)
refactor: process cookieless through batch pipelines (#38463)
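Throughout this diff, pipeline steps stop returning bare events (or silently filtering them out) and instead return one `PipelineResult` per input, so drops, dead-lettering, and overflow redirects stay visible to the surrounding pipeline. For orientation, here is a minimal hedged sketch of the result helpers (`ok`, `drop`, `dlq`, `redirect`, `isOkResult`) that the changed files import from `pipelines/results`; the exact field names of the real module are assumptions, only the helper names appear in the diff:

```typescript
// Hypothetical sketch of the result helpers used throughout this diff.
// The real module lives at the plugin-server's `pipelines/results`; field names here are assumed.
type PipelineResult<T> =
    | { type: 'ok'; value: T }
    | { type: 'drop'; reason: string }
    | { type: 'dlq'; reason: string; error?: Error }
    | { type: 'redirect'; reason: string; topic: string }

const ok = <T>(value: T): PipelineResult<T> => ({ type: 'ok', value })
const drop = <T>(reason: string): PipelineResult<T> => ({ type: 'drop', reason })
const dlq = <T>(reason: string, error?: Error): PipelineResult<T> => ({ type: 'dlq', reason, error })
const redirect = <T>(reason: string, topic: string): PipelineResult<T> => ({ type: 'redirect', reason, topic })

// Type guard used by steps to unwrap successful results
function isOkResult<T>(result: PipelineResult<T>): result is { type: 'ok'; value: T } {
    return result.type === 'ok'
}
```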
@@ -25,7 +25,7 @@ exports[`IngestionConsumer general overflow force overflow should force events w
   {
     "headers": {
       "distinct_id": "team1-user",
-      "redirect-step": "async_pipeline_result_handler",
+      "redirect-step": "result_handler",
       "redirect-timestamp": "2025-01-01T00:00:00.000Z",
       "token": "THIS IS NOT A TOKEN FOR TEAM 2",
     },
@@ -48,7 +48,7 @@ exports[`IngestionConsumer general overflow force overflow should handle multipl
   {
     "headers": {
       "distinct_id": "user1",
-      "redirect-step": "async_pipeline_result_handler",
+      "redirect-step": "result_handler",
       "redirect-timestamp": "2025-01-01T00:00:00.000Z",
       "token": "THIS IS NOT A TOKEN FOR TEAM 2",
     },
@@ -66,7 +66,7 @@ exports[`IngestionConsumer general overflow force overflow should handle multipl
   {
     "headers": {
      "distinct_id": "user2",
-      "redirect-step": "async_pipeline_result_handler",
+      "redirect-step": "result_handler",
       "redirect-timestamp": "2025-01-01T00:00:00.000Z",
       "token": "<REPLACED-UUID-1>",
     },
@@ -13,6 +13,7 @@ import { closeHub, createHub } from '../../utils/db/hub'
 import { PostgresUse } from '../../utils/db/postgres'
 import { parseJSON } from '../../utils/json-parse'
 import { UUID7 } from '../../utils/utils'
+import { isOkResult } from '../pipelines/results'
 import {
     COOKIELESS_MODE_FLAG_PROPERTY,
     COOKIELESS_SENTINEL_VALUE,
@@ -282,8 +283,9 @@ describe('CookielessManager', () => {
         headers: { token?: string; distinct_id?: string; timestamp?: string } = {}
     ): Promise<PipelineEvent | undefined> {
         const response = await hub.cookielessManager.doBatch([{ event, team, message, headers }])
-        expect(response.length).toBeLessThanOrEqual(1)
-        return response[0]?.event
+        expect(response.length).toBe(1)
+        const result = response[0]
+        return isOkResult(result) ? result.value.event : undefined
     }

     async function processEventWithHeaders(
@@ -294,10 +296,11 @@ describe('CookielessManager', () => {
         headers: { token?: string; distinct_id?: string; timestamp?: string }
     }> {
         const response = await hub.cookielessManager.doBatch([{ event, team, message, headers }])
-        expect(response.length).toBeLessThanOrEqual(1)
+        expect(response.length).toBe(1)
+        const result = response[0]
         return {
-            event: response[0]?.event,
-            headers: response[0]?.headers || {},
+            event: isOkResult(result) ? result.value.event : undefined,
+            headers: isOkResult(result) ? result.value.headers || {} : {},
         }
     }
@@ -28,6 +28,7 @@ import { TeamManager } from '../../utils/team-manager'
 import { UUID7, bufferToUint32ArrayLE, uint32ArrayLEToBuffer } from '../../utils/utils'
 import { compareTimestamps } from '../../worker/ingestion/timestamp-comparison'
 import { toStartOfDayInTimezone, toYearMonthDayInTimezone } from '../../worker/ingestion/timestamps'
+import { PipelineResult, drop, ok } from '../pipelines/results'
 import { RedisHelpers } from './redis-helpers'

 /* ---------------------------------------------------------------------
@@ -275,7 +276,7 @@ export class CookielessManager {
         return buf
     }

-    async doBatch(events: IncomingEventWithTeam[]): Promise<IncomingEventWithTeam[]> {
+    async doBatch(events: IncomingEventWithTeam[]): Promise<PipelineResult<IncomingEventWithTeam>[]> {
         if (this.config.disabled) {
             // cookieless is globally disabled, don't do any processing just drop all cookieless events
             return this.dropAllCookielessEvents(events, 'cookieless_globally_disabled')
@@ -296,15 +297,20 @@ export class CookielessManager {
         }
     }

-    private async doBatchInner(events: IncomingEventWithTeam[]): Promise<IncomingEventWithTeam[]> {
+    private async doBatchInner(events: IncomingEventWithTeam[]): Promise<PipelineResult<IncomingEventWithTeam>[]> {
         const hashCache: Record<string, Buffer> = {}

+        // Track results for each input event - initialize all as success, will be overwritten if dropped
+        const results: PipelineResult<IncomingEventWithTeam>[] = events.map((event) => ok(event))
+
         // do a first pass just to extract properties and compute the base hash for stateful cookieless events
         const eventsWithStatus: EventWithStatus[] = []
-        for (const { event, team, message, headers } of events) {
+        for (let i = 0; i < events.length; i++) {
+            const { event, team, message, headers } = events[i]
+
             if (!event.properties?.[COOKIELESS_MODE_FLAG_PROPERTY]) {
                 // push the event as is, we don't need to do anything with it, but preserve the ordering
-                eventsWithStatus.push({ event, team, message, headers })
+                eventsWithStatus.push({ event, team, message, headers, originalIndex: i })
                 continue
             }
@@ -318,6 +324,7 @@ export class CookielessManager {
                         drop_cause: 'cookieless_disallowed_event',
                     })
                     .inc()
+                results[i] = drop('Event type not supported in cookieless mode')
                 continue
             }
             if (
@@ -331,6 +338,7 @@ export class CookielessManager {
                         drop_cause: 'cookieless_stateless_disallowed_identify',
                     })
                     .inc()
+                results[i] = drop('$identify not supported in stateless cookieless mode')
                 continue
             }
@@ -345,6 +353,7 @@ export class CookielessManager {
                         drop_cause: 'cookieless_team_disabled',
                     })
                     .inc()
+                results[i] = drop('Cookieless disabled for team')
                 continue
             }
             const timestamp = event.timestamp ?? event.sent_at ?? event.now
@@ -356,6 +365,7 @@ export class CookielessManager {
                         drop_cause: 'cookieless_no_timestamp',
                     })
                     .inc()
+                results[i] = drop('Missing timestamp')
                 continue
             }
@@ -388,6 +398,7 @@ export class CookielessManager {
                             : 'cookieless_missing_host',
                     })
                     .inc()
+                results[i] = drop(!userAgent ? 'Missing user agent' : !ip ? 'Missing IP' : 'Missing host')
                 continue
             }
@@ -408,6 +419,7 @@ export class CookielessManager {
                 team,
                 message,
                 headers,
+                originalIndex: i,
                 firstPass: {
                     timestampMs,
                     eventTimeZone,
@@ -422,7 +434,7 @@ export class CookielessManager {

         // early exit if we don't need to do anything
         if (!eventsWithStatus.some((e) => e.firstPass)) {
-            return eventsWithStatus
+            return results
         }

         // Do a second pass to see what `identifiesRedisKey`s we need to load from redis for stateful events.
@@ -612,13 +624,19 @@ export class CookielessManager {
             )
         }

-        // remove the extra processing state from the returned object
-        return eventsWithStatus.map(({ event, team, message, headers }) => ({ event, team, message, headers }))
+        // Update results with successfully processed events
+        for (const { event, team, message, headers, originalIndex } of eventsWithStatus) {
+            results[originalIndex] = ok({ event, team, message, headers })
+        }
+
+        return results
     }

-    dropAllCookielessEvents(events: IncomingEventWithTeam[], dropCause: string): IncomingEventWithTeam[] {
-        const nonCookielessEvents: IncomingEventWithTeam[] = []
-        for (const incomingEvent of events) {
+    dropAllCookielessEvents(
+        events: IncomingEventWithTeam[],
+        dropCause: string
+    ): PipelineResult<IncomingEventWithTeam>[] {
+        return events.map((incomingEvent) => {
             if (incomingEvent.event.properties?.[COOKIELESS_MODE_FLAG_PROPERTY]) {
                 eventDroppedCounter
                     .labels({
@@ -626,11 +644,11 @@ export class CookielessManager {
                         drop_cause: dropCause,
                     })
                     .inc()
+                return drop(dropCause)
             } else {
-                nonCookielessEvents.push(incomingEvent)
+                return ok(incomingEvent)
             }
-        }
-        return nonCookielessEvents
+        })
     }
 }
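The key contract change above: `doBatch` and `dropAllCookielessEvents` now return exactly one `PipelineResult` per input event, index-aligned, instead of a shorter filtered array. A hedged sketch of how a caller can rely on that invariant (`hub` and `events` are placeholders for illustration; `isOkResult` is from `pipelines/results`):

```typescript
// Sketch: consuming the index-aligned results of the new doBatch contract.
const results = await hub.cookielessManager.doBatch(events)
// results.length === events.length always holds now; results[i] describes events[i]
const kept = results.filter(isOkResult).map((result) => result.value)
const droppedCount = results.length - kept.length
```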
@@ -639,6 +657,7 @@ type EventWithStatus = {
     event: PipelineEvent
     team: Team
     headers: EventHeaders
+    originalIndex: number
     // Store temporary processing state. Nest the passes to make type-checking easier
     firstPass?: {
         timestampMs: number
@@ -0,0 +1,22 @@
+import { Hub, IncomingEventWithTeam } from '../../types'
+import { PipelineResult, isOkResult, ok } from '../pipelines/results'
+
+export function createApplyCookielessProcessingStep<T extends { eventWithTeam: IncomingEventWithTeam }>(hub: Hub) {
+    return async function applyCookielessProcessingStep(events: T[]): Promise<PipelineResult<T>[]> {
+        const cookielessResults = await hub.cookielessManager.doBatch(events.map((x) => x.eventWithTeam))
+
+        return events.map((event, index) => {
+            const cookielessResult = cookielessResults[index]
+
+            if (isOkResult(cookielessResult)) {
+                return ok({
+                    ...event,
+                    eventWithTeam: cookielessResult.value,
+                })
+            } else {
+                // Return the drop/dlq/redirect result from cookieless processing
+                return cookielessResult
+            }
+        })
+    }
+}
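This new batch step is the piece that lets cookieless processing run inside the generic batch pipeline: it unwraps `eventWithTeam` from each element, delegates to `doBatch`, and re-wraps the per-event results. A hedged usage sketch (`hub` and `preprocessedEvents` are placeholders for illustration):

```typescript
// Sketch: running the cookieless batch step on its own.
const step = createApplyCookielessProcessingStep<{ eventWithTeam: IncomingEventWithTeam }>(hub)
const results = await step(preprocessedEvents)
// Non-ok results (drop/dlq/redirect) surface to the result-handling pipeline;
// downstream steps only ever see the survivors:
const survivors = results.filter(isOkResult).map((r) => r.value)
```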
@@ -1,8 +1,8 @@
 import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
 import { EventHeaders } from '../../types'
 import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
-import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { SyncPreprocessingStep } from '../processing-pipeline'
+import { drop, ok } from '../pipelines/results'
+import { SyncProcessingStep } from '../pipelines/steps'

 function applyDropEventsRestrictions(
     eventIngestionRestrictionManager: EventIngestionRestrictionManager,
@@ -16,8 +16,8 @@ function applyDropEventsRestrictions(

 export function createApplyDropRestrictionsStep<T extends { headers: EventHeaders }>(
     eventIngestionRestrictionManager: EventIngestionRestrictionManager
-): SyncPreprocessingStep<T, T> {
-    return (input) => {
+): SyncProcessingStep<T, T> {
+    return function applyDropRestrictionsStep(input) {
         const { headers } = input

         if (applyDropEventsRestrictions(eventIngestionRestrictionManager, headers)) {
@@ -30,6 +30,6 @@ export function createApplyDropRestrictionsStep<T extends { headers: EventHeader
             return drop('Event dropped due to token restrictions')
         }

-        return success(input)
+        return ok(input)
     }
 }
@@ -1,8 +1,8 @@
 import { ingestionOverflowingMessagesTotal } from '../../main/ingestion-queues/batch-processing/metrics'
 import { EventHeaders } from '../../types'
 import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
-import { redirect, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { SyncPreprocessingStep } from '../processing-pipeline'
+import { ok, redirect } from '../pipelines/results'
+import { SyncProcessingStep } from '../pipelines/steps'

 export type ForceOverflowDecision = {
     shouldRedirect: boolean
@@ -37,17 +37,17 @@ function applyForceOverflowRestrictions(
 export function createApplyForceOverflowRestrictionsStep<T extends { headers: EventHeaders }>(
     eventIngestionRestrictionManager: EventIngestionRestrictionManager,
     overflowConfig: OverflowConfig
-): SyncPreprocessingStep<T, T> {
-    return (input) => {
+): SyncProcessingStep<T, T> {
+    return function applyForceOverflowRestrictionsStep(input) {
         const { headers } = input

         if (!overflowConfig.overflowEnabled) {
-            return success(input)
+            return ok(input)
         }

         const forceOverflowDecision = applyForceOverflowRestrictions(eventIngestionRestrictionManager, headers)
         if (!forceOverflowDecision.shouldRedirect) {
-            return success(input)
+            return ok(input)
         }

         ingestionOverflowingMessagesTotal.inc()
@@ -1,7 +1,7 @@
 import { IncomingEventWithTeam } from '../../types'
 import { EventIngestionRestrictionManager } from '../../utils/event-ingestion-restriction-manager'
-import { success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { SyncPreprocessingStep } from '../processing-pipeline'
+import { ok } from '../pipelines/results'
+import { SyncProcessingStep } from '../pipelines/steps'

 function applyPersonProcessingRestrictions(
     eventWithTeam: IncomingEventWithTeam,
@@ -28,10 +28,10 @@ function applyPersonProcessingRestrictions(
 // TODO: Refactor this to use just headers and the team before parsing the event
 export function createApplyPersonProcessingRestrictionsStep<T extends { eventWithTeam: IncomingEventWithTeam }>(
     eventIngestionRestrictionManager: EventIngestionRestrictionManager
-): SyncPreprocessingStep<T, T> {
-    return (input) => {
+): SyncProcessingStep<T, T> {
+    return function applyPersonProcessingRestrictionsStep(input) {
         const { eventWithTeam } = input
         applyPersonProcessingRestrictions(eventWithTeam, eventIngestionRestrictionManager)
-        return success(input)
+        return ok(input)
     }
 }
@@ -1,7 +1,8 @@
+export { createApplyCookielessProcessingStep } from './apply-cookieless-processing'
 export { createApplyDropRestrictionsStep } from './apply-drop-events-restrictions'
+export { createApplyForceOverflowRestrictionsStep } from './apply-force-overflow-restrictions'
 export { createApplyPersonProcessingRestrictionsStep } from './apply-person-processing-restrictions'
+export { createParseHeadersStep } from './parse-headers'
 export { createParseKafkaMessageStep } from './parse-kafka-message'
 export { createResolveTeamStep } from './resolve-team'
 export { createValidateEventUuidStep } from './validate-event-uuid'
-export { createApplyForceOverflowRestrictionsStep } from './apply-force-overflow-restrictions'
-export { createParseHeadersStep } from './parse-headers'
@@ -2,16 +2,16 @@ import { Message } from 'node-rdkafka'

 import { parseEventHeaders } from '../../kafka/consumer'
 import { EventHeaders } from '../../types'
-import { success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { SyncPreprocessingStep } from '../processing-pipeline'
+import { ok } from '../pipelines/results'
+import { SyncProcessingStep } from '../pipelines/steps'

-export function createParseHeadersStep<T extends { message: Pick<Message, 'headers'> }>(): SyncPreprocessingStep<
+export function createParseHeadersStep<T extends { message: Pick<Message, 'headers'> }>(): SyncProcessingStep<
     T,
     T & { headers: EventHeaders }
 > {
-    return (input) => {
+    return function parseHeadersStep(input) {
         const { message } = input
         const parsedHeaders = parseEventHeaders(message.headers)
-        return success({ ...input, headers: parsedHeaders })
+        return ok({ ...input, headers: parsedHeaders })
     }
 }
@@ -4,8 +4,8 @@ import { IncomingEvent, PipelineEvent } from '../../types'
 import { normalizeEvent } from '../../utils/event'
 import { parseJSON } from '../../utils/json-parse'
 import { logger } from '../../utils/logger'
-import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { SyncPreprocessingStep } from '../processing-pipeline'
+import { drop, ok } from '../pipelines/results'
+import { SyncProcessingStep } from '../pipelines/steps'

 function parseKafkaMessage(message: Message): IncomingEvent | null {
     try {
@@ -21,11 +21,11 @@ function parseKafkaMessage(message: Message): IncomingEvent | null {
     }
 }

-export function createParseKafkaMessageStep<T extends { message: Message }>(): SyncPreprocessingStep<
+export function createParseKafkaMessageStep<T extends { message: Message }>(): SyncProcessingStep<
     T,
     T & { event: IncomingEvent }
 > {
-    return (input) => {
+    return function parseKafkaMessageStep(input) {
         const { message } = input

         const parsedEvent = parseKafkaMessage(message)
@@ -33,6 +33,6 @@ export function createParseKafkaMessageStep<T extends { message: Message }>(): S
             return drop('Failed to parse Kafka message')
         }

-        return success({ ...input, event: parsedEvent })
+        return ok({ ...input, event: parsedEvent })
     }
 }
@@ -3,8 +3,8 @@ import { Message } from 'node-rdkafka'
 import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
 import { EventHeaders, Hub, IncomingEvent, IncomingEventWithTeam } from '../../types'
 import { tokenOrTeamPresentCounter } from '../../worker/ingestion/event-pipeline/metrics'
-import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
-import { AsyncPreprocessingStep } from '../processing-pipeline'
+import { drop, ok } from '../pipelines/results'
+import { AsyncProcessingStep } from '../pipelines/steps'

 async function resolveTeam(
     hub: Pick<Hub, 'teamManager'>,
@@ -51,8 +51,8 @@ async function resolveTeam(

 export function createResolveTeamStep<T extends { message: Message; headers: EventHeaders; event: IncomingEvent }>(
     hub: Hub
-): AsyncPreprocessingStep<T, T & { eventWithTeam: IncomingEventWithTeam }> {
-    return async (input) => {
+): AsyncProcessingStep<T, T & { eventWithTeam: IncomingEventWithTeam }> {
+    return async function resolveTeamStep(input) {
         const { message, headers, event } = input

         const eventWithTeam = await resolveTeam(hub, message, headers, event.event)
@@ -61,6 +61,6 @@ export function createResolveTeamStep<T extends { message: Message; headers: Eve
             return drop('Failed to resolve team')
         }

-        return success({ ...input, eventWithTeam })
+        return ok({ ...input, eventWithTeam })
     }
 }
@@ -1,9 +1,9 @@
 import { eventDroppedCounter } from '../../main/ingestion-queues/metrics'
 import { Hub, IncomingEventWithTeam } from '../../types'
 import { UUID } from '../../utils/utils'
-import { drop, success } from '../../worker/ingestion/event-pipeline/pipeline-step-result'
 import { captureIngestionWarning } from '../../worker/ingestion/utils'
-import { AsyncPreprocessingStep } from '../processing-pipeline'
+import { drop, ok } from '../pipelines/results'
+import { AsyncProcessingStep } from '../pipelines/steps'

 async function isEventUuidValid(eventWithTeam: IncomingEventWithTeam, hub: Pick<Hub, 'db'>): Promise<boolean> {
     const { event, team } = eventWithTeam
@@ -39,13 +39,13 @@ async function isEventUuidValid(eventWithTeam: IncomingEventWithTeam, hub: Pick<

 export function createValidateEventUuidStep<T extends { eventWithTeam: IncomingEventWithTeam }>(
     hub: Hub
-): AsyncPreprocessingStep<T, T> {
-    return async (input) => {
+): AsyncProcessingStep<T, T> {
+    return async function validateEventUuidStep(input) {
         const { eventWithTeam } = input
         const isValid = await isEventUuidValid(eventWithTeam, hub)
         if (!isValid) {
             return drop('Event has invalid UUID')
         }
-        return success(input)
+        return ok(input)
     }
 }
@@ -31,10 +31,10 @@ import { BatchWritingGroupStore } from '../worker/ingestion/groups/batch-writing
 import { GroupStoreForBatch } from '../worker/ingestion/groups/group-store-for-batch.interface'
 import { BatchWritingPersonsStore } from '../worker/ingestion/persons/batch-writing-person-store'
 import { FlushResult, PersonsStoreForBatch } from '../worker/ingestion/persons/persons-store-for-batch'
-import { PipelineConfig, ResultHandlingPipeline } from '../worker/ingestion/result-handling-pipeline'
 import { deduplicateEvents } from './deduplication/events'
 import { DeduplicationRedis, createDeduplicationRedis } from './deduplication/redis-client'
 import {
+    createApplyCookielessProcessingStep,
     createApplyDropRestrictionsStep,
     createApplyForceOverflowRestrictionsStep,
     createApplyPersonProcessingRestrictionsStep,
@@ -43,6 +43,8 @@ import {
     createResolveTeamStep,
     createValidateEventUuidStep,
 } from './event-preprocessing'
+import { createBatch, createNewBatchPipeline, createNewPipeline } from './pipelines/helpers'
+import { PipelineConfig, ResultHandlingPipeline } from './pipelines/result-handling-pipeline'
 import { MemoryRateLimiter } from './utils/overflow-detector'

 const ingestionEventOverflowed = new Counter({
@@ -113,7 +115,7 @@ export class IngestionConsumer {
     private deduplicationRedis: DeduplicationRedis
     public readonly promiseScheduler = new PromiseScheduler()

-    private preprocessingPipeline: (message: Message) => Promise<PreprocessedEvent | null>
+    private batchPreprocessingPipeline!: ResultHandlingPipeline<{ message: Message }, PreprocessedEvent>

     constructor(
         private hub: Hub,
@@ -174,37 +176,6 @@ export class IngestionConsumer {
             groupId: this.groupId,
             topic: this.topic,
         })
-
-        // Initialize preprocessing pipeline
-        this.preprocessingPipeline = async (message: Message) => {
-            const pipelineConfig: PipelineConfig = {
-                kafkaProducer: this.kafkaProducer!,
-                dlqTopic: this.dlqTopic,
-                promiseScheduler: this.promiseScheduler,
-            }
-
-            try {
-                const pipeline = ResultHandlingPipeline.of({ message }, message, pipelineConfig)
-                    .pipe(createParseHeadersStep())
-                    .pipe(createApplyDropRestrictionsStep(this.eventIngestionRestrictionManager))
-                    .pipe(
-                        createApplyForceOverflowRestrictionsStep(this.eventIngestionRestrictionManager, {
-                            overflowEnabled: this.overflowEnabled(),
-                            overflowTopic: this.overflowTopic || '',
-                            preservePartitionLocality: this.hub.INGESTION_OVERFLOW_PRESERVE_PARTITION_LOCALITY,
-                        })
-                    )
-                    .pipe(createParseKafkaMessageStep())
-                    .pipeAsync(createResolveTeamStep(this.hub))
-                    .pipe(createApplyPersonProcessingRestrictionsStep(this.eventIngestionRestrictionManager))
-                    .pipeAsync(createValidateEventUuidStep(this.hub))
-
-                return await pipeline.unwrap()
-            } catch (error) {
-                console.error('Error processing message in pipeline:', error)
-                throw error
-            }
-        }
     }

     public get service(): PluginServerService {
@@ -227,6 +198,9 @@ export class IngestionConsumer {
             }),
         ])

+        // Initialize batch preprocessing pipeline after kafka producer is available
+        this.initializePipeline()
+
         await this.kafkaConsumer.connect(async (messages) => {
             return await instrumentFn(
                 {
@@ -238,6 +212,39 @@ export class IngestionConsumer {
         })
     }

+    private initializePipeline(): void {
+        const pipelineConfig: PipelineConfig = {
+            kafkaProducer: this.kafkaProducer!,
+            dlqTopic: this.dlqTopic,
+            promiseScheduler: this.promiseScheduler,
+        }
+
+        // Create preprocessing pipeline
+        const preprocessingPipeline = createNewPipeline()
+            .pipe(createParseHeadersStep())
+            .pipe(createApplyDropRestrictionsStep(this.eventIngestionRestrictionManager))
+            .pipe(
+                createApplyForceOverflowRestrictionsStep(this.eventIngestionRestrictionManager, {
+                    overflowEnabled: this.overflowEnabled(),
+                    overflowTopic: this.overflowTopic || '',
+                    preservePartitionLocality: this.hub.INGESTION_OVERFLOW_PRESERVE_PARTITION_LOCALITY,
+                })
+            )
+            .pipe(createParseKafkaMessageStep())
+            .pipeAsync(createResolveTeamStep(this.hub))
+            .pipe(createApplyPersonProcessingRestrictionsStep(this.eventIngestionRestrictionManager))
+            .pipeAsync(createValidateEventUuidStep(this.hub))
+
+        // Create the batch processing pipeline with fluent API
+        const batchPipeline = createNewBatchPipeline()
+            .pipeConcurrently(preprocessingPipeline)
+            .gather()
+            .pipeBatch(createApplyCookielessProcessingStep(this.hub))
+
+        // Wrap it in the result handling pipeline
+        this.batchPreprocessingPipeline = ResultHandlingPipeline.of(batchPipeline, pipelineConfig)
+    }
+
     public async stop(): Promise<void> {
         logger.info('🔁', `${this.name} - stopping`)
         this.isStopping = true
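Note the topology `initializePipeline` builds: per-message steps run concurrently via `pipeConcurrently`, `gather()` then synchronizes the whole batch, and only then does `pipeBatch` hand the complete batch to the cookieless step, which needs to see all events at once. A hedged sketch of the batch-step shape `pipeBatch` appears to expect, inferred from `createApplyCookielessProcessingStep` earlier in this diff (the import path is an assumption):

```typescript
import { PipelineResult } from './pipelines/results' // path assumed

// A batch step maps N inputs to N index-aligned results in one call.
export type BatchStep<T, U> = (events: T[]) => Promise<PipelineResult<U>[]>

// Any per-batch concern (shared Redis lookups, batch-wide hashing) fits this shape;
// the cookieless step is one instance of it.
```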
@@ -308,10 +315,7 @@ export class IngestionConsumer {
                 preprocessedEvents.map((x) => x.event)
             )
         )
-        const postCookielessMessages = await this.runInstrumented('cookielessProcessing', () =>
-            this.hub.cookielessManager.doBatch(preprocessedEvents.map((x) => x.eventWithTeam))
-        )
-        const eventsPerDistinctId = this.groupEventsByDistinctId(postCookielessMessages)
+        const eventsPerDistinctId = this.groupEventsByDistinctId(preprocessedEvents.map((x) => x.eventWithTeam))

         // Check if hogwatcher should be used (using the same sampling logic as in the transformer)
         const shouldRunHogWatcher = Math.random() < this.hub.CDP_HOG_WATCHER_SAMPLE_RATE
@@ -610,12 +614,21 @@ export class IngestionConsumer {
     }

     private async preprocessEvents(messages: Message[]): Promise<PreprocessedEvent[]> {
-        const pipelinePromises = messages.map(async (message) => {
-            return await this.preprocessingPipeline(message)
-        })
+        // Create batch using the helper function
+        const batch = createBatch(messages)

-        const results = await Promise.all(pipelinePromises)
-        return results.filter((result): result is PreprocessedEvent => result !== null)
+        // Feed batch to the pipeline
+        this.batchPreprocessingPipeline.feed(batch)
+
+        // Get all results from the gather pipeline (should return all results in one call)
+        const result = await this.batchPreprocessingPipeline.next()
+
+        if (result === null) {
+            return []
+        }
+
+        // Return the results (already filtered to successful ones by ResultHandlingPipeline)
+        return result
     }

     private groupEventsByDistinctId(messages: IncomingEventWithTeam[]): IncomingEventsByDistinctId {
@@ -0,0 +1,14 @@
+import { PipelineResultWithContext } from './pipeline.interface'
+
+/**
+ * Batch processing result type
+ */
+export type BatchPipelineResultWithContext<T> = PipelineResultWithContext<T>[]
+
+/**
+ * Interface for batch processing pipelines
+ */
+export interface BatchPipeline<TInput, TIntermediate> {
+    feed(elements: BatchPipelineResultWithContext<TInput>): void
+    next(): Promise<BatchPipelineResultWithContext<TIntermediate> | null>
+}
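The `feed`/`next` pair is a pull-based contract: callers push one batch in, then call `next()` until it returns `null`. A hedged helper sketch showing how a caller could drain any `BatchPipeline` (not part of this diff; the import path is assumed):

```typescript
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface' // path assumed

// Sketch: drain a BatchPipeline until it is exhausted, collecting every result.
async function drain<TIn, TOut>(
    pipeline: BatchPipeline<TIn, TOut>,
    input: BatchPipelineResultWithContext<TIn>
): Promise<BatchPipelineResultWithContext<TOut>> {
    pipeline.feed(input)
    const all: BatchPipelineResultWithContext<TOut> = []
    let batch = await pipeline.next()
    while (batch !== null) {
        all.push(...batch)
        batch = await pipeline.next()
    }
    return all
}
```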
@@ -0,0 +1,225 @@
+import { Message } from 'node-rdkafka'
+
+import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
+import { BufferingBatchPipeline } from './buffering-batch-pipeline'
+import { dlq, drop, ok, redirect } from './results'
+
+describe('BufferingBatchPipeline', () => {
+    let message1: Message
+    let message2: Message
+    let message3: Message
+    let context1: { message: Message }
+    let context2: { message: Message }
+    let context3: { message: Message }
+
+    beforeEach(() => {
+        // Create different mock messages with unique properties
+        message1 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 1,
+            key: Buffer.from('key1'),
+            value: Buffer.from('value1'),
+            timestamp: Date.now(),
+        } as Message
+
+        message2 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 2,
+            key: Buffer.from('key2'),
+            value: Buffer.from('value2'),
+            timestamp: Date.now() + 1,
+        } as Message
+
+        message3 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 3,
+            key: Buffer.from('key3'),
+            value: Buffer.from('value3'),
+            timestamp: Date.now() + 2,
+        } as Message
+
+        context1 = { message: message1 }
+        context2 = { message: message2 }
+        context3 = { message: message3 }
+    })
+
+    describe('constructor', () => {
+        it('should create instance with default type', () => {
+            const pipeline = new BufferingBatchPipeline()
+            expect(pipeline).toBeInstanceOf(BufferingBatchPipeline)
+        })
+
+        it('should create instance with custom type', () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            expect(pipeline).toBeInstanceOf(BufferingBatchPipeline)
+        })
+    })
+
+    describe('feed', () => {
+        it('should add elements to buffer', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const batch: BatchPipelineResultWithContext<string> = [
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ]
+
+            pipeline.feed(batch)
+
+            // Buffer is internal, so we test through next()
+            const result = await pipeline.next()
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ])
+        })
+
+        it('should accumulate multiple feeds', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('hello'), context: context1 }]
+            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('world'), context: context2 }]
+
+            pipeline.feed(batch1)
+            pipeline.feed(batch2)
+
+            const result = await pipeline.next()
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ])
+        })
+
+        it('should handle empty batch', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const emptyBatch: BatchPipelineResultWithContext<string> = []
+
+            pipeline.feed(emptyBatch)
+
+            const result = await pipeline.next()
+            expect(result).toEqual(null)
+        })
+    })
+
+    describe('next', () => {
+        it('should return null when buffer is empty', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const result = await pipeline.next()
+            expect(result).toBeNull()
+        })
+
+        it('should return all buffered elements and clear buffer', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const batch: BatchPipelineResultWithContext<string> = [
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ]
+
+            pipeline.feed(batch)
+
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+
+            expect(result1).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should handle mixed result types', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const dropResult = drop<string>('test drop')
+            const dlqResult = dlq<string>('test dlq', new Error('test error'))
+            const redirectResult = redirect<string>('test redirect', 'test-topic')
+
+            const batch: BatchPipelineResultWithContext<string> = [
+                { result: ok('hello'), context: context1 },
+                { result: dropResult, context: context2 },
+                { result: dlqResult, context: context3 },
+                { result: redirectResult, context: context1 },
+            ]
+
+            pipeline.feed(batch)
+
+            const result = await pipeline.next()
+            const result2 = await pipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: dropResult, context: context2 },
+                { result: dlqResult, context: context3 },
+                { result: redirectResult, context: context1 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should preserve order of fed elements', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('first'), context: context1 }]
+            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('second'), context: context2 }]
+            const batch3: BatchPipelineResultWithContext<string> = [{ result: ok('third'), context: context3 }]
+
+            pipeline.feed(batch1)
+            pipeline.feed(batch2)
+            pipeline.feed(batch3)
+
+            const result = await pipeline.next()
+            const result2 = await pipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('first'), context: context1 },
+                { result: ok('second'), context: context2 },
+                { result: ok('third'), context: context3 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should handle large number of elements', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+            const batch: BatchPipelineResultWithContext<string> = []
+
+            for (let i = 0; i < 100; i++) {
+                batch.push({ result: ok(`item${i}`), context: context1 })
+            }
+
+            pipeline.feed(batch)
+
+            const result = await pipeline.next()
+            const result2 = await pipeline.next()
+
+            expect(result).toHaveLength(100)
+            expect(result![0]).toEqual({ result: ok('item0'), context: context1 })
+            expect(result![99]).toEqual({ result: ok('item99'), context: context1 })
+            expect(result2).toBeNull()
+        })
+
+        it('should resume after returning null when more elements are fed', async () => {
+            const pipeline = new BufferingBatchPipeline<string>()
+
+            // First round: feed and process
+            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('first'), context: context1 }]
+            pipeline.feed(batch1)
+
+            const result1 = await pipeline.next()
+            expect(result1).toEqual([{ result: ok('first'), context: context1 }])
+
+            // Should return null when buffer is empty
+            const result2 = await pipeline.next()
+            expect(result2).toBeNull()
+
+            // Feed more elements
+            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('second'), context: context2 }]
+            pipeline.feed(batch2)
+
+            // Should resume processing
+            const result3 = await pipeline.next()
+            expect(result3).toEqual([{ result: ok('second'), context: context2 }])
+
+            // Should return null again
+            const result4 = await pipeline.next()
+            expect(result4).toBeNull()
+        })
+    })
+})
@@ -0,0 +1,26 @@
+import { Message } from 'node-rdkafka'
+
+import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
+import { ConcurrentBatchProcessingPipeline } from './concurrent-batch-pipeline'
+import { Pipeline } from './pipeline.interface'
+
+export class BufferingBatchPipeline<T = { message: Message }> implements BatchPipeline<T, T> {
+    private buffer: BatchPipelineResultWithContext<T> = []
+
+    feed(elements: BatchPipelineResultWithContext<T>): void {
+        this.buffer.push(...elements)
+    }
+
+    async next(): Promise<BatchPipelineResultWithContext<T> | null> {
+        if (this.buffer.length === 0) {
+            return null
+        }
+        const results = this.buffer
+        this.buffer = []
+        return Promise.resolve(results)
+    }
+
+    pipeConcurrently<U>(processor: Pipeline<T, U>): ConcurrentBatchProcessingPipeline<T, T, U> {
+        return new ConcurrentBatchProcessingPipeline(processor, this)
+    }
+}
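With `BufferingBatchPipeline` as the source stage, the fluent chain used by the ingestion consumer comes together. A hedged composition sketch mirroring the tests in this diff (it assumes `createNewBatchPipeline` from `./helpers` constructs a `BufferingBatchPipeline`; the uppercasing step stands in for real processing):

```typescript
import { Message } from 'node-rdkafka'

import { createNewBatchPipeline, createNewPipeline } from './helpers' // paths assumed
import { ok } from './results'

async function example(): Promise<void> {
    // Per-element pipeline: runs concurrently across the batch
    const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
    const batchPipeline = createNewBatchPipeline<string>().pipeConcurrently(processor).gather()

    const context = { message: {} as Message } // placeholder context for illustration
    batchPipeline.feed([{ result: ok('hello'), context }])
    const gathered = await batchPipeline.next() // all results in one array, in feed order
    console.log(gathered)
}
```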
@@ -0,0 +1,313 @@
+import { Message } from 'node-rdkafka'
+
+import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
+import { ConcurrentBatchProcessingPipeline } from './concurrent-batch-pipeline'
+import { createNewBatchPipeline, createNewPipeline } from './helpers'
+import { dlq, drop, ok, redirect } from './results'
+
+describe('ConcurrentBatchProcessingPipeline', () => {
+    let message1: Message
+    let message2: Message
+    let message3: Message
+    let context1: { message: Message }
+    let context2: { message: Message }
+    let context3: { message: Message }
+
+    beforeEach(() => {
+        // Create different mock messages with unique properties
+        message1 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 1,
+            key: Buffer.from('key1'),
+            value: Buffer.from('value1'),
+            timestamp: Date.now(),
+        } as Message
+
+        message2 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 2,
+            key: Buffer.from('key2'),
+            value: Buffer.from('value2'),
+            timestamp: Date.now() + 1,
+        } as Message
+
+        message3 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 3,
+            key: Buffer.from('key3'),
+            value: Buffer.from('value3'),
+            timestamp: Date.now() + 2,
+        } as Message
+
+        context1 = { message: message1 }
+        context2 = { message: message2 }
+        context3 = { message: message3 }
+    })
+
+    describe('constructor', () => {
+        it('should create instance with processor and previous pipeline', () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
+            const previousPipeline = createNewBatchPipeline<string>()
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            expect(pipeline).toBeInstanceOf(ConcurrentBatchProcessingPipeline)
+        })
+    })
+
+    describe('feed', () => {
+        it('should delegate to previous pipeline', () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
+            const previousPipeline = createNewBatchPipeline<string>()
+            const spy = jest.spyOn(previousPipeline, 'feed')
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
+
+            pipeline.feed(testBatch)
+
+            expect(spy).toHaveBeenCalledWith(testBatch)
+        })
+    })
+
+    describe('next', () => {
+        it('should return null when no results available', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
+            const previousPipeline = createNewBatchPipeline<string>()
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            const result = await pipeline.next()
+            expect(result).toBeNull()
+        })
+
+        it('should process successful results concurrently', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
+            const previousPipeline = createNewBatchPipeline<string>()
+
+            // Feed some test data
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+            const result3 = await pipeline.next()
+
+            expect(result1).toEqual([{ result: ok('HELLO'), context: context1 }])
+            expect(result2).toEqual([{ result: ok('WORLD'), context: context2 }])
+            expect(result3).toBeNull()
+        })
+
+        it('should preserve non-success results without processing', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
+            const dropResult = drop<string>('test drop')
+            const dlqResult = dlq<string>('test dlq', new Error('test error'))
+            const redirectResult = redirect<string>('test redirect', 'test-topic')
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: dropResult, context: context1 },
+                { result: dlqResult, context: context2 },
+                { result: redirectResult, context: context3 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+            const result3 = await pipeline.next()
+            const result4 = await pipeline.next()
+
+            expect(result1).toEqual([{ result: dropResult, context: context1 }])
+            expect(result2).toEqual([{ result: dlqResult, context: context2 }])
+            expect(result3).toEqual([{ result: redirectResult, context: context3 }])
+            expect(result4).toBeNull()
+        })
+
+        it('should handle mixed success and non-success results', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
+            const dropResult = drop<string>('test drop')
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: ok('hello'), context: context1 },
+                { result: dropResult, context: context2 },
+                { result: ok('world'), context: context3 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+            const result3 = await pipeline.next()
+            const result4 = await pipeline.next()
+
+            expect(result1).toEqual([{ result: ok('HELLO'), context: context1 }])
+            expect(result2).toEqual([{ result: dropResult, context: context2 }])
+            expect(result3).toEqual([{ result: ok('WORLD'), context: context3 }])
+            expect(result4).toBeNull()
+        })
+
+        it('should handle async processing delays correctly', async () => {
+            const processor = createNewPipeline<string>().pipeAsync(async (input: string) => {
+                // Simulate async delay
+                await new Promise((resolve) => setTimeout(resolve, 10))
+                return ok(input.toUpperCase())
+            })
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: ok('fast'), context: context1 },
+                { result: ok('slow'), context: context2 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            const startTime = Date.now()
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+            const endTime = Date.now()
+
+            expect(result1).toEqual([{ result: ok('FAST'), context: context1 }])
+            expect(result2).toEqual([{ result: ok('SLOW'), context: context2 }])
+            // Both should complete around the same time due to concurrent processing
+            expect(endTime - startTime).toBeLessThan(50) // Should be much less than 20ms
+        })
+
+        it('should handle processor errors gracefully', async () => {
+            const processor = createNewPipeline<string>().pipeAsync((_input: string) => {
+                return Promise.reject(new Error('Processor error'))
+            })
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            await expect(pipeline.next()).rejects.toThrow('Processor error')
+        })
+
+        it('should process multiple batches sequentially', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const batch1: BatchPipelineResultWithContext<string> = [{ result: ok('batch1'), context: context1 }]
+            const batch2: BatchPipelineResultWithContext<string> = [{ result: ok('batch2'), context: context2 }]
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            // First batch: feed then next
+            previousPipeline.feed(batch1)
+            const result1 = await pipeline.next()
+            expect(result1).toEqual([{ result: ok('BATCH1'), context: context1 }])
+
+            // Second batch: feed then next
+            previousPipeline.feed(batch2)
+            const result2 = await pipeline.next()
+            expect(result2).toEqual([{ result: ok('BATCH2'), context: context2 }])
+
+            // Third call should return null
+            const result3 = await pipeline.next()
+            expect(result3).toBeNull()
+        })
+
+        it('should maintain promise queue state between calls', async () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input.toUpperCase()))
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: ok('item1'), context: context1 },
+                { result: ok('item2'), context: context2 },
+                { result: ok('item3'), context: context3 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            // First call should process first item
+            const result1 = await pipeline.next()
+            expect(result1).toEqual([{ result: ok('ITEM1'), context: context1 }])
+
+            // Second call should process second item
+            const result2 = await pipeline.next()
+            expect(result2).toEqual([{ result: ok('ITEM2'), context: context2 }])
+
+            // Third call should process third item
+            const result3 = await pipeline.next()
+            expect(result3).toEqual([{ result: ok('ITEM3'), context: context3 }])
+
+            // Fourth call should return null
+            const result4 = await pipeline.next()
+            expect(result4).toBeNull()
+        })
+    })
+
+    describe('gather', () => {
+        it('should return GatheringBatchPipeline instance', () => {
+            const processor = createNewPipeline<string>().pipe((input: string) => ok(input))
+            const previousPipeline = createNewBatchPipeline<string>()
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+            const gatherPipeline = pipeline.gather()
+
+            expect(gatherPipeline).toBeDefined()
+            expect(gatherPipeline.constructor.name).toBe('GatheringBatchPipeline')
+        })
+    })
+
+    describe('concurrent processing behavior', () => {
+        it('should process items concurrently within a batch', async () => {
+            const processingOrder: string[] = []
+            const processor = createNewPipeline<string>().pipeAsync(async (input: string) => {
+                processingOrder.push(`start-${input}`)
+                // Simulate different processing times
+                const delay = input === 'slow' ? 50 : 10
+                await new Promise((resolve) => setTimeout(resolve, delay))
+                processingOrder.push(`end-${input}`)
+                return ok(input.toUpperCase())
+            })
+
+            const previousPipeline = createNewBatchPipeline<string>()
+            const testBatch: BatchPipelineResultWithContext<string> = [
+                { result: ok('fast'), context: context1 },
+                { result: ok('slow'), context: context2 },
+                { result: ok('medium'), context: context3 },
+            ]
+            previousPipeline.feed(testBatch)
+
+            const pipeline = new ConcurrentBatchProcessingPipeline(processor, previousPipeline)
+
+            // Process the batch
+            const result1 = await pipeline.next()
+            const result2 = await pipeline.next()
+            const result3 = await pipeline.next()
+
+            // Verify results
+            expect(result1).toEqual([{ result: ok('FAST'), context: context1 }])
+            expect(result2).toEqual([{ result: ok('SLOW'), context: context2 }])
+            expect(result3).toEqual([{ result: ok('MEDIUM'), context: context3 }])
+
+            // Verify concurrent processing (all starts before any end)
+            expect(processingOrder).toEqual([
+                'start-fast',
+                'start-slow',
+                'start-medium',
+                'end-fast',
+                'end-medium',
+                'end-slow',
+            ])
+        })
+    })
+})
@@ -0,0 +1,55 @@
+import { instrumentFn } from '../../common/tracing/tracing-utils'
+import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
+import { GatheringBatchPipeline } from './gathering-batch-pipeline'
+import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
+import { isOkResult } from './results'
+
+export class ConcurrentBatchProcessingPipeline<TInput, TIntermediate, TOutput>
+    implements BatchPipeline<TInput, TOutput>
+{
+    private promiseQueue: Promise<PipelineResultWithContext<TOutput>>[] = []
+
+    constructor(
+        private processor: Pipeline<TIntermediate, TOutput>,
+        private previousPipeline: BatchPipeline<TInput, TIntermediate>
+    ) {}
+
+    feed(elements: BatchPipelineResultWithContext<TInput>): void {
+        this.previousPipeline.feed(elements)
+    }
+
+    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
+        const previousResults = await this.previousPipeline.next()
+
+        if (previousResults !== null) {
+            const processorName = this.processor.constructor.name || 'anonymousProcessor'
+
+            previousResults.forEach((resultWithContext) => {
+                const result = resultWithContext.result
+                if (isOkResult(result)) {
+                    const promise = instrumentFn(processorName, () => this.processor.process(resultWithContext))
+                    this.promiseQueue.push(promise)
+                } else {
+                    this.promiseQueue.push(
+                        Promise.resolve({
+                            result: result,
+                            context: resultWithContext.context,
+                        })
+                    )
+                }
+            })
+        }
+
+        const promise = this.promiseQueue.shift()
+        if (promise === undefined) {
+            return null
+        }
+
+        const resultWithContext = await promise
+        return [resultWithContext]
+    }
+
+    gather(): GatheringBatchPipeline<TInput, TOutput> {
+        return new GatheringBatchPipeline(this)
+    }
+}
@@ -0,0 +1,268 @@
+import { Message } from 'node-rdkafka'
+
+import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
+import { GatheringBatchPipeline } from './gathering-batch-pipeline'
+import { createNewBatchPipeline } from './helpers'
+import { dlq, drop, ok, redirect } from './results'
+
+// Mock batch processing pipeline for testing
+class MockBatchProcessingPipeline<T> implements BatchPipeline<T, T> {
+    private results: BatchPipelineResultWithContext<T>[] = []
+    private currentIndex = 0
+
+    constructor(results: BatchPipelineResultWithContext<T>[]) {
+        this.results = results
+    }
+
+    feed(elements: BatchPipelineResultWithContext<T>): void {
+        this.results.push(elements)
+    }
+
+    async next(): Promise<BatchPipelineResultWithContext<T> | null> {
+        if (this.currentIndex >= this.results.length) {
+            return Promise.resolve(null)
+        }
+        return Promise.resolve(this.results[this.currentIndex++])
+    }
+}
+
+describe('GatheringBatchPipeline', () => {
+    let message1: Message
+    let message2: Message
+    let message3: Message
+    let context1: { message: Message }
+    let context2: { message: Message }
+    let context3: { message: Message }
+
+    beforeEach(() => {
+        // Create different mock messages with unique properties
+        message1 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 1,
+            key: Buffer.from('key1'),
+            value: Buffer.from('value1'),
+            timestamp: Date.now(),
+        } as Message
+
+        message2 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 2,
+            key: Buffer.from('key2'),
+            value: Buffer.from('value2'),
+            timestamp: Date.now() + 1,
+        } as Message
+
+        message3 = {
+            topic: 'test-topic',
+            partition: 0,
+            offset: 3,
+            key: Buffer.from('key3'),
+            value: Buffer.from('value3'),
+            timestamp: Date.now() + 2,
+        } as Message
+
+        context1 = { message: message1 }
+        context2 = { message: message2 }
+        context3 = { message: message3 }
+    })
+
+    describe('constructor', () => {
+        it('should create instance with sub-pipeline', () => {
+            const subPipeline = createNewBatchPipeline<string>()
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            expect(gatherPipeline).toBeInstanceOf(GatheringBatchPipeline)
+        })
+    })
+
+    describe('feed', () => {
+        it('should delegate to sub-pipeline', () => {
+            const subPipeline = createNewBatchPipeline<string>()
+            const spy = jest.spyOn(subPipeline, 'feed')
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const testBatch: BatchPipelineResultWithContext<string> = [{ result: ok('test'), context: context1 }]
+
+            gatherPipeline.feed(testBatch)
+
+            expect(spy).toHaveBeenCalledWith(testBatch)
+        })
+    })
+
+    describe('next', () => {
+        it('should return null when no results available', async () => {
+            const subPipeline = createNewBatchPipeline<string>()
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            expect(result).toBeNull()
+        })
+
+        it('should gather all results from sub-pipeline in single call', async () => {
+            const subPipeline = new MockBatchProcessingPipeline([
+                [{ result: ok('hello'), context: context1 }],
+                [{ result: ok('world'), context: context2 }],
+                [{ result: ok('test'), context: context3 }],
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            const result2 = await gatherPipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+                { result: ok('test'), context: context3 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should preserve non-success results', async () => {
+            const dropResult = drop<string>('test drop')
+            const dlqResult = dlq<string>('test dlq', new Error('test error'))
+            const redirectResult = redirect<string>('test redirect', 'test-topic')
+
+            const subPipeline = new MockBatchProcessingPipeline([
+                [{ result: dropResult, context: context1 }],
+                [{ result: dlqResult, context: context2 }],
+                [{ result: redirectResult, context: context3 }],
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            const result2 = await gatherPipeline.next()
+
+            expect(result).toEqual([
+                { result: dropResult, context: context1 },
+                { result: dlqResult, context: context2 },
+                { result: redirectResult, context: context3 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should handle mixed success and non-success results', async () => {
+            const dropResult = drop<string>('test drop')
+
+            const subPipeline = new MockBatchProcessingPipeline([
+                [{ result: ok('hello'), context: context1 }],
+                [{ result: dropResult, context: context2 }],
+                [{ result: ok('world'), context: context3 }],
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            const result2 = await gatherPipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: dropResult, context: context2 },
+                { result: ok('world'), context: context3 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should handle empty batches from sub-pipeline', async () => {
+            const subPipeline = new MockBatchProcessingPipeline([
+                [], // Empty batch
+                [{ result: ok('hello'), context: context1 }],
+                [], // Another empty batch
+                [{ result: ok('world'), context: context2 }],
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            const result2 = await gatherPipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('hello'), context: context1 },
+                { result: ok('world'), context: context2 },
+            ])
+            expect(result2).toBeNull()
+        })
+
+        it('should return null when all batches are empty', async () => {
+            const subPipeline = new MockBatchProcessingPipeline([
+                [], // Empty batch
+                [], // Another empty batch
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            expect(result).toBeNull()
+        })
+
+        it('should preserve order of results from sub-pipeline', async () => {
+            const subPipeline = new MockBatchProcessingPipeline([
+                [{ result: ok('first'), context: context1 }],
+                [{ result: ok('second'), context: context2 }],
+                [{ result: ok('third'), context: context3 }],
+            ])
+
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+
+            expect(result).toEqual([
+                { result: ok('first'), context: context1 },
+                { result: ok('second'), context: context2 },
+                { result: ok('third'), context: context3 },
+            ])
+        })
+
+        it('should handle large number of batches', async () => {
+            const batches: BatchPipelineResultWithContext<string>[] = []
+            for (let i = 0; i < 10; i++) {
+                batches.push([{ result: ok(`item${i}`), context: context1 }])
+            }
+
+            const subPipeline = new MockBatchProcessingPipeline(batches)
+            const gatherPipeline = new GatheringBatchPipeline(subPipeline)
+
+            const result = await gatherPipeline.next()
+            const result2 = await gatherPipeline.next()
|
||||
|
||||
expect(result).toHaveLength(10)
|
||||
expect(result![0]).toEqual({ result: ok('item0'), context: context1 })
|
||||
expect(result![9]).toEqual({ result: ok('item9'), context: context1 })
|
||||
expect(result2).toBeNull()
|
||||
})
|
||||
|
||||
it('should resume after returning null when more batches are fed', async () => {
|
||||
const subPipeline = new MockBatchProcessingPipeline([
|
||||
[{ result: ok('first'), context: context1 }],
|
||||
[{ result: ok('second'), context: context2 }],
|
||||
])
|
||||
|
||||
const gatherPipeline = new GatheringBatchPipeline(subPipeline)
|
||||
|
||||
// First round: process initial batches
|
||||
const result1 = await gatherPipeline.next()
|
||||
expect(result1).toEqual([
|
||||
{ result: ok('first'), context: context1 },
|
||||
{ result: ok('second'), context: context2 },
|
||||
])
|
||||
|
||||
// Should return null when exhausted
|
||||
const result2 = await gatherPipeline.next()
|
||||
expect(result2).toBeNull()
|
||||
|
||||
// Feed more batches
|
||||
subPipeline.feed([{ result: ok('third'), context: context3 }])
|
||||
|
||||
// Should resume processing
|
||||
const result3 = await gatherPipeline.next()
|
||||
expect(result3).toEqual([{ result: ok('third'), context: context3 }])
|
||||
|
||||
// Should return null again
|
||||
const result4 = await gatherPipeline.next()
|
||||
expect(result4).toBeNull()
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,45 @@
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { isOkResult, ok } from './results'
import { BatchProcessingStep, SequentialBatchPipeline } from './sequential-batch-pipeline'

export class GatheringBatchPipeline<TInput, TOutput> implements BatchPipeline<TInput, TOutput> {
    constructor(private subPipeline: BatchPipeline<TInput, TOutput>) {}

    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.subPipeline.feed(elements)
    }

    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
        const allResults: BatchPipelineResultWithContext<TOutput> = []

        // Loop and collect all results from sub-pipeline
        let result = await this.subPipeline.next()

        while (result !== null) {
            // Collect all results in order, preserving context
            result.forEach((resultWithContext) => {
                if (isOkResult(resultWithContext.result)) {
                    allResults.push({
                        result: ok(resultWithContext.result.value),
                        context: resultWithContext.context,
                    })
                } else {
                    allResults.push(resultWithContext)
                }
            })

            result = await this.subPipeline.next()
        }

        // Return all collected results, or null if no results
        if (allResults.length === 0) {
            return null
        }

        return allResults
    }

    pipeBatch<U>(step: BatchProcessingStep<TOutput, U>): SequentialBatchPipeline<TInput, TOutput, U> {
        return new SequentialBatchPipeline(step, this)
    }
}
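A minimal usage sketch (the import path for this class is assumed, since the diff does not show the file name): GatheringBatchPipeline drains its sub-pipeline's incremental batches so that one next() call returns everything fed so far.

import { Message } from 'node-rdkafka'

import { GatheringBatchPipeline } from './gathering-batch-pipeline' // assumed path
import { createBatch, createNewBatchPipeline } from './helpers'

async function gatherAll(messages: Message[]) {
    // Wrap a buffering pipeline so results come back as one combined batch
    const gathering = new GatheringBatchPipeline(createNewBatchPipeline())
    gathering.feed(createBatch(messages))
    // next() loops until the sub-pipeline is exhausted; returns null if nothing was fed
    return await gathering.next()
}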
plugin-server/src/ingestion/pipelines/helpers.ts (new file, 30 lines)
@@ -0,0 +1,30 @@
import { Message } from 'node-rdkafka'

import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { BufferingBatchPipeline } from './buffering-batch-pipeline'
import { ok } from './results'
import { StartPipeline } from './start-pipeline'

/**
 * Helper function to create a new processing pipeline for single items
 */
export function createNewPipeline<T = { message: Message }>(): StartPipeline<T> {
    return new StartPipeline<T>()
}

/**
 * Helper function to create a new batch processing pipeline starting with a root pipeline
 */
export function createNewBatchPipeline<T = { message: Message }>(): BufferingBatchPipeline<T> {
    return new BufferingBatchPipeline<T>()
}

/**
 * Helper function to create a batch of ResultWithContext from Kafka messages
 */
export function createBatch(messages: Message[]): BatchPipelineResultWithContext<{ message: Message }> {
    return messages.map((message) => ({
        result: ok({ message }),
        context: { message },
    }))
}
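A short sketch of the intended entry point (message values here are placeholders): createBatch seeds each Kafka message as an ok() result whose context keeps the originating message for later error routing.

import { Message } from 'node-rdkafka'

import { createBatch, createNewBatchPipeline } from './helpers'

const messages: Message[] = [
    // placeholder message; in production these come from the consumer
    { value: Buffer.from('event-1'), topic: 'events', partition: 0, offset: 1 } as Message,
]

const pipeline = createNewBatchPipeline()
// Each element is { result: ok({ message }), context: { message } }
pipeline.feed(createBatch(messages))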
plugin-server/src/ingestion/pipelines/pipeline.interface.ts (new file, 25 lines)
@@ -0,0 +1,25 @@
import { Message } from 'node-rdkafka'

import { PipelineResult } from './results'

/**
 * Processing context that carries the Kafka message through pipeline transformations
 */
export interface PipelineContext {
    message: Message
}

/**
 * Wrapper that carries both the pipeline result and its processing context
 */
export interface PipelineResultWithContext<T> {
    result: PipelineResult<T>
    context: PipelineContext
}

/**
 * Interface for single-item processors
 */
export interface Pipeline<TInput, TOutput> {
    process(input: PipelineResultWithContext<TInput>): Promise<PipelineResultWithContext<TOutput>>
}
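For illustration, a minimal hypothetical Pipeline implementation (UppercasePipeline is not part of this diff): like the StepPipeline further down, it passes non-ok results through untouched so failures keep their context.

import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { isOkResult, ok } from './results'

class UppercasePipeline implements Pipeline<{ text: string }, { text: string }> {
    async process(
        input: PipelineResultWithContext<{ text: string }>
    ): Promise<PipelineResultWithContext<{ text: string }>> {
        if (!isOkResult(input.result)) {
            // Drop/DLQ/redirect results flow through unchanged
            return { result: input.result, context: input.context }
        }
        return { result: ok({ text: input.result.value.text.toUpperCase() }), context: input.context }
    }
}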
plugin-server/src/ingestion/pipelines/pipelines.integration.test.ts (new file, 1678 lines; diff omitted because it is too large)
@@ -0,0 +1,369 @@
import { Message } from 'node-rdkafka'

import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from '../../worker/ingestion/pipeline-helpers'
import { BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { createNewBatchPipeline } from './helpers'
import { PipelineConfig, ResultHandlingPipeline } from './result-handling-pipeline'
import { dlq, drop, ok, redirect } from './results'

// Mock the pipeline helpers
jest.mock('../../worker/ingestion/pipeline-helpers', () => ({
    logDroppedMessage: jest.fn(),
    redirectMessageToTopic: jest.fn(),
    sendMessageToDLQ: jest.fn(),
}))

const mockLogDroppedMessage = logDroppedMessage as jest.MockedFunction<typeof logDroppedMessage>
const mockRedirectMessageToTopic = redirectMessageToTopic as jest.MockedFunction<typeof redirectMessageToTopic>
const mockSendMessageToDLQ = sendMessageToDLQ as jest.MockedFunction<typeof sendMessageToDLQ>

describe('ResultHandlingPipeline', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let config: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })

    describe('basic functionality', () => {
        it('should process successful results and return values', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('test2'), topic: 'test', partition: 0, offset: 2 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: ok({ processed: 'test2' }), context: { message: messages[1] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ processed: 'test1' }, { processed: 'test2' }])
        })

        it('should handle empty batch', async () => {
            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed([])
            const results = await resultPipeline.next()

            expect(results).toBeNull()
        })
    })

    describe('result handling', () => {
        it('should filter out dropped results and log them', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: drop('test drop reason'), context: { message: messages[1] } },
                { result: ok({ processed: 'test3' }), context: { message: messages[2] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
            expect(mockLogDroppedMessage).toHaveBeenCalledWith(messages[1], 'test drop reason', 'result_handler')
        })

        it('should filter out redirected results and redirect them', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: redirect('test redirect', 'overflow-topic', true, false), context: { message: messages[1] } },
                { result: ok({ processed: 'test3' }), context: { message: messages[2] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                messages[1],
                'overflow-topic',
                'result_handler',
                true,
                false
            )
        })

        it('should filter out dlq results and send to DLQ', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('test3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            const testError = new Error('test error')
            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: dlq('test dlq reason', testError), context: { message: messages[1] } },
                { result: ok({ processed: 'test3' }), context: { message: messages[2] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ processed: 'test1' }, { processed: 'test3' }])
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                messages[1],
                testError,
                'result_handler',
                'test-dlq'
            )
        })

        it('should handle dlq result without error and create default error', async () => {
            const messages: Message[] = [
                { value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 1 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: dlq('test dlq reason'), context: { message: messages[0] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([])
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                messages[0],
                expect.any(Error),
                'result_handler',
                'test-dlq'
            )

            const errorArg = (mockSendMessageToDLQ as jest.Mock).mock.calls[0][2]
            expect(errorArg.message).toBe('test dlq reason')
        })

        it('should handle mixed results correctly', async () => {
            const messages: Message[] = [
                { value: Buffer.from('success1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('success2'), topic: 'test', partition: 0, offset: 3 } as Message,
                { value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 4 } as Message,
                { value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 5 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ processed: 'success1' }), context: { message: messages[0] } },
                { result: drop('dropped item'), context: { message: messages[1] } },
                { result: ok({ processed: 'success2' }), context: { message: messages[2] } },
                { result: redirect('redirected item', 'overflow-topic'), context: { message: messages[3] } },
                { result: dlq('dlq item', new Error('processing error')), context: { message: messages[4] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ processed: 'success1' }, { processed: 'success2' }])

            // Verify all non-success results were handled
            expect(mockLogDroppedMessage).toHaveBeenCalledWith(messages[1], 'dropped item', 'result_handler')
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                messages[3],
                'overflow-topic',
                'result_handler',
                true,
                true
            )
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                messages[4],
                expect.any(Error),
                'result_handler',
                'test-dlq'
            )
        })
    })

    describe('concurrent processing', () => {
        it('should handle concurrent processing results', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: ok({ count: 4 }), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([{ count: 2 }, { count: 4 }, { count: 6 }])
        })
    })

    describe('redirect result with default parameters', () => {
        it('should use default preserveKey and awaitAck when not specified', async () => {
            const messages: Message[] = [
                { value: Buffer.from('redirect'), topic: 'test', partition: 0, offset: 1 } as Message,
            ]

            // Create batch results directly
            const batchResults: BatchPipelineResultWithContext<any> = [
                { result: redirect('test redirect', 'overflow-topic'), context: { message: messages[0] } },
            ]

            const pipeline = createNewBatchPipeline()
            const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
            resultPipeline.feed(batchResults)
            const results = await resultPipeline.next()

            expect(results).toEqual([])
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                messages[0],
                'overflow-topic',
                'result_handler',
                true, // default preserveKey
                true // default awaitAck
            )
        })
    })
})

describe('Integration tests', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let config: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })

    it('should handle realistic event processing pipeline', async () => {
        const messages: Message[] = [
            { value: Buffer.from('test-event'), topic: 'test', partition: 0, offset: 1 } as Message,
        ]

        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            {
                result: ok({
                    eventType: 'pageview',
                    userId: 'user123',
                    isValid: true,
                    timestamp: '2023-01-01T00:00:00Z',
                }),
                context: { message: messages[0] },
            },
        ]

        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()

        expect(results).toEqual([
            {
                eventType: 'pageview',
                userId: 'user123',
                isValid: true,
                timestamp: '2023-01-01T00:00:00Z',
            },
        ])
    })

    it('should handle pipeline failure at different stages', async () => {
        const messages: Message[] = [{ value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message]

        // Create batch results directly
        const batchResults: BatchPipelineResultWithContext<any> = [
            { result: dlq('Validation failed', new Error('Invalid data')), context: { message: messages[0] } },
        ]

        const pipeline = createNewBatchPipeline()
        const resultPipeline = ResultHandlingPipeline.of(pipeline, config)
        resultPipeline.feed(batchResults)
        const results = await resultPipeline.next()

        expect(results).toEqual([])
        expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
            mockKafkaProducer,
            messages[0],
            expect.any(Error),
            'result_handler',
            'test-dlq'
        )
    })
})
@@ -0,0 +1,88 @@
import { Message } from 'node-rdkafka'

import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from '../../worker/ingestion/pipeline-helpers'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { PipelineResult, isDlqResult, isDropResult, isOkResult, isRedirectResult } from './results'

export type PipelineConfig = {
    kafkaProducer: KafkaProducerWrapper
    dlqTopic: string
    promiseScheduler: PromiseScheduler
}

/**
 * Unified result handling pipeline that wraps any BatchPipeline and handles
 * non-success results (DLQ, DROP, REDIRECT) while filtering down to only the successful values.
 */
export class ResultHandlingPipeline<TInput, TOutput> {
    constructor(
        private pipeline: BatchPipeline<TInput, TOutput>,
        private config: PipelineConfig
    ) {}

    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.pipeline.feed(elements)
    }

    async next(): Promise<TOutput[] | null> {
        const results = await this.pipeline.next()

        if (results === null) {
            return null
        }

        // Process results and handle non-success cases
        const processedResults: TOutput[] = []

        for (const resultWithContext of results) {
            if (isOkResult(resultWithContext.result)) {
                const value = resultWithContext.result.value as TOutput
                processedResults.push(value)
            } else {
                // For non-success results, get the message from context
                const originalMessage = resultWithContext.context.message
                await this.handleNonSuccessResult(resultWithContext.result, originalMessage, 'result_handler')
            }
        }

        // Return only successful results
        return processedResults
    }

    private async handleNonSuccessResult(
        result: PipelineResult<TOutput>,
        originalMessage: Message,
        stepName: string
    ): Promise<void> {
        if (isDlqResult(result)) {
            await sendMessageToDLQ(
                this.config.kafkaProducer,
                originalMessage,
                result.error || new Error(result.reason),
                stepName,
                this.config.dlqTopic
            )
        } else if (isDropResult(result)) {
            logDroppedMessage(originalMessage, result.reason, stepName)
        } else if (isRedirectResult(result)) {
            await redirectMessageToTopic(
                this.config.kafkaProducer,
                this.config.promiseScheduler,
                originalMessage,
                result.topic,
                stepName,
                result.preserveKey ?? true,
                result.awaitAck ?? true
            )
        }
    }

    static of<TInput, TOutput>(
        pipeline: BatchPipeline<TInput, TOutput>,
        config: PipelineConfig
    ): ResultHandlingPipeline<TInput, TOutput> {
        return new ResultHandlingPipeline(pipeline, config)
    }
}
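A wiring sketch (the topic name and the producer/scheduler instances are placeholders): a consumer drains only successful values, while drops are logged, redirects republished, and DLQ results produced to the configured topic.

import { Message } from 'node-rdkafka'

import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { createBatch, createNewBatchPipeline } from './helpers'
import { ResultHandlingPipeline } from './result-handling-pipeline'

declare const kafkaProducer: KafkaProducerWrapper // assumed to exist in the consumer
declare const promiseScheduler: PromiseScheduler // assumed to exist in the consumer
declare const messages: Message[] // a polled Kafka batch

const resultPipeline = ResultHandlingPipeline.of(createNewBatchPipeline(), {
    kafkaProducer,
    dlqTopic: 'events-dlq', // placeholder topic name
    promiseScheduler,
})

async function consume(): Promise<void> {
    resultPipeline.feed(createBatch(messages))
    const values = await resultPipeline.next() // only ok() values survive
    // ... hand `values` to the next stage
}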
plugin-server/src/ingestion/pipelines/results.ts (new file, 70 lines)
@@ -0,0 +1,70 @@
export enum PipelineResultType {
    OK,
    DLQ,
    DROP,
    REDIRECT,
}

/**
 * Generic result type for pipeline steps: a step can succeed, send the item to the DLQ,
 * drop it, or redirect it to another topic
 */
export type PipelineResultOk<T> = { type: PipelineResultType.OK; value: T }
export type PipelineResultDlq = { type: PipelineResultType.DLQ; reason: string; error: unknown }
export type PipelineResultDrop = { type: PipelineResultType.DROP; reason: string }
export type PipelineResultRedirect = {
    type: PipelineResultType.REDIRECT
    reason: string
    topic: string
    preserveKey?: boolean
    awaitAck?: boolean
}
export type PipelineResult<T> = PipelineResultOk<T> | PipelineResultDlq | PipelineResultDrop | PipelineResultRedirect

/**
 * Helper functions for creating pipeline step results
 */
export function ok<T>(value: T): PipelineResult<T> {
    return { type: PipelineResultType.OK, value }
}

export function dlq<T>(reason: string, error?: any): PipelineResult<T> {
    return { type: PipelineResultType.DLQ, reason, error }
}

export function drop<T>(reason: string): PipelineResult<T> {
    return { type: PipelineResultType.DROP, reason }
}

export function redirect<T>(
    reason: string,
    topic: string,
    preserveKey: boolean = true,
    awaitAck: boolean = true
): PipelineResult<T> {
    return {
        type: PipelineResultType.REDIRECT,
        reason,
        topic,
        preserveKey,
        awaitAck,
    }
}

/**
 * Type guard functions
 */
export function isOkResult<T>(result: PipelineResult<T>): result is PipelineResultOk<T> {
    return result.type === PipelineResultType.OK
}

export function isDlqResult<T>(result: PipelineResult<T>): result is PipelineResultDlq {
    return result.type === PipelineResultType.DLQ
}

export function isDropResult<T>(result: PipelineResult<T>): result is PipelineResultDrop {
    return result.type === PipelineResultType.DROP
}

export function isRedirectResult<T>(result: PipelineResult<T>): result is PipelineResultRedirect {
    return result.type === PipelineResultType.REDIRECT
}
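A small sketch of how these constructors and guards fit together: because PipelineResult is a discriminated union, each guard narrows the result so only the fields that actually exist are accessible.

import { PipelineResult, dlq, drop, isOkResult, isRedirectResult, ok, redirect } from './results'

function describeResult(result: PipelineResult<string>): string {
    if (isOkResult(result)) {
        return `ok: ${result.value}` // value only exists on OK results
    }
    if (isRedirectResult(result)) {
        return `redirect to ${result.topic}: ${result.reason}`
    }
    return `rejected: ${result.reason}` // DROP and DLQ both carry a reason
}

describeResult(ok('event')) // 'ok: event'
describeResult(drop('blocked token')) // 'rejected: blocked token'
describeResult(dlq('bad payload', new Error('parse'))) // 'rejected: bad payload'
describeResult(redirect('overflow', 'overflow-topic')) // 'redirect to overflow-topic: overflow'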
@@ -0,0 +1,214 @@
import { Message } from 'node-rdkafka'

import { createBatch, createNewBatchPipeline } from './helpers'
import { dlq, drop, ok } from './results'
import { SequentialBatchPipeline } from './sequential-batch-pipeline'

describe('SequentialBatchPipeline', () => {
    describe('basic functionality', () => {
        it('should process batch through pipeline', async () => {
            const messages: Message[] = [
                { value: Buffer.from('test1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('test2'), topic: 'test', partition: 0, offset: 2 } as Message,
            ]

            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(items.map((item: any) => ok({ processed: item.message.value?.toString() })))
            }, rootPipeline)

            pipeline.feed(batch)
            const results = await pipeline.next()

            expect(results).toEqual([
                { result: ok({ processed: 'test1' }), context: { message: messages[0] } },
                { result: ok({ processed: 'test2' }), context: { message: messages[1] } },
            ])
        })

        it('should handle empty batch', async () => {
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(items.map((item: any) => ok(item)))
            }, rootPipeline)

            pipeline.feed([])
            const results = await pipeline.next()

            expect(results).toEqual(null)
        })
    })

    describe('pipe() - batch operations', () => {
        it('should execute batch step on all successful values', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(
                    items.map((item: any) => ok({ count: parseInt(item.message.value?.toString() || '0') * 2 }))
                )
            }, rootPipeline)

            pipeline.feed(batch)
            const results = await pipeline.next()

            expect(results).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: ok({ count: 4 }), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
            ])
        })

        it('should preserve non-success results and only process successful ones', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('drop'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
                { value: Buffer.from('dlq'), topic: 'test', partition: 0, offset: 4 } as Message,
            ]

            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const firstPipeline = new SequentialBatchPipeline((items: any[]) => {
                return Promise.resolve(
                    items.map((item: any) => {
                        const value = item.message.value?.toString() || ''
                        if (value === 'drop') {
                            return drop('dropped item')
                        }
                        if (value === 'dlq') {
                            return dlq('dlq item', new Error('test error'))
                        }
                        return ok({ count: parseInt(value) })
                    })
                )
            }, rootPipeline)

            const secondPipeline = new SequentialBatchPipeline((items: any[]) => {
                // Should only receive successful items
                expect(items).toEqual([{ count: 1 }, { count: 3 }])
                return Promise.resolve(items.map((item: any) => ok({ count: item.count * 2 })))
            }, firstPipeline)

            secondPipeline.feed(batch)
            const results = await secondPipeline.next()

            expect(results).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: drop('dropped item'), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
                { result: dlq('dlq item', new Error('test error')), context: { message: messages[3] } },
            ])
        })
    })

    describe('pipeConcurrently() - concurrent individual processing', () => {
        it('should process each item concurrently', async () => {
            const messages: Message[] = [
                { value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('2'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('3'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            const batch = createBatch(messages)
            const processor = {
                async process(input: any) {
                    await new Promise((resolve) => setTimeout(resolve, 1))
                    const count = parseInt(input.result.value.message.value?.toString() || '0')
                    return { result: ok({ count: count * 2 }), context: input.context }
                },
            }

            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)

            pipeline.feed(batch)

            // Collect all results by calling next() until it returns null
            const allResults = []
            let result = await pipeline.next()
            while (result !== null) {
                allResults.push(...result) // Flatten the array
                result = await pipeline.next()
            }

            expect(allResults).toEqual([
                { result: ok({ count: 2 }), context: { message: messages[0] } },
                { result: ok({ count: 4 }), context: { message: messages[1] } },
                { result: ok({ count: 6 }), context: { message: messages[2] } },
            ])
        })

        it('should preserve order despite concurrent execution', async () => {
            const messages: Message[] = [
                { value: Buffer.from('30'), topic: 'test', partition: 0, offset: 1 } as Message,
                { value: Buffer.from('10'), topic: 'test', partition: 0, offset: 2 } as Message,
                { value: Buffer.from('20'), topic: 'test', partition: 0, offset: 3 } as Message,
            ]

            const batch = createBatch(messages)
            const processor = {
                async process(input: any) {
                    const delay = parseInt(input.result.value.message.value?.toString() || '0')
                    await new Promise((resolve) => setTimeout(resolve, delay))
                    return { result: ok({ processed: delay }), context: input.context }
                },
            }

            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)

            pipeline.feed(batch)

            // Collect all results by calling next() until it returns null
            const allResults = []
            let result = await pipeline.next()
            while (result !== null) {
                allResults.push(...result) // Flatten the array
                result = await pipeline.next()
            }

            expect(allResults).toEqual([
                { result: ok({ processed: 30 }), context: { message: messages[0] } },
                { result: ok({ processed: 10 }), context: { message: messages[1] } },
                { result: ok({ processed: 20 }), context: { message: messages[2] } },
            ])
        })
    })

    describe('error handling', () => {
        it('should propagate errors from batch operations', async () => {
            const messages: Message[] = [{ value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message]

            const batch = createBatch(messages)
            const rootPipeline = createNewBatchPipeline()
            const pipeline = new SequentialBatchPipeline(() => {
                return Promise.reject(new Error('Batch step failed'))
            }, rootPipeline)

            pipeline.feed(batch)
            await expect(pipeline.next()).rejects.toThrow('Batch step failed')
        })

        it('should propagate errors from concurrent operations', async () => {
            const messages: Message[] = [{ value: Buffer.from('1'), topic: 'test', partition: 0, offset: 1 } as Message]

            const batch = createBatch(messages)
            const processor = {
                process() {
                    return Promise.reject(new Error('Concurrent step failed'))
                },
            }

            const pipeline = createNewBatchPipeline().pipeConcurrently(processor)

            pipeline.feed(batch)
            await expect(pipeline.next()).rejects.toThrow('Concurrent step failed')
        })
    })
})
@@ -0,0 +1,61 @@
import { instrumentFn } from '../../common/tracing/tracing-utils'
import { BatchPipeline, BatchPipelineResultWithContext } from './batch-pipeline.interface'
import { PipelineResultWithContext } from './pipeline.interface'
import { PipelineResult, PipelineResultOk, isOkResult } from './results'

/**
 * Type guard for ResultWithContext that asserts the result is successful
 */
function isSuccessResultWithContext<T>(
    resultWithContext: PipelineResultWithContext<T>
): resultWithContext is PipelineResultWithContext<T> & { result: PipelineResultOk<T> } {
    return isOkResult(resultWithContext.result)
}

export type BatchProcessingStep<T, U> = (values: T[]) => Promise<PipelineResult<U>[]>

export class SequentialBatchPipeline<TInput, TIntermediate, TOutput> implements BatchPipeline<TInput, TOutput> {
    constructor(
        private currentStep: BatchProcessingStep<TIntermediate, TOutput>,
        private previousPipeline: BatchPipeline<TInput, TIntermediate>
    ) {}

    feed(elements: BatchPipelineResultWithContext<TInput>): void {
        this.previousPipeline.feed(elements)
    }

    async next(): Promise<BatchPipelineResultWithContext<TOutput> | null> {
        const previousResults = await this.previousPipeline.next()
        if (previousResults === null) {
            return null
        }

        // Filter successful values for processing
        const successfulValues = previousResults
            .filter(isSuccessResultWithContext)
            .map((resultWithContext) => resultWithContext.result.value)

        // Apply the current step to the successful values
        const stepName = this.currentStep.name || 'anonymousBatchStep'
        let stepResults: PipelineResult<TOutput>[] = []
        if (successfulValues.length > 0) {
            stepResults = await instrumentFn(stepName, () => this.currentStep(successfulValues))
        }
        let stepIndex = 0

        // Map results back, preserving context and non-successful results
        return previousResults.map((resultWithContext) => {
            if (isOkResult(resultWithContext.result)) {
                return {
                    result: stepResults[stepIndex++],
                    context: resultWithContext.context,
                }
            } else {
                return {
                    result: resultWithContext.result,
                    context: resultWithContext.context,
                }
            }
        })
    }
}
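A sketch of a batch step for this class (messages are assumed to come from the consumer): the step sees only the ok() values, and must return exactly one result per input in the same order, since next() maps results back onto contexts by index.

import { Message } from 'node-rdkafka'

import { createBatch, createNewBatchPipeline } from './helpers'
import { ok } from './results'
import { SequentialBatchPipeline } from './sequential-batch-pipeline'

// Named function so instrumentFn records a useful span name
async function measurePayloads(items: { message: Message }[]) {
    // One result per input, in order
    return items.map((item) => ok({ bytes: item.message.value?.length ?? 0 }))
}

declare const messages: Message[] // assumed to come from the Kafka consumer

const pipeline = new SequentialBatchPipeline(measurePayloads, createNewBatchPipeline())
pipeline.feed(createBatch(messages))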
plugin-server/src/ingestion/pipelines/start-pipeline.test.ts (new file, 87 lines)
@@ -0,0 +1,87 @@
import { Message } from 'node-rdkafka'

import { dlq, drop, ok, redirect } from './results'
import { StartPipeline } from './start-pipeline'
import { StepPipeline } from './step-pipeline'

describe('StartPipeline', () => {
    describe('basic functionality', () => {
        it('should process single item through pipeline with success result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const pipeline = new StartPipeline<{ data: string }>().pipe((input) => {
                return ok({ processed: input.data })
            })

            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })

        it('should process single item through pipeline with drop result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return drop('dropped item')
            })

            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: drop('dropped item'), context: { message } })
        })

        it('should process single item through pipeline with dlq result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return dlq('dlq item', new Error('test error'))
            })

            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: dlq('dlq item', new Error('test error')), context: { message } })
        })

        it('should process single item through pipeline with redirect result', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const pipeline = new StartPipeline<{ data: string }>().pipe((_input) => {
                return redirect('redirect item', 'retry-topic')
            })

            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })
            expect(result).toEqual({ result: redirect('redirect item', 'retry-topic'), context: { message } })
        })
    })

    describe('pipe() - synchronous steps', () => {
        it('should return StepPipeline instance and call the step', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const step = jest.fn().mockReturnValue(ok({ processed: 'test' }))

            const pipeline = new StartPipeline<{ data: string }>()
            const stepPipeline = pipeline.pipe(step)

            expect(stepPipeline).toBeInstanceOf(StepPipeline)

            const result = await stepPipeline.process({ result: ok({ data: 'test' }), context: { message } })

            expect(step).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
    })

    describe('pipeAsync() - async steps', () => {
        it('should return StepPipeline instance and call the async step', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message
            const asyncStep = jest.fn().mockResolvedValue(ok({ processed: 'test' }))

            const pipeline = new StartPipeline<{ data: string }>()
            const stepPipeline = pipeline.pipeAsync(asyncStep)

            expect(stepPipeline).toBeInstanceOf(StepPipeline)

            const result = await stepPipeline.process({ result: ok({ data: 'test' }), context: { message } })

            expect(asyncStep).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })
    })
})
plugin-server/src/ingestion/pipelines/start-pipeline.ts (new file, 27 lines)
@@ -0,0 +1,27 @@
import { instrumentFn } from '~/common/tracing/tracing-utils'

import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { StepPipeline } from './step-pipeline'
import { AsyncProcessingStep, SyncProcessingStep } from './steps'

export class StartPipeline<T> implements Pipeline<T, T> {
    async process(input: PipelineResultWithContext<T>): Promise<PipelineResultWithContext<T>> {
        return Promise.resolve(input)
    }

    pipe<U>(step: SyncProcessingStep<T, U>): StepPipeline<T, T, U> {
        const stepName = step.name || 'anonymousStep'
        const wrappedStep = async (value: T) => {
            return await instrumentFn(stepName, () => Promise.resolve(step(value)))
        }
        return new StepPipeline<T, T, U>(wrappedStep, this)
    }

    pipeAsync<U>(step: AsyncProcessingStep<T, U>): StepPipeline<T, T, U> {
        const stepName = step.name || 'anonymousAsyncStep'
        const wrappedStep = async (value: T) => {
            return await instrumentFn(stepName, () => step(value))
        }
        return new StepPipeline<T, T, U>(wrappedStep, this)
    }
}
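A chaining sketch (event shape and token handling are illustrative only): StartPipeline is the identity head, and each pipe()/pipeAsync() call appends an instrumented StepPipeline, so a per-event flow reads as one expression.

import { Message } from 'node-rdkafka'

import { drop, ok } from './results'
import { StartPipeline } from './start-pipeline'

const chain = new StartPipeline<{ token?: string }>()
    .pipe(function requireToken(event) {
        // Dropped events short-circuit: later steps never run for them
        return event.token ? ok({ token: event.token }) : drop('missing token')
    })
    .pipeAsync(async function normalizeToken(event) {
        return ok({ token: event.token.toLowerCase() })
    })

declare const message: Message // assumed to come from the consumer

async function run() {
    return await chain.process({ result: ok({ token: 'ABC' }), context: { message } })
}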
plugin-server/src/ingestion/pipelines/step-pipeline.test.ts (new file, 140 lines)
@@ -0,0 +1,140 @@
import { Message } from 'node-rdkafka'

import { drop, isOkResult, ok } from './results'
import { StartPipeline } from './start-pipeline'
import { StepPipeline } from './step-pipeline'

describe('StepPipeline', () => {
    describe('constructor', () => {
        it('should create instance with step and previous pipeline', () => {
            const mockStep = jest.fn()
            const mockPrevious = {} as any

            const pipeline = new StepPipeline(mockStep, mockPrevious)

            expect(pipeline).toBeInstanceOf(StepPipeline)
        })
    })

    describe('process', () => {
        it('should execute step when previous result is success', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const step = jest.fn().mockResolvedValue(ok({ processed: 'test' }))
            const previous = new StartPipeline<{ data: string }>()

            const pipeline = new StepPipeline(step, previous)
            const result = await pipeline.process({ result: ok({ data: 'test' }), context: { message } })

            expect(step).toHaveBeenCalledWith({ data: 'test' })
            expect(result).toEqual({ result: ok({ processed: 'test' }), context: { message } })
        })

        it('should skip step when previous result is not success', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const step = jest.fn()
            const previous = new StartPipeline<{ data: string }>()

            const pipeline = new StepPipeline(step, previous)
            const result = await pipeline.process({ result: drop('dropped'), context: { message } })

            expect(step).not.toHaveBeenCalled()
            expect(result).toEqual({ result: drop('dropped'), context: { message } })
        })

        it('should handle step errors', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const step = jest.fn().mockRejectedValue(new Error('Step failed'))
            const previous = new StartPipeline<{ data: string }>()

            const pipeline = new StepPipeline(step, previous)

            await expect(pipeline.process({ result: ok({ data: 'test' }), context: { message } })).rejects.toThrow(
                'Step failed'
            )
        })
    })

    describe('pipe', () => {
        it('should create new StepPipeline with additional step', () => {
            const step1 = jest.fn()
            const step2 = jest.fn()
            const previous = {} as any

            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipe(step2)

            expect(pipeline2).toBeInstanceOf(StepPipeline)
        })

        it('should execute steps in order when processing through chained pipeline', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const step1 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value + 5 })) // Add 5
            })
            const step2 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value * 2 })) // Multiply by 2
            })
            const previous = new StartPipeline<{ value: number }>()

            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipe(step2)

            const result = await pipeline2.process({ result: ok({ value: 10 }), context: { message } })

            expect(step1).toHaveBeenCalledWith({ value: 10 })
            expect(step2).toHaveBeenCalledWith({ value: 15 }) // 10 + 5
            const pipelineResult = result.result
            expect(isOkResult(pipelineResult)).toBe(true)
            if (isOkResult(pipelineResult)) {
                expect(pipelineResult.value).toEqual({ value: 30 }) // (10 + 5) * 2 = 30
            }
            expect(step1).toHaveBeenCalledTimes(1)
            expect(step2).toHaveBeenCalledTimes(1)
        })
    })

    describe('pipeAsync', () => {
        it('should create new StepPipeline with async step', () => {
            const step1 = jest.fn()
            const asyncStep = jest.fn()
            const previous = {} as any

            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipeAsync(asyncStep)

            expect(pipeline2).toBeInstanceOf(StepPipeline)
        })

        it('should execute steps in order when processing through chained async pipeline', async () => {
            const message: Message = { value: Buffer.from('test'), topic: 'test', partition: 0, offset: 1 } as Message

            const step1 = jest.fn().mockImplementation((input) => {
                return Promise.resolve(ok({ value: input.value * 3 })) // Multiply by 3
            })
            const asyncStep = jest.fn().mockImplementation(async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return ok({ value: input.value - 2 }) // Subtract 2
            })
            const previous = new StartPipeline<{ value: number }>()

            const pipeline1 = new StepPipeline(step1, previous)
            const pipeline2 = pipeline1.pipeAsync(asyncStep)

            const result = await pipeline2.process({ result: ok({ value: 4 }), context: { message } })

            expect(step1).toHaveBeenCalledWith({ value: 4 })
            expect(asyncStep).toHaveBeenCalledWith({ value: 12 }) // 4 * 3
            const pipelineResult = result.result
            expect(isOkResult(pipelineResult)).toBe(true)
            if (isOkResult(pipelineResult)) {
                expect(pipelineResult.value).toEqual({ value: 10 }) // (4 * 3) - 2 = 10
            }
            expect(step1).toHaveBeenCalledTimes(1)
            expect(asyncStep).toHaveBeenCalledTimes(1)
        })
    })
})
plugin-server/src/ingestion/pipelines/step-pipeline.ts (new file, 49 lines)
@@ -0,0 +1,49 @@
import { instrumentFn } from '../../common/tracing/tracing-utils'
import { Pipeline, PipelineResultWithContext } from './pipeline.interface'
import { PipelineResult, isOkResult } from './results'
import { AsyncProcessingStep, SyncProcessingStep } from './steps'

export class StepPipeline<TInput, TIntermediate, TOutput> implements Pipeline<TInput, TOutput> {
    constructor(
        private currentStep: (value: TIntermediate) => Promise<PipelineResult<TOutput>>,
        private previousPipeline: Pipeline<TInput, TIntermediate>
    ) {}

    pipe<U>(step: SyncProcessingStep<TOutput, U>): StepPipeline<TInput, TOutput, U> {
        const stepName = step.name || 'anonymousStep'
        const wrappedStep = async (value: TOutput) => {
            return await instrumentFn(stepName, () => Promise.resolve(step(value)))
        }
        return new StepPipeline<TInput, TOutput, U>(wrappedStep, this)
    }

    pipeAsync<U>(step: AsyncProcessingStep<TOutput, U>): StepPipeline<TInput, TOutput, U> {
        const stepName = step.name || 'anonymousAsyncStep'
        const wrappedStep = async (value: TOutput) => {
            return await instrumentFn(stepName, () => step(value))
        }
        return new StepPipeline<TInput, TOutput, U>(wrappedStep, this)
    }

    async process(input: PipelineResultWithContext<TInput>): Promise<PipelineResultWithContext<TOutput>> {
        // Process through the previous pipeline first
        const previousResultWithContext = await this.previousPipeline.process(input)

        // If the previous step failed, return the failure with its context preserved
        const previousResult = previousResultWithContext.result
        if (!isOkResult(previousResult)) {
            return {
                result: previousResult,
                context: previousResultWithContext.context,
            }
        }

        // Apply the current step to the successful value from the previous pipeline
        const currentResult = await this.currentStep(previousResult.value)

        return {
            result: currentResult,
            context: previousResultWithContext.context,
        }
    }
}
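To make the recursion concrete, a sketch of what a two-step chain executes per item (the numbers mirror the test above): process() first delegates to everything before it, then applies its own step only if the prefix produced ok().

import { Message } from 'node-rdkafka'

import { ok } from './results'
import { StartPipeline } from './start-pipeline'

const addFive = (input: { value: number }) => ok({ value: input.value + 5 })
const double = (input: { value: number }) => ok({ value: input.value * 2 })

// pipe() nests pipelines: conceptually double(addFive(x)), run left to right
const chain = new StartPipeline<{ value: number }>().pipe(addFive).pipe(double)

declare const message: Message // assumed to come from the consumer

async function example() {
    const out = await chain.process({ result: ok({ value: 10 }), context: { message } })
    // out.result is ok({ value: 30 }); a non-ok result anywhere in the chain
    // would have been returned as-is without running the later steps
    return out
}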
plugin-server/src/ingestion/pipelines/steps.ts (new file, 11 lines)
@@ -0,0 +1,11 @@
import { PipelineResult } from './results'

/**
 * Synchronous processing step that takes a value and returns a processing result
 */
export type SyncProcessingStep<T, U> = (value: T) => PipelineResult<U>

/**
 * Asynchronous processing step that takes a value and returns a promise of a processing result
 */
export type AsyncProcessingStep<T, U> = (value: T) => Promise<PipelineResult<U>>
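Steps are plain functions; naming them matters because pipe()/pipeAsync() use fn.name as the tracing span (anonymous steps all report as 'anonymousStep' or 'anonymousAsyncStep'). A typing sketch with illustrative step names:

import { ok } from './results'
import { AsyncProcessingStep, SyncProcessingStep } from './steps'

// Named function expressions so instrumentFn records meaningful span names
const parseCount: SyncProcessingStep<string, number> = function parseCount(raw) {
    return ok(parseInt(raw, 10))
}

const enrich: AsyncProcessingStep<number, { count: number; seenAt: string }> = async function enrich(count) {
    return ok({ count, seenAt: new Date().toISOString() })
}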
@@ -1,358 +0,0 @@
import { dlq, drop, redirect, success } from '../worker/ingestion/event-pipeline/pipeline-step-result'
import {
    AsyncPreprocessingStep,
    AsyncProcessingPipeline,
    ProcessingPipeline,
    SyncPreprocessingStep,
} from './processing-pipeline'

describe('ProcessingPipeline', () => {
    describe('static methods', () => {
        it('should create pipeline with success result using of()', () => {
            const value = { test: 'data' }
            const pipeline = ProcessingPipeline.of(value)

            const result = pipeline.unwrap()
            expect(result).toEqual(success(value))
        })
    })

    describe('pipe() - synchronous steps', () => {
        it('should execute step when result is success', () => {
            const initialValue = { count: 1 }
            const step: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
                return success({ count: input.count + 1 })
            }

            const result = ProcessingPipeline.of(initialValue).pipe(step).unwrap()

            expect(result).toEqual(success({ count: 2 }))
        })

        it('should skip step when result is drop', () => {
            const initialValue = { count: 1 }
            const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return drop('dropped by first step')
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = ProcessingPipeline.of(initialValue).pipe(dropStep).pipe(secondStep).unwrap()

            expect(result).toEqual(drop('dropped by first step'))
            expect(secondStep).not.toHaveBeenCalled()
        })

        it('should skip step when result is redirect', () => {
            const initialValue = { count: 1 }
            const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return redirect('test redirect', 'overflow-topic', true, false)
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = ProcessingPipeline.of(initialValue).pipe(redirectStep).pipe(secondStep).unwrap()

            expect(result).toEqual(redirect('test redirect', 'overflow-topic', true, false))
            expect(secondStep).not.toHaveBeenCalled()
        })

        it('should skip step when result is dlq', () => {
            const initialValue = { count: 1 }
            const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return dlq('test dlq', new Error('test error'))
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = ProcessingPipeline.of(initialValue).pipe(dlqStep).pipe(secondStep).unwrap()

            expect(result).toEqual(dlq('test dlq', new Error('test error')))
            expect(secondStep).not.toHaveBeenCalled()
        })

        it('should chain multiple synchronous steps', () => {
            const initialValue = { count: 0 }

            const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
                return success({ count: input.count + 1 })
            }

            const step2: SyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = (input) => {
                return success({ count: input.count, doubled: input.count * 2 })
            }

            const step3: SyncPreprocessingStep<{ count: number; doubled: number }, { final: string }> = (input) => {
                return success({ final: `count: ${input.count}, doubled: ${input.doubled}` })
            }

            const result = ProcessingPipeline.of(initialValue).pipe(step1).pipe(step2).pipe(step3).unwrap()

            expect(result).toEqual(success({ final: 'count: 1, doubled: 2' }))
        })

        it('should stop chain when step returns drop', () => {
            const initialValue = { count: 0 }

            const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
                return success({ count: input.count + 1 })
            }

            const step2: SyncPreprocessingStep<{ count: number }, { count: number }> = () => {
                return drop('step2 dropped')
            }

            const step3: SyncPreprocessingStep<{ count: number }, { final: string }> = (input) => {
                return success({ final: `count: ${input.count}` })
            }

            const result = ProcessingPipeline.of(initialValue).pipe(step1).pipe(step2).pipe(step3).unwrap()

            expect(result).toEqual(drop('step2 dropped'))
        })
    })

    describe('pipeAsync() - mixed sync/async steps', () => {
        it('should transition to AsyncProcessingPipeline', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const asyncPipeline = ProcessingPipeline.of(initialValue).pipeAsync(asyncStep)
            expect(asyncPipeline).toBeInstanceOf(AsyncProcessingPipeline)

            const result = await asyncPipeline.unwrap()
            expect(result).toEqual(success({ count: 2 }))
        })

        it('should not execute async step when result is failure', async () => {
            const initialValue = { count: 1 }
            const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return drop('initial drop')
            }
            const asyncStep: AsyncPreprocessingStep<{ count: number }, { executed: boolean }> = jest.fn(async () => {
                await Promise.resolve()
                return success({ executed: true })
            })

            const result = await ProcessingPipeline.of(initialValue).pipe(dropStep).pipeAsync(asyncStep).unwrap()

            expect(result).toEqual(drop('initial drop'))
            expect(asyncStep).not.toHaveBeenCalled()
        })
    })
})

describe('AsyncProcessingPipeline', () => {
    describe('pipe() - synchronous steps on async pipeline', () => {
        it('should execute sync step after async step', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }
            const syncStep: SyncPreprocessingStep<{ count: number }, { count: number; final: boolean }> = (input) => {
                return success({ count: input.count, final: true })
            }

            const result = await ProcessingPipeline.of(initialValue).pipeAsync(asyncStep).pipe(syncStep).unwrap()

            expect(result).toEqual(success({ count: 2, final: true }))
        })

        it('should skip sync step when async result is failure', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return drop('async drop')
            }
            const syncStep: SyncPreprocessingStep<{ count: number }, { final: boolean }> = jest.fn((_input) => {
                return success({ final: true })
            })

            const result = await ProcessingPipeline.of(initialValue).pipeAsync(asyncStep).pipe(syncStep).unwrap()

            expect(result).toEqual(drop('async drop'))
            expect(syncStep).not.toHaveBeenCalled()
        })
    })

    describe('pipeAsync() - chaining async steps', () => {
        it('should chain multiple async steps', async () => {
            const initialValue = { count: 0 }

            const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const step2: AsyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = async (
                input
            ) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count, doubled: input.count * 2 })
            }

            const result = await ProcessingPipeline.of(initialValue).pipeAsync(step1).pipeAsync(step2).unwrap()

            expect(result).toEqual(success({ count: 1, doubled: 2 }))
        })

        it('should stop chain when async step returns failure', async () => {
            const initialValue = { count: 0 }

            const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const step2: AsyncPreprocessingStep<{ count: number }, { count: number }> = async () => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return redirect('async redirect', 'overflow-topic', false, true)
            }

            const step3: AsyncPreprocessingStep<{ count: number }, { final: string }> = jest.fn(async (input) => {
                await Promise.resolve()
                return success({ final: `count: ${input.count}` })
            })

            const result = await ProcessingPipeline.of(initialValue)
                .pipeAsync(step1)
                .pipeAsync(step2)
                .pipeAsync(step3)
                .unwrap()

            expect(result).toEqual(redirect('async redirect', 'overflow-topic', false, true))
            expect(step3).not.toHaveBeenCalled()
        })
    })

    describe('mixed sync and async steps', () => {
        it('should handle complex pipeline with mixed step types', async () => {
            const initialValue = { value: 'start' }

            const syncStep1: SyncPreprocessingStep<typeof initialValue, { value: string; step1: boolean }> = (
                input
            ) => {
                return success({ value: input.value + '-sync1', step1: true })
            }

            const asyncStep1: AsyncPreprocessingStep<
                { value: string; step1: boolean },
                { value: string; step1: boolean; async1: boolean }
            > = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ ...input, value: input.value + '-async1', async1: true })
            }

            const syncStep2: SyncPreprocessingStep<
                { value: string; step1: boolean; async1: boolean },
                { final: string }
            > = (input) => {
                return success({ final: `${input.value}-sync2` })
            }

            const result = await ProcessingPipeline.of(initialValue)
                .pipe(syncStep1)
                .pipeAsync(asyncStep1)
                .pipe(syncStep2)
                .unwrap()

            expect(result).toEqual(success({ final: 'start-sync1-async1-sync2' }))
        })
    })

    describe('error handling', () => {
        it('should handle async step that throws an error', async () => {
            const initialValue = { count: 1 }
            const errorStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
                await Promise.resolve()
                throw new Error('Step failed')
            }

            await expect(ProcessingPipeline.of(initialValue).pipeAsync(errorStep).unwrap()).rejects.toThrow(
                'Step failed'
            )
        })

        it('should handle sync step that throws an error', () => {
            const initialValue = { count: 1 }
            const errorStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                throw new Error('Sync step failed')
            }

            expect(() => {
                ProcessingPipeline.of(initialValue).pipe(errorStep).unwrap()
            }).toThrow('Sync step failed')
        })
    })
})

describe('Type safety and generics', () => {
    it('should maintain type safety through pipeline transformations', () => {
        interface Input1 {
            a: number
        }
        interface Input2 {
            b: string
        }
        interface Input3 {
            c: boolean
        }

        const step1: SyncPreprocessingStep<Input1, Input2> = (input) => {
            expect(typeof input.a).toBe('number')
            return success({ b: input.a.toString() })
        }

        const step2: SyncPreprocessingStep<Input2, Input3> = (input) => {
            expect(typeof input.b).toBe('string')
            return success({ c: input.b === '42' })
        }

        const result = ProcessingPipeline.of({ a: 42 }).pipe(step1).pipe(step2).unwrap()

        expect(result).toEqual(success({ c: true }))
    })

    it('should work with complex nested types', () => {
        interface ComplexInput {
            user: { id: string; name: string }
            metadata: { timestamp: number; source: string }
        }

        interface ProcessedOutput {
            userId: string
            displayName: string
            processedAt: string
        }

        const processStep: SyncPreprocessingStep<ComplexInput, ProcessedOutput> = (input) => {
            return success({
                userId: input.user.id,
                displayName: input.user.name.toUpperCase(),
                processedAt: new Date(input.metadata.timestamp).toISOString(),
            })
        }

        const complexInput: ComplexInput = {
            user: { id: 'user123', name: 'John Doe' },
            metadata: { timestamp: 1640995200000, source: 'api' },
        }

        const result = ProcessingPipeline.of(complexInput).pipe(processStep).unwrap()

        expect(result).toEqual(
            success({
                userId: 'user123',
                displayName: 'JOHN DOE',
                processedAt: '2022-01-01T00:00:00.000Z',
            })
        )
    })
})
@@ -1,76 +0,0 @@
import {
    PipelineStepResult,
    PipelineStepResultType,
    isSuccessResult,
} from '../worker/ingestion/event-pipeline/pipeline-step-result'

export type ProcessingResult<T> = PipelineStepResult<T>

export class AsyncProcessingPipeline<T> {
    constructor(private resultPromise: Promise<ProcessingResult<T>>) {}

    pipe<U>(step: (value: T) => ProcessingResult<U>): AsyncProcessingPipeline<U> {
        const nextResultPromise = this.resultPromise.then((currentResult) => {
            if (!isSuccessResult(currentResult)) {
                return currentResult
            }

            return step(currentResult.value)
        })

        return new AsyncProcessingPipeline(nextResultPromise)
    }

    pipeAsync<U>(step: (value: T) => Promise<ProcessingResult<U>>): AsyncProcessingPipeline<U> {
        const nextResultPromise = this.resultPromise.then(async (currentResult) => {
            if (!isSuccessResult(currentResult)) {
                return currentResult
            }

            return await step(currentResult.value)
        })

        return new AsyncProcessingPipeline(nextResultPromise)
    }

    async unwrap(): Promise<ProcessingResult<T>> {
        return await this.resultPromise
    }
}

export class ProcessingPipeline<T> {
    constructor(private result: ProcessingResult<T>) {}

    pipe<U>(step: (value: T) => ProcessingResult<U>): ProcessingPipeline<U> {
        if (!isSuccessResult(this.result)) {
            return new ProcessingPipeline(this.result)
        }

        const stepResult = step(this.result.value)
        return new ProcessingPipeline(stepResult)
    }

    pipeAsync<U>(step: (value: T) => Promise<ProcessingResult<U>>): AsyncProcessingPipeline<U> {
        if (!isSuccessResult(this.result)) {
            const failurePromise = Promise.resolve(this.result)
            return new AsyncProcessingPipeline(failurePromise)
        }

        const stepResultPromise = step(this.result.value)
        return new AsyncProcessingPipeline(stepResultPromise)
    }

    unwrap(): ProcessingResult<T> {
        return this.result
    }

    static of<T>(value: T): ProcessingPipeline<T> {
        return new ProcessingPipeline({ type: PipelineStepResultType.OK, value })
    }
}

export type SyncPreprocessingStep<T, U> = (value: T) => ProcessingResult<U>

export type AsyncPreprocessingStep<T, U> = (value: T) => Promise<ProcessingResult<U>>

export type PreprocessingStep<T, U> = SyncPreprocessingStep<T, U> | AsyncPreprocessingStep<T, U>
@@ -1,74 +0,0 @@
export enum PipelineStepResultType {
    OK,
    DLQ,
    DROP,
    REDIRECT,
}

/**
 * Generic result type for pipeline steps that can succeed, be dropped, or sent to DLQ
 */
export type PipelineStepResultOk<T> = { type: PipelineStepResultType.OK; value: T }
export type PipelineStepResultDlq = { type: PipelineStepResultType.DLQ; reason: string; error: unknown }
export type PipelineStepResultDrop = { type: PipelineStepResultType.DROP; reason: string }
export type PipelineStepResultRedirect = {
    type: PipelineStepResultType.REDIRECT
    reason: string
    topic: string
    preserveKey?: boolean
    awaitAck?: boolean
}
export type PipelineStepResult<T> =
    | PipelineStepResultOk<T>
    | PipelineStepResultDlq
    | PipelineStepResultDrop
    | PipelineStepResultRedirect

/**
 * Helper functions for creating pipeline step results
 */
export function success<T>(value: T): PipelineStepResult<T> {
    return { type: PipelineStepResultType.OK, value }
}

export function dlq<T>(reason: string, error?: any): PipelineStepResult<T> {
    return { type: PipelineStepResultType.DLQ, reason, error }
}

export function drop<T>(reason: string): PipelineStepResult<T> {
    return { type: PipelineStepResultType.DROP, reason }
}

export function redirect<T>(
    reason: string,
    topic: string,
    preserveKey: boolean = true,
    awaitAck: boolean = true
): PipelineStepResult<T> {
    return {
        type: PipelineStepResultType.REDIRECT,
        reason,
        topic,
        preserveKey,
        awaitAck,
    }
}

/**
 * Type guard functions
 */
export function isSuccessResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultOk<T> {
    return result.type === PipelineStepResultType.OK
}

export function isDlqResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultDlq {
    return result.type === PipelineStepResultType.DLQ
}

export function isDropResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultDrop {
    return result.type === PipelineStepResultType.DROP
}

export function isRedirectResult<T>(result: PipelineStepResult<T>): result is PipelineStepResultRedirect {
    return result.type === PipelineStepResultType.REDIRECT
}
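For context, a sketch of how these now-removed helpers and type guards were typically consumed together; the exhaustive branching below is illustrative, not taken from the commit.

// Hypothetical exhaustive handling over the removed result union.
function describeResult<T>(result: PipelineStepResult<T>): string {
    if (isSuccessResult(result)) {
        return `ok: ${JSON.stringify(result.value)}`
    } else if (isDropResult(result)) {
        return `dropped: ${result.reason}`
    } else if (isRedirectResult(result)) {
        return `redirected to ${result.topic}: ${result.reason}`
    } else {
        return `sent to DLQ: ${result.reason}`
    }
}

describeResult(redirect('overflow', 'overflow-topic')) // 'redirected to overflow-topic: overflow'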
@@ -4,12 +4,12 @@ import { PluginEvent } from '@posthog/plugin-scaffold'

import { Person, Team } from '~/types'

import { PipelineResult, isOkResult, ok } from '../../../ingestion/pipelines/results'
import { PersonContext } from '../persons/person-context'
import { PersonEventProcessor } from '../persons/person-event-processor'
import { PersonMergeService } from '../persons/person-merge-service'
import { PersonPropertyService } from '../persons/person-property-service'
import { PersonsStoreForBatch } from '../persons/persons-store-for-batch'
import { PipelineStepResult, isSuccessResult, success } from './pipeline-step-result'
import { EventPipelineRunner } from './runner'

export async function processPersonsStep(
@@ -19,7 +19,7 @@ export async function processPersonsStep(
    timestamp: DateTime,
    processPerson: boolean,
    personStoreBatch: PersonsStoreForBatch
): Promise<PipelineStepResult<[PluginEvent, Person, Promise<void>]>> {
): Promise<PipelineResult<[PluginEvent, Person, Promise<void>]>> {
    const context = new PersonContext(
        event,
        team,
@@ -39,8 +39,8 @@ export async function processPersonsStep(
    )
    const [result, kafkaAck] = await processor.processEvent()

    if (isSuccessResult(result)) {
        return success([event, result.value, kafkaAck])
    if (isOkResult(result)) {
        return ok([event, result.value, kafkaAck])
    } else {
        return result
    }
@@ -1,6 +1,7 @@
import { PluginEvent } from '@posthog/plugin-scaffold'

import { HogTransformerService } from '../../../cdp/hog-transformations/hog-transformer.service'
import { isDlqResult, isDropResult, isOkResult, isRedirectResult } from '../../../ingestion/pipelines/results'
import { eventDroppedCounter } from '../../../main/ingestion-queues/metrics'
import { EventHeaders, Hub, PipelineEvent, Team } from '../../../types'
import { DependencyUnavailableError } from '../../../utils/db/error'
@@ -29,7 +30,6 @@ import {
    pipelineStepThrowCounter,
} from './metrics'
import { normalizeEventStep } from './normalizeEventStep'
import { isDlqResult, isDropResult, isRedirectResult, isSuccessResult } from './pipeline-step-result'
import { prepareEventStep } from './prepareEventStep'
import { processPersonsStep } from './processPersonsStep'
import { transformEventStep } from './transformEventStep'
@@ -314,7 +314,7 @@ export class EventPipelineRunner {
            event.team_id
        )

        if (!isSuccessResult(personStepResult)) {
        if (!isOkResult(personStepResult)) {
            // Handle DLQ/drop/redirect cases - return early from pipeline
            if (isDlqResult(personStepResult)) {
                await this.sendToDLQ(event, personStepResult.error, 'processPersonsStep')
@@ -4,9 +4,9 @@ import { DateTime } from 'luxon'
import { PluginEvent } from '@posthog/plugin-scaffold'

import { ONE_HOUR } from '../../../config/constants'
import { PipelineResult, dlq, ok, redirect } from '../../../ingestion/pipelines/results'
import { InternalPerson, Person } from '../../../types'
import { logger } from '../../../utils/logger'
import { PipelineStepResult, dlq, redirect, success } from '../event-pipeline/pipeline-step-result'
import { uuidFromDistinctId } from '../person-uuid'
import { PersonContext } from './person-context'
import { PersonMergeService } from './person-merge-service'
@@ -35,7 +35,7 @@ export class PersonEventProcessor {
        private mergeService: PersonMergeService
    ) {}

    async processEvent(): Promise<[PipelineStepResult<Person>, Promise<void>]> {
    async processEvent(): Promise<[PipelineResult<Person>, Promise<void>]> {
        if (!this.context.processPerson) {
            return await this.handlePersonlessMode()
        }
@@ -65,10 +65,7 @@ export class PersonEventProcessor {
            try {
                const [updatedPerson, updateKafkaAck] =
                    await this.propertyService.updatePersonProperties(personFromMerge)
                return [
                    success(updatedPerson),
                    Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined),
                ]
                return [ok(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
            } catch (error) {
                // Shortcut didn't work, swallow the error and try normal retry loop below
                logger.debug('🔁', `failed update after adding distinct IDs, retrying`, { error })
@@ -77,10 +74,10 @@ export class PersonEventProcessor {

        // Handle regular property updates
        const [updatedPerson, updateKafkaAck] = await this.propertyService.handleUpdate()
        return [success(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
        return [ok(updatedPerson), Promise.all([identifyOrAliasKafkaAck, updateKafkaAck]).then(() => undefined)]
    }

    private async handlePersonlessMode(): Promise<[PipelineStepResult<Person>, Promise<void>]> {
    private async handlePersonlessMode(): Promise<[PipelineResult<Person>, Promise<void>]> {
        let existingPerson = await this.context.personStore.fetchForChecking(
            this.context.team.id,
            this.context.distinctId
@@ -135,7 +132,7 @@ export class PersonEventProcessor {
            person.force_upgrade = true
        }

        return [success(person), Promise.resolve()]
        return [ok(person), Promise.resolve()]
    }

    // We need a value from the `person_created_column` in ClickHouse. This should be
@@ -150,14 +147,14 @@ export class PersonEventProcessor {
            uuid: uuidFromDistinctId(this.context.team.id, this.context.distinctId),
            created_at: createdAt,
        }
        return [success(fakePerson), Promise.resolve()]
        return [ok(fakePerson), Promise.resolve()]
    }

    getContext(): PersonContext {
        return this.context
    }

    private handleMergeError(error: unknown, event: PluginEvent): PipelineStepResult<Person> | null {
    private handleMergeError(error: unknown, event: PluginEvent): PipelineResult<Person> | null {
        const mergeMode = this.context.mergeMode

        if (error instanceof PersonMergeLimitExceededError) {
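A sketch of how a caller might consume the new tuple shape, assuming the isOkResult guard from ingestion/pipelines/results and a hypothetical downstream handler:

const [personResult, kafkaAck] = await processor.processEvent()
if (isOkResult(personResult)) {
    await handlePerson(personResult.value) // hypothetical downstream handler
}
await kafkaAck // the producer ack should be awaited whether or not the result is ok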
@@ -1,613 +0,0 @@
import { Message } from 'node-rdkafka'

import { AsyncPreprocessingStep, SyncPreprocessingStep } from '../../ingestion/processing-pipeline'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import { dlq, drop, redirect, success } from './event-pipeline/pipeline-step-result'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from './pipeline-helpers'
import { AsyncResultHandlingPipeline, PipelineConfig, ResultHandlingPipeline } from './result-handling-pipeline'

// Mock the pipeline helpers
jest.mock('./pipeline-helpers', () => ({
    logDroppedMessage: jest.fn(),
    redirectMessageToTopic: jest.fn(),
    sendMessageToDLQ: jest.fn(),
}))

const mockLogDroppedMessage = logDroppedMessage as jest.MockedFunction<typeof logDroppedMessage>
const mockRedirectMessageToTopic = redirectMessageToTopic as jest.MockedFunction<typeof redirectMessageToTopic>
const mockSendMessageToDLQ = sendMessageToDLQ as jest.MockedFunction<typeof sendMessageToDLQ>

describe('ResultHandlingPipeline', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let mockMessage: Message
    let config: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        mockMessage = {
            value: Buffer.from('test message'),
            topic: 'test-topic',
            partition: 0,
            offset: 123,
            key: 'test-key',
            headers: [],
            size: 12,
        } as Message

        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })

    describe('static methods', () => {
        it('should create pipeline with success result using of()', async () => {
            const value = { test: 'data' }
            const pipeline = ResultHandlingPipeline.of(value, mockMessage, config)

            const result = await pipeline.unwrap()
            expect(result).toEqual(value)
        })
    })

    describe('pipe() - synchronous steps', () => {
        it('should execute step when result is success', async () => {
            const initialValue = { count: 1 }
            const step: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
                return success({ count: input.count + 1 })
            }

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config).pipe(step).unwrap()

            expect(result).toEqual({ count: 2 })
        })

        it('should handle drop result and return null', async () => {
            const initialValue = { count: 1 }
            const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return drop('test drop reason')
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(dropStep)
                .pipe(secondStep)
                .unwrap()

            expect(result).toBeNull()
            expect(secondStep).not.toHaveBeenCalled()
            expect(mockLogDroppedMessage).toHaveBeenCalledWith(
                mockMessage,
                'test drop reason',
                'pipeline_result_handler'
            )
        })

        it('should handle redirect result and return null', async () => {
            const initialValue = { count: 1 }
            const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return redirect('test redirect', 'overflow-topic', true, false)
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(redirectStep)
                .pipe(secondStep)
                .unwrap()

            expect(result).toBeNull()
            expect(secondStep).not.toHaveBeenCalled()
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                mockMessage,
                'overflow-topic',
                'pipeline_result_handler',
                true,
                false
            )
        })

        it('should handle dlq result and return null', async () => {
            const initialValue = { count: 1 }
            const testError = new Error('test error')
            const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return dlq('test dlq reason', testError)
            }
            const secondStep: SyncPreprocessingStep<{ count: number }, { count: number }> = jest.fn((input) => {
                return success({ count: input.count + 1 })
            })

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(dlqStep)
                .pipe(secondStep)
                .unwrap()

            expect(result).toBeNull()
            expect(secondStep).not.toHaveBeenCalled()
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockMessage,
                testError,
                'pipeline_result_handler',
                'test-dlq'
            )
        })

        it('should handle dlq result without error and create default error', async () => {
            const initialValue = { count: 1 }
            const dlqStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return dlq('test dlq reason')
            }

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config).pipe(dlqStep).unwrap()

            expect(result).toBeNull()
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockMessage,
                expect.any(Error),
                'pipeline_result_handler',
                'test-dlq'
            )

            const errorArg = (mockSendMessageToDLQ as jest.Mock).mock.calls[0][2]
            expect(errorArg.message).toBe('test dlq reason')
        })

        it('should chain multiple synchronous steps successfully', async () => {
            const initialValue = { count: 0 }

            const step1: SyncPreprocessingStep<typeof initialValue, { count: number }> = (input) => {
                return success({ count: input.count + 1 })
            }

            const step2: SyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = (input) => {
                return success({ count: input.count, doubled: input.count * 2 })
            }

            const step3: SyncPreprocessingStep<{ count: number; doubled: number }, { final: string }> = (input) => {
                return success({ final: `count: ${input.count}, doubled: ${input.doubled}` })
            }

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(step1)
                .pipe(step2)
                .pipe(step3)
                .unwrap()

            expect(result).toEqual({ final: 'count: 1, doubled: 2' })
        })
    })

    describe('pipeAsync() - mixed sync/async steps', () => {
        it('should transition to AsyncResultHandlingPipeline', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const asyncPipeline = ResultHandlingPipeline.of(initialValue, mockMessage, config).pipeAsync(asyncStep)
            expect(asyncPipeline).toBeInstanceOf(AsyncResultHandlingPipeline)

            const result = await asyncPipeline.unwrap()
            expect(result).toEqual({ count: 2 })
        })

        it('should not execute async step when sync result is failure', async () => {
            const initialValue = { count: 1 }
            const dropStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return drop('initial drop')
            }
            const asyncStep: AsyncPreprocessingStep<{ count: number }, { executed: boolean }> = jest.fn(async () => {
                await Promise.resolve()
                return success({ executed: true })
            })

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(dropStep)
                .pipeAsync(asyncStep)
                .unwrap()

            expect(result).toBeNull()
            expect(asyncStep).not.toHaveBeenCalled()
            expect(mockLogDroppedMessage).toHaveBeenCalledWith(
                mockMessage,
                'initial drop',
                'async_pipeline_result_handler'
            )
        })
    })

    describe('redirect result with default parameters', () => {
        it('should use default preserveKey and awaitAck when not specified', async () => {
            const initialValue = { count: 1 }
            const redirectStep: SyncPreprocessingStep<typeof initialValue, { count: number }> = () => {
                return redirect('test redirect', 'overflow-topic')
            }

            const result = await ResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(redirectStep)
                .unwrap()

            expect(result).toBeNull()
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                mockMessage,
                'overflow-topic',
                'pipeline_result_handler',
                true, // default preserveKey
                true // default awaitAck
            )
        })
    })
})

describe('AsyncResultHandlingPipeline', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let mockMessage: Message
    let config: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        mockMessage = {
            value: Buffer.from('test message'),
            topic: 'test-topic',
            partition: 0,
            offset: 123,
            key: 'test-key',
            headers: [],
            size: 12,
        } as Message

        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })

    describe('static methods', () => {
        it('should create async pipeline using of()', async () => {
            const value = { test: 'data' }
            const pipeline = AsyncResultHandlingPipeline.of(value, mockMessage, config)

            const result = await pipeline.unwrap()
            expect(result).toEqual(value)
        })
    })

    describe('pipe() - synchronous steps on async pipeline', () => {
        it('should execute sync step after async step', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }
            const syncStep: SyncPreprocessingStep<{ count: number }, { count: number; final: boolean }> = (input) => {
                return success({ count: input.count, final: true })
            }

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipeAsync(asyncStep)
                .pipe(syncStep)
                .unwrap()

            expect(result).toEqual({ count: 2, final: true })
        })

        it('should skip sync step when async result is failure', async () => {
            const initialValue = { count: 1 }
            const asyncStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return drop('async drop')
            }
            const syncStep: SyncPreprocessingStep<{ count: number }, { final: boolean }> = jest.fn((_input) => {
                return success({ final: true })
            })

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipeAsync(asyncStep)
                .pipe(syncStep)
                .unwrap()

            expect(result).toBeNull()
            expect(syncStep).not.toHaveBeenCalled()
            expect(mockLogDroppedMessage).toHaveBeenCalledWith(
                mockMessage,
                'async drop',
                'async_pipeline_result_handler'
            )
        })
    })

    describe('pipeAsync() - chaining async steps', () => {
        it('should chain multiple async steps', async () => {
            const initialValue = { count: 0 }

            const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const step2: AsyncPreprocessingStep<{ count: number }, { count: number; doubled: number }> = async (
                input
            ) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count, doubled: input.count * 2 })
            }

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipeAsync(step1)
                .pipeAsync(step2)
                .unwrap()

            expect(result).toEqual({ count: 1, doubled: 2 })
        })

        it('should stop chain when async step returns failure', async () => {
            const initialValue = { count: 0 }

            const step1: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ count: input.count + 1 })
            }

            const step2: AsyncPreprocessingStep<{ count: number }, { count: number }> = async () => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return redirect('async redirect', 'overflow-topic', false, true)
            }

            const step3: AsyncPreprocessingStep<{ count: number }, { final: string }> = jest.fn(async (input) => {
                await Promise.resolve()
                return success({ final: `count: ${input.count}` })
            })

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipeAsync(step1)
                .pipeAsync(step2)
                .pipeAsync(step3)
                .unwrap()

            expect(result).toBeNull()
            expect(step3).not.toHaveBeenCalled()
            expect(mockRedirectMessageToTopic).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockPromiseScheduler,
                mockMessage,
                'overflow-topic',
                'async_pipeline_result_handler',
                false,
                true
            )
        })

        it('should handle async dlq result', async () => {
            const initialValue = { count: 1 }
            const testError = new Error('async error')
            const dlqStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
                await Promise.resolve()
                return dlq('async dlq reason', testError)
            }

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipeAsync(dlqStep)
                .unwrap()

            expect(result).toBeNull()
            expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
                mockKafkaProducer,
                mockMessage,
                testError,
                'async_pipeline_result_handler',
                'test-dlq'
            )
        })
    })

    describe('mixed sync and async steps', () => {
        it('should handle complex pipeline with mixed step types', async () => {
            const initialValue = { value: 'start' }

            const syncStep1: SyncPreprocessingStep<typeof initialValue, { value: string; step1: boolean }> = (
                input
            ) => {
                return success({ value: input.value + '-sync1', step1: true })
            }

            const asyncStep1: AsyncPreprocessingStep<
                { value: string; step1: boolean },
                { value: string; step1: boolean; async1: boolean }
            > = async (input) => {
                await new Promise((resolve) => setTimeout(resolve, 1))
                return success({ ...input, value: input.value + '-async1', async1: true })
            }

            const syncStep2: SyncPreprocessingStep<
                { value: string; step1: boolean; async1: boolean },
                { final: string }
            > = (input) => {
                return success({ final: `${input.value}-sync2` })
            }

            const result = await AsyncResultHandlingPipeline.of(initialValue, mockMessage, config)
                .pipe(syncStep1)
                .pipeAsync(asyncStep1)
                .pipe(syncStep2)
                .unwrap()

            expect(result).toEqual({ final: 'start-sync1-async1-sync2' })
        })
    })

    describe('error handling', () => {
        it('should propagate async step errors', async () => {
            const initialValue = { count: 1 }
            const errorStep: AsyncPreprocessingStep<typeof initialValue, { count: number }> = async () => {
                await Promise.resolve()
                throw new Error('Async step failed')
            }

            await expect(
                AsyncResultHandlingPipeline.of(initialValue, mockMessage, config).pipeAsync(errorStep).unwrap()
            ).rejects.toThrow('Async step failed')
        })
    })
})

describe('Integration tests', () => {
    let mockKafkaProducer: KafkaProducerWrapper
    let mockPromiseScheduler: PromiseScheduler
    let mockMessage: Message
    let config: PipelineConfig

    beforeEach(() => {
        jest.clearAllMocks()

        mockKafkaProducer = {
            producer: {} as any,
            queueMessages: jest.fn(),
        } as unknown as KafkaProducerWrapper

        mockPromiseScheduler = {
            schedule: jest.fn(),
        } as unknown as PromiseScheduler

        mockMessage = {
            value: Buffer.from('test message'),
            topic: 'test-topic',
            partition: 0,
            offset: 123,
            key: 'test-key',
            headers: [],
            size: 12,
        } as Message

        config = {
            kafkaProducer: mockKafkaProducer,
            dlqTopic: 'test-dlq',
            promiseScheduler: mockPromiseScheduler,
        }
    })

    it('should handle realistic event processing pipeline', async () => {
        interface EventInput {
            rawEvent: string
        }

        interface ParsedEvent {
            eventType: string
            userId: string
        }

        interface ValidatedEvent extends ParsedEvent {
            isValid: boolean
        }

        interface ProcessedEvent extends ValidatedEvent {
            timestamp: string
        }

        const parseStep: SyncPreprocessingStep<EventInput, ParsedEvent> = (input) => {
            if (input.rawEvent === 'invalid') {
                return drop('Invalid event format')
            }
            return success({
                eventType: 'pageview',
                userId: 'user123',
            })
        }

        const validateStep: AsyncPreprocessingStep<ParsedEvent, ValidatedEvent> = async (input) => {
            await new Promise((resolve) => setTimeout(resolve, 1))
            if (input.userId === 'blocked') {
                return redirect('User blocked', 'blocked-events-topic')
            }
            return success({
                ...input,
                isValid: true,
            })
        }

        const processStep: SyncPreprocessingStep<ValidatedEvent, ProcessedEvent> = (input) => {
            return success({
                ...input,
                timestamp: '2023-01-01T00:00:00Z',
            })
        }

        const result = await ResultHandlingPipeline.of({ rawEvent: 'test-event' }, mockMessage, config)
            .pipe(parseStep)
            .pipeAsync(validateStep)
            .pipe(processStep)
            .unwrap()

        expect(result).toEqual({
            eventType: 'pageview',
            userId: 'user123',
            isValid: true,
            timestamp: '2023-01-01T00:00:00Z',
        })
    })

    it('should handle pipeline failure at different stages', async () => {
        const parseStep: SyncPreprocessingStep<{ rawEvent: string }, { parsed: boolean }> = () => {
            return success({ parsed: true })
        }

        const validateStep: AsyncPreprocessingStep<{ parsed: boolean }, { validated: boolean }> = async () => {
            await Promise.resolve()
            return dlq('Validation failed', new Error('Invalid data'))
        }

        const processStep: SyncPreprocessingStep<{ validated: boolean }, { processed: boolean }> = jest.fn(() => {
            return success({ processed: true })
        })

        const result = await ResultHandlingPipeline.of({ rawEvent: 'test' }, mockMessage, config)
            .pipe(parseStep)
            .pipeAsync(validateStep)
            .pipe(processStep)
            .unwrap()

        expect(result).toBeNull()
        expect(processStep).not.toHaveBeenCalled()
        expect(mockSendMessageToDLQ).toHaveBeenCalledWith(
            mockKafkaProducer,
            mockMessage,
            expect.any(Error),
            'async_pipeline_result_handler',
            'test-dlq'
        )
    })
})
@@ -1,168 +0,0 @@
import { Message } from 'node-rdkafka'

import {
    AsyncPreprocessingStep,
    AsyncProcessingPipeline,
    ProcessingPipeline,
    ProcessingResult,
    SyncPreprocessingStep,
} from '../../ingestion/processing-pipeline'
import { KafkaProducerWrapper } from '../../kafka/producer'
import { PromiseScheduler } from '../../utils/promise-scheduler'
import {
    PipelineStepResultType,
    isDlqResult,
    isDropResult,
    isRedirectResult,
    isSuccessResult,
} from './event-pipeline/pipeline-step-result'
import { logDroppedMessage, redirectMessageToTopic, sendMessageToDLQ } from './pipeline-helpers'

export type PipelineConfig = {
    kafkaProducer: KafkaProducerWrapper
    dlqTopic: string
    promiseScheduler: PromiseScheduler
}

/**
 * Base class for handling pipeline results (DLQ, DROP, REDIRECT).
 * Contains common logic for processing non-success results.
 */
abstract class BaseResultHandlingPipeline<T> {
    protected constructor(
        protected originalMessage: Message,
        protected config: PipelineConfig
    ) {}

    /**
     * Handles a pipeline result, processing non-success results appropriately.
     * Returns the value for success results, null for non-success results.
     */
    protected async handleResult(result: ProcessingResult<T>, stepName: string): Promise<T | null> {
        if (isSuccessResult(result)) {
            return result.value
        }

        // Handle non-success results
        await this.handleNonSuccessResult(result, stepName)
        return null
    }

    private async handleNonSuccessResult(result: ProcessingResult<T>, stepName: string): Promise<void> {
        if (isDlqResult(result)) {
            await this.handleDlqResult(result, stepName)
        } else if (isDropResult(result)) {
            this.handleDropResult(result, stepName)
        } else if (isRedirectResult(result)) {
            await this.handleRedirectResult(result, stepName)
        }
    }

    private async handleDlqResult(result: { reason: string; error?: unknown }, stepName: string): Promise<void> {
        await sendMessageToDLQ(
            this.config.kafkaProducer,
            this.originalMessage,
            result.error || new Error(result.reason),
            stepName,
            this.config.dlqTopic
        )
    }

    private handleDropResult(result: { reason: string }, stepName: string): void {
        logDroppedMessage(this.originalMessage, result.reason, stepName)
    }

    private async handleRedirectResult(
        result: {
            reason: string
            topic: string
            preserveKey?: boolean
            awaitAck?: boolean
        },
        stepName: string
    ): Promise<void> {
        await redirectMessageToTopic(
            this.config.kafkaProducer,
            this.config.promiseScheduler,
            this.originalMessage,
            result.topic,
            stepName,
            result.preserveKey ?? true,
            result.awaitAck ?? true
        )
    }
}

/**
 * Wrapper around ProcessingPipeline that automatically handles result types (DLQ, DROP, REDIRECT)
 * and cuts execution short when encountering non-success results.
 *
 * Requires a KafkaProducerWrapper for DLQ and redirect functionality.
 */
export class ResultHandlingPipeline<T> extends BaseResultHandlingPipeline<T> {
    private constructor(
        private pipeline: ProcessingPipeline<T>,
        originalMessage: Message,
        config: PipelineConfig
    ) {
        super(originalMessage, config)
    }

    pipe<U>(step: SyncPreprocessingStep<T, U>, _stepName?: string): ResultHandlingPipeline<U> {
        const newPipeline = this.pipeline.pipe(step)
        return new ResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
    }

    pipeAsync<U>(step: AsyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
        const newPipeline = this.pipeline.pipeAsync(step)
        return new AsyncResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
    }

    async unwrap(): Promise<T | null> {
        const result = this.pipeline.unwrap()
        return this.handleResult(result, 'pipeline_result_handler')
    }

    static of<T>(value: T, originalMessage: Message, config: PipelineConfig): ResultHandlingPipeline<T> {
        const pipeline = ProcessingPipeline.of(value)
        return new ResultHandlingPipeline(pipeline, originalMessage, config)
    }
}

/**
 * Wrapper around AsyncProcessingPipeline that automatically handles result types (DLQ, DROP, REDIRECT)
 * and cuts execution short when encountering non-success results.
 *
 * Requires a KafkaProducerWrapper for DLQ and redirect functionality.
 */
export class AsyncResultHandlingPipeline<T> extends BaseResultHandlingPipeline<T> {
    constructor(
        private pipeline: AsyncProcessingPipeline<T>,
        originalMessage: Message,
        config: PipelineConfig
    ) {
        super(originalMessage, config)
    }

    pipe<U>(step: SyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
        const newPipeline = this.pipeline.pipe(step)
        return new AsyncResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
    }

    pipeAsync<U>(step: AsyncPreprocessingStep<T, U>, _stepName?: string): AsyncResultHandlingPipeline<U> {
        const newPipeline = this.pipeline.pipeAsync(step)
        return new AsyncResultHandlingPipeline(newPipeline, this.originalMessage, this.config)
    }

    async unwrap(): Promise<T | null> {
        const result = await this.pipeline.unwrap()
        return this.handleResult(result, 'async_pipeline_result_handler')
    }

    static of<T>(value: T, originalMessage: Message, config: PipelineConfig): AsyncResultHandlingPipeline<T> {
        const pipeline = ProcessingPipeline.of(value).pipeAsync((v) =>
            Promise.resolve({ type: PipelineStepResultType.OK, value: v })
        )
        return new AsyncResultHandlingPipeline(pipeline, originalMessage, config)
    }
}
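A usage sketch of this removed wrapper; the producer, scheduler, and message below are hypothetical, and unwrap() resolves to null after routing a non-success result to its destination.

// Hypothetical usage of the removed ResultHandlingPipeline.
const output = await ResultHandlingPipeline.of({ raw: 'payload' }, kafkaMessage, {
    kafkaProducer,          // hypothetical KafkaProducerWrapper
    dlqTopic: 'events-dlq', // hypothetical DLQ topic name
    promiseScheduler,       // hypothetical PromiseScheduler
})
    .pipe((input) => (input.raw.length > 0 ? success(input) : drop('empty payload')))
    .unwrap() // the value on success; null after DLQ/drop/redirect handling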
@@ -1,7 +1,7 @@
import { createApplyDropRestrictionsStep } from '../../../src/ingestion/event-preprocessing/apply-drop-events-restrictions'
import { drop, ok } from '../../../src/ingestion/pipelines/results'
import { EventHeaders } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'

describe('createApplyDropRestrictionsStep', () => {
    let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -28,7 +28,7 @@ describe('createApplyDropRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith('valid-token-123', 'user-456')
    })

@@ -60,7 +60,7 @@ describe('createApplyDropRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith(undefined, undefined)
    })

@@ -73,7 +73,7 @@ describe('createApplyDropRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldDropEvent).toHaveBeenCalledWith(undefined, undefined)
    })
})
@@ -2,9 +2,9 @@ import {
    OverflowConfig,
    createApplyForceOverflowRestrictionsStep,
} from '../../../src/ingestion/event-preprocessing/apply-force-overflow-restrictions'
import { ok, redirect } from '../../../src/ingestion/pipelines/results'
import { EventHeaders } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { redirect, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'

describe('createApplyForceOverflowRestrictionsStep', () => {
    let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -39,7 +39,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith('valid-token-123', 'user-456')
        // shouldSkipPerson should not be called if not forcing overflow
        expect(eventIngestionRestrictionManager.shouldSkipPerson).not.toHaveBeenCalled()
@@ -101,7 +101,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith(undefined, undefined)
    })

@@ -114,7 +114,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldForceOverflow).toHaveBeenCalledWith(undefined, undefined)
    })

@@ -138,7 +138,7 @@ describe('createApplyForceOverflowRestrictionsStep', () => {

        const result = disabledStep(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(eventIngestionRestrictionManager.shouldForceOverflow).not.toHaveBeenCalled()
        expect(eventIngestionRestrictionManager.shouldSkipPerson).not.toHaveBeenCalled()
    })
@@ -1,7 +1,7 @@
import { createApplyPersonProcessingRestrictionsStep } from '../../../src/ingestion/event-preprocessing/apply-person-processing-restrictions'
import { ok } from '../../../src/ingestion/pipelines/results'
import { IncomingEventWithTeam } from '../../../src/types'
import { EventIngestionRestrictionManager } from '../../../src/utils/event-ingestion-restriction-manager'
import { success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'

describe('createApplyPersonProcessingRestrictionsStep', () => {
    let eventIngestionRestrictionManager: EventIngestionRestrictionManager
@@ -50,7 +50,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties).toEqual({ defaultProp: 'defaultValue' })
        expect(input.eventWithTeam.event.token).toBe('valid-token-abc')
        expect(input.eventWithTeam.event.distinct_id).toBe('user-123')
@@ -67,7 +67,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties?.$process_person_profile).toBe(false)
        expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
            'restricted-token-def',
@@ -85,7 +85,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties?.$process_person_profile).toBe(false)
        expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
            'opt-out-token-ghi',
@@ -106,7 +106,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties).toMatchObject({
            customProp: 'customValue',
            $set: { a: 1, b: 2 },
@@ -133,7 +133,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties).toEqual({ customProp: 'customValue' })
        expect(eventIngestionRestrictionManager.shouldSkipPerson).toHaveBeenCalledWith(
            undefined,
@@ -154,7 +154,7 @@ describe('createApplyPersonProcessingRestrictionsStep', () => {

        const result = step(input)

        expect(result).toEqual(success(input))
        expect(result).toEqual(ok(input))
        expect(input.eventWithTeam.event.properties).toMatchObject({
            customProp: 'customValue',
            $process_person_profile: false,
@@ -1,9 +1,9 @@
|
||||
import { Message } from 'node-rdkafka'
|
||||
|
||||
import { createParseHeadersStep } from '../../../src/ingestion/event-preprocessing/parse-headers'
|
||||
import { ok } from '../../../src/ingestion/pipelines/results'
|
||||
import { parseEventHeaders } from '../../../src/kafka/consumer'
|
||||
import { EventHeaders } from '../../../src/types'
|
||||
import { success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('../../../src/kafka/consumer', () => ({
|
||||
@@ -35,7 +35,7 @@ describe('createParseHeadersStep', () => {
|
||||
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -56,7 +56,7 @@ describe('createParseHeadersStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
...input,
|
||||
headers: expectedHeaders,
|
||||
})
|
||||
@@ -76,7 +76,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -92,7 +92,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -116,7 +116,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -140,7 +140,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -164,7 +164,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -188,7 +188,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
|
||||
@@ -214,7 +214,7 @@ describe('createParseHeadersStep', () => {
|
||||
}
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(success({ ...input, headers: expectedHeaders }))
|
||||
expect(result).toEqual(ok({ ...input, headers: expectedHeaders }))
|
||||
expect(mockParseEventHeaders).toHaveBeenCalledWith(input.message.headers)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Message } from 'node-rdkafka'
|
||||
|
||||
import { createParseKafkaMessageStep } from '../../../src/ingestion/event-preprocessing/parse-kafka-message'
|
||||
import { drop, ok } from '../../../src/ingestion/pipelines/results'
|
||||
import { logger } from '../../../src/utils/logger'
|
||||
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('../../../src/utils/logger')
|
||||
@@ -42,7 +42,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -83,7 +83,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -115,7 +115,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -150,7 +150,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -187,7 +187,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -325,7 +325,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
@@ -357,7 +357,7 @@ describe('createParseKafkaMessageStep', () => {
|
||||
const result = step(input)
|
||||
|
||||
expect(result).toEqual(
|
||||
success({
|
||||
ok({
|
||||
message: mockMessage,
|
||||
event: {
|
||||
event: {
|
||||
|
||||
@@ -4,8 +4,8 @@ import { DB } from '~/utils/db/db'
|
||||
import { TeamManager } from '~/utils/team-manager'
|
||||
|
||||
import { createResolveTeamStep } from '../../../src/ingestion/event-preprocessing/resolve-team'
|
||||
import { drop, ok } from '../../../src/ingestion/pipelines/results'
|
||||
import { EventHeaders, Hub, IncomingEvent, Team } from '../../../src/types'
|
||||
import { drop, success } from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
import { getMetricValues, resetMetrics } from '../../helpers/metrics'
|
||||
|
||||
const pipelineEvent = {
|
||||
@@ -116,7 +116,7 @@ describe('createResolveTeamStep()', () => {
|
||||
}
|
||||
const response = await step(input)
|
||||
expect(response).toEqual(
|
||||
success({
|
||||
ok({
|
||||
...input,
|
||||
eventWithTeam: {
|
||||
event: { ...pipelineEvent, token: teamTwoToken },
|
||||
@@ -162,7 +162,7 @@ describe('createResolveTeamStep()', () => {
|
||||
}
|
||||
const response = await step(input)
|
||||
expect(response).toEqual(
|
||||
success({
|
||||
ok({
|
||||
...input,
|
||||
eventWithTeam: {
|
||||
event: { ...pipelineEvent, team_id: 3, token: teamTwoToken },
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import { createValidateEventUuidStep } from '../../../src/ingestion/event-preprocessing/validate-event-uuid'
|
||||
import { PipelineResultType, drop, ok } from '../../../src/ingestion/pipelines/results'
|
||||
import { Hub, IncomingEventWithTeam } from '../../../src/types'
|
||||
import {
|
||||
PipelineStepResultType,
|
||||
drop,
|
||||
success,
|
||||
} from '../../../src/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
import { captureIngestionWarning } from '../../../src/worker/ingestion/utils'
|
||||
import { getMetricValues, resetMetrics } from '../../helpers/metrics'
|
||||
|
||||
@@ -53,7 +49,7 @@ describe('createValidateEventUuidStep', () => {
|
||||
const input = { eventWithTeam: mockEventWithTeam }
|
||||
const result = await step(input)
|
||||
|
||||
expect(result).toEqual(success(input))
|
||||
expect(result).toEqual(ok(input))
|
||||
expect(mockCaptureIngestionWarning).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
@@ -172,9 +168,9 @@ describe('createValidateEventUuidStep', () => {
|
||||
const input = { eventWithTeam: mockEventWithTeam }
|
||||
const result = await step(input)
|
||||
|
||||
expect(result).toEqual(success(input))
|
||||
expect(result).toEqual(ok(input))
|
||||
|
||||
if (result.type === PipelineStepResultType.OK) {
|
||||
if (result.type === PipelineResultType.OK) {
|
||||
expect(result.value.eventWithTeam.event.token).toBe('test-token-123')
|
||||
expect(result.value.eventWithTeam.event.distinct_id).toBe('test-user-456')
|
||||
expect(result.value.eventWithTeam.event.event).toBe('test-event')
|
||||
|
||||
@@ -2,7 +2,7 @@ import { DateTime } from 'luxon'
|
||||
|
||||
import { PluginEvent } from '@posthog/plugin-scaffold'
|
||||
|
||||
import { PipelineStepResultType, isSuccessResult } from '~/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
import { PipelineResultType, isOkResult } from '~/ingestion/pipelines/results'
|
||||
import { BatchWritingPersonsStoreForBatch } from '~/worker/ingestion/persons/batch-writing-person-store'
|
||||
|
||||
import { Hub, Team } from '../../../../src/types'
|
||||
@@ -72,8 +72,8 @@ describe('processPersonsStep()', () => {
|
||||
)
|
||||
)
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const [resEvent, resPerson, kafkaAck] = result.value
|
||||
expect(resEvent).toEqual(pluginEvent)
|
||||
expect(resPerson).toEqual(
|
||||
@@ -121,8 +121,8 @@ describe('processPersonsStep()', () => {
|
||||
)
|
||||
)
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const [resEvent, resPerson, kafkaAck] = result.value
|
||||
expect(resEvent).toEqual({
|
||||
...event,
|
||||
|
||||
@@ -3,8 +3,8 @@ import { v4 } from 'uuid'
|
||||
|
||||
import { PluginEvent } from '@posthog/plugin-scaffold'
|
||||
|
||||
import { dlq, ok, redirect } from '~/ingestion/pipelines/results'
|
||||
import { forSnapshot } from '~/tests/helpers/snapshots'
|
||||
import { dlq, redirect, success } from '~/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
import { BatchWritingGroupStoreForBatch } from '~/worker/ingestion/groups/batch-writing-group-store'
|
||||
import { BatchWritingPersonsStoreForBatch } from '~/worker/ingestion/persons/batch-writing-person-store'
|
||||
|
||||
@@ -188,7 +188,7 @@ describe('EventPipelineRunner', () => {
|
||||
)
|
||||
|
||||
jest.mocked(processPersonsStep).mockResolvedValue(
|
||||
success([
|
||||
ok([
|
||||
pluginEvent,
|
||||
{ person, personUpdateProperties: {}, get: () => Promise.resolve(person) } as any,
|
||||
Promise.resolve(),
|
||||
|
||||
@@ -5,13 +5,8 @@ import { DateTime } from 'luxon'
|
||||
import { PluginEvent, Properties } from '@posthog/plugin-scaffold'
|
||||
|
||||
import { KAFKA_INGESTION_WARNINGS, KAFKA_PERSON, KAFKA_PERSON_DISTINCT_ID } from '~/config/kafka-topics'
|
||||
import { PipelineResultType, isDlqResult, isOkResult, isRedirectResult } from '~/ingestion/pipelines/results'
|
||||
import { Clickhouse } from '~/tests/helpers/clickhouse'
|
||||
import {
|
||||
PipelineStepResultType,
|
||||
isDlqResult,
|
||||
isRedirectResult,
|
||||
isSuccessResult,
|
||||
} from '~/worker/ingestion/event-pipeline/pipeline-step-result'
|
||||
import { fromInternalPerson } from '~/worker/ingestion/persons/person-update-batch'
|
||||
|
||||
import { TopicMessage } from '../../../src/kafka/producer'
|
||||
@@ -363,8 +358,8 @@ describe('PersonState.processEvent()', () => {
|
||||
await hub.db.kafkaProducer.flush()
|
||||
await kafkaAcks
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const fakePerson = result.value
|
||||
expect(fakePerson).toEqual(
|
||||
expect.objectContaining({
|
||||
@@ -384,7 +379,7 @@ describe('PersonState.processEvent()', () => {
|
||||
// verify there are no Postgres distinct_ids
|
||||
const distinctIds = await fetchDistinctIdValues(
|
||||
hub.db.postgres,
|
||||
isSuccessResult(result) ? (result.value as InternalPerson) : ({} as InternalPerson)
|
||||
isOkResult(result) ? (result.value as InternalPerson) : ({} as InternalPerson)
|
||||
)
|
||||
expect(distinctIds).toEqual(expect.arrayContaining([]))
|
||||
})
|
||||
@@ -500,8 +495,8 @@ describe('PersonState.processEvent()', () => {
|
||||
await hub.db.kafkaProducer.flush()
|
||||
await kafkaAcks2
|
||||
|
||||
expect(result2.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result2)) {
|
||||
expect(result2.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result2)) {
|
||||
const fakePerson = result2.value
|
||||
expect(fakePerson).toEqual(
|
||||
expect.objectContaining({
|
||||
@@ -560,8 +555,8 @@ describe('PersonState.processEvent()', () => {
|
||||
await hub.db.kafkaProducer.flush()
|
||||
await kafkaAcks2
|
||||
|
||||
expect(result2.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result2)) {
|
||||
expect(result2.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result2)) {
|
||||
const fakePerson = result2.value
|
||||
expect(fakePerson.force_upgrade).toBeUndefined()
|
||||
}
|
||||
@@ -614,8 +609,8 @@ describe('PersonState.processEvent()', () => {
|
||||
await hub.db.kafkaProducer.flush()
|
||||
await kafkaAcks
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const person = result.value
|
||||
expect(person).toEqual(
|
||||
expect.objectContaining({
|
||||
@@ -646,8 +641,8 @@ describe('PersonState.processEvent()', () => {
|
||||
uuid: new UUIDT().toString(),
|
||||
properties: {},
|
||||
}).processEvent()
|
||||
expect(personVerifyResult.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(personVerifyResult)) {
|
||||
expect(personVerifyResult.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(personVerifyResult)) {
|
||||
expect(personVerifyResult.value.properties).toEqual({ $creator_event_uuid: originalEventUuid, c: 420 })
|
||||
}
|
||||
|
||||
@@ -664,8 +659,8 @@ describe('PersonState.processEvent()', () => {
|
||||
hub,
|
||||
false
|
||||
).processEvent()
|
||||
expect(processPersonFalseResult.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(processPersonFalseResult)) {
|
||||
expect(processPersonFalseResult.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(processPersonFalseResult)) {
|
||||
expect(processPersonFalseResult.value.properties).toEqual({})
|
||||
}
|
||||
})
|
||||
@@ -1095,8 +1090,8 @@ describe('PersonState.processEvent()', () => {
|
||||
const context = personS.getContext()
|
||||
await flushPersonStoreToKafka(hub, context.personStore, kafkaAcks)
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const person = result.value
|
||||
expect(person).toEqual(
|
||||
expect.objectContaining({
|
||||
@@ -1251,8 +1246,8 @@ describe('PersonState.processEvent()', () => {
|
||||
await flushPersonStoreToKafka(hub, context.personStore, kafkaAcks)
|
||||
|
||||
// Return logic is still unaware that merge happened
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
const person = result.value
|
||||
expect(person).toMatchObject({
|
||||
id: expect.any(String),
|
||||
@@ -4041,7 +4036,7 @@ describe('PersonState.processEvent()', () => {
|
||||
|
||||
const [result] = await processor.processEvent()
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.DLQ)
|
||||
expect(result.type).toBe(PipelineResultType.DLQ)
|
||||
if (isDlqResult(result)) {
|
||||
expect(result.reason).toBe('Merge limit exceeded')
|
||||
expect((result.error as any).message).toContain('person_merge_move_limit_hit')
|
||||
@@ -4070,7 +4065,7 @@ describe('PersonState.processEvent()', () => {
|
||||
|
||||
const [result] = await processor.processEvent()
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.REDIRECT)
|
||||
expect(result.type).toBe(PipelineResultType.REDIRECT)
|
||||
if (isRedirectResult(result)) {
|
||||
expect(result.reason).toBe('Event redirected to async merge topic')
|
||||
expect(result.topic).toBe('async-merge-topic')
|
||||
@@ -4115,8 +4110,8 @@ describe('PersonState.processEvent()', () => {
|
||||
|
||||
const [result] = await processor.processEvent()
|
||||
|
||||
expect(result.type).toBe(PipelineStepResultType.OK)
|
||||
if (isSuccessResult(result)) {
|
||||
expect(result.type).toBe(PipelineResultType.OK)
|
||||
if (isOkResult(result)) {
|
||||
expect(result.value).toEqual(mockPerson)
|
||||
}
|
||||
}
|
||||
|
||||
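For readers tracing the rename from success/PipelineStepResultType to ok/PipelineResultType above, here is a minimal sketch of the result helpers these tests now import. It is inferred purely from the call sites in this diff (ok, drop, dlq, redirect, isOkResult, isDlqResult, isRedirectResult, and the .value, .reason, .topic, and .error accesses); the enum member values and exact payload fields are assumptions, not the confirmed contents of src/ingestion/pipelines/results.ts.

// Hypothetical sketch inferred from the call sites in this diff; the real
// module's enum values and payload fields may differ.
export enum PipelineResultType {
    OK = 'ok', // member values are assumed
    DROP = 'drop',
    DLQ = 'dlq',
    REDIRECT = 'redirect',
}

export type PipelineResult<T> =
    | { type: PipelineResultType.OK; value: T }
    | { type: PipelineResultType.DROP; reason: string }
    | { type: PipelineResultType.DLQ; reason: string; error?: unknown }
    | { type: PipelineResultType.REDIRECT; reason: string; topic: string }

// Constructors: each builds one variant of the discriminated union.
export const ok = <T>(value: T): PipelineResult<T> => ({ type: PipelineResultType.OK, value })
export const drop = <T>(reason: string): PipelineResult<T> => ({ type: PipelineResultType.DROP, reason })
export const dlq = <T>(reason: string, error?: unknown): PipelineResult<T> => ({
    type: PipelineResultType.DLQ,
    reason,
    error,
})
export const redirect = <T>(reason: string, topic: string): PipelineResult<T> => ({
    type: PipelineResultType.REDIRECT,
    reason,
    topic,
})

// Type guards: narrow the union so `.value`, `.error`, or `.topic` can be
// accessed safely, as the tests above do after `if (isOkResult(result))`.
export function isOkResult<T>(r: PipelineResult<T>): r is Extract<PipelineResult<T>, { type: PipelineResultType.OK }> {
    return r.type === PipelineResultType.OK
}
export function isDlqResult<T>(r: PipelineResult<T>): r is Extract<PipelineResult<T>, { type: PipelineResultType.DLQ }> {
    return r.type === PipelineResultType.DLQ
}
export function isRedirectResult<T>(
    r: PipelineResult<T>
): r is Extract<PipelineResult<T>, { type: PipelineResultType.REDIRECT }> {
    return r.type === PipelineResultType.REDIRECT
}

Under this assumed shape, a step that succeeds returns ok(input), and isOkResult(result) narrows result to the OK variant so result.value can be asserted on, matching the pattern the migrated tests use throughout.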