diff --git a/backend/src/models/migrations/013_create_processing_events_table.sql b/backend/src/models/migrations/013_create_processing_events_table.sql
new file mode 100644
index 0000000..46273c5
--- /dev/null
+++ b/backend/src/models/migrations/013_create_processing_events_table.sql
@@ -0,0 +1,33 @@
+-- Migration: Create document_processing_events table for analytics
+-- Created: 2026-02-24
+-- Purpose: Establish analytics foundation for tracking document processing lifecycle events.
+--          Phase 2 of the monitoring feature — fire-and-forget instrumentation writes to
+--          this table without blocking the processing pipeline.
+
+-- =============================================================================
+-- TABLE: document_processing_events
+-- Records each lifecycle event during document processing for analytics and audit.
+-- Writes are always fire-and-forget (never awaited on the critical path).
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS document_processing_events (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  document_id UUID NOT NULL,
+  user_id UUID NOT NULL,
+  event_type TEXT NOT NULL CHECK (event_type IN ('upload_started', 'processing_started', 'completed', 'failed')),
+  duration_ms INTEGER, -- nullable: not applicable for all event types
+  error_message TEXT, -- nullable: failure details for 'failed' events
+  stage TEXT, -- nullable: which processing stage the event occurred in
+  created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Index required for 30-day retention queries (deleteProcessingEventsOlderThan)
+CREATE INDEX IF NOT EXISTS idx_document_processing_events_created_at
+  ON document_processing_events(created_at);
+
+-- Index for per-document event history queries
+CREATE INDEX IF NOT EXISTS idx_document_processing_events_document_id
+  ON document_processing_events(document_id);
+
+-- Enable RLS (service role key bypasses RLS automatically — explicit policies added in Phase 3)
+ALTER TABLE document_processing_events ENABLE ROW LEVEL SECURITY;
diff --git a/backend/src/services/analyticsService.ts b/backend/src/services/analyticsService.ts
new file mode 100644
index 0000000..5719213
--- /dev/null
+++ b/backend/src/services/analyticsService.ts
@@ -0,0 +1,97 @@
+import { getSupabaseServiceClient } from '../config/supabase';
+import { logger } from '../utils/logger';
+
+// =============================================================================
+// Types
+// =============================================================================
+
+export interface ProcessingEventData {
+  document_id: string;
+  user_id: string;
+  event_type: 'upload_started' | 'processing_started' | 'completed' | 'failed';
+  duration_ms?: number;
+  error_message?: string;
+  stage?: string;
+}
+
+// =============================================================================
+// recordProcessingEvent
+// =============================================================================
+
+/**
+ * Fire-and-forget analytics write for document processing lifecycle events.
+ *
+ * Return type is void (NOT Promise<void>) to prevent accidental await on the
+ * critical processing path. Any Supabase write failure is logged but never
+ * thrown — analytics must never block or break document processing.
+ *
+ * Architecture decision: Analytics writes are always fire-and-forget.
+ * See STATE.md: "Analytics writes are always fire-and-forget (never await on critical path)"
+ */
+export function recordProcessingEvent(data: ProcessingEventData): void {
+  const supabase = getSupabaseServiceClient();
+
+  void supabase
+    .from('document_processing_events')
+    .insert({
+      document_id: data.document_id,
+      user_id: data.user_id,
+      event_type: data.event_type,
+      duration_ms: data.duration_ms ?? null,
+      error_message: data.error_message ?? null,
+      stage: data.stage ?? null,
+      created_at: new Date().toISOString(),
+    })
+    .then(({ error }) => {
+      if (error) {
+        logger.error('analyticsService: failed to insert processing event', {
+          error: error.message,
+          document_id: data.document_id,
+          event_type: data.event_type,
+        });
+      }
+    })
+    .catch((err: unknown) => {
+      // Supabase resolves with { error } for PostgREST errors, but transport-level
+      // failures REJECT the promise. Without this catch, a network blip becomes an
+      // unhandled rejection (process-fatal in modern Node) — log it, never throw.
+      logger.error('analyticsService: unexpected error inserting processing event', {
+        error: err instanceof Error ? err.message : String(err),
+        document_id: data.document_id,
+        event_type: data.event_type,
+      });
+    });
+}
+
+// =============================================================================
+// deleteProcessingEventsOlderThan
+// =============================================================================
+
+/**
+ * Delete document_processing_events rows older than `days` days.
+ *
+ * Used by the retention cleanup job to enforce data retention policy.
+ * Returns the count of rows deleted.
+ *
+ * Follows the same pattern as HealthCheckModel.deleteOlderThan().
+ *
+ * @param days - retention window in days; rows with created_at older than this are removed
+ * @returns number of rows deleted
+ * @throws Error when the Supabase delete fails (after logging context)
+ */
+export async function deleteProcessingEventsOlderThan(days: number): Promise<number> {
+  const cutoff = new Date(Date.now() - days * 86400000).toISOString();
+  const supabase = getSupabaseServiceClient();
+
+  // .select() makes the delete return the removed rows so we can report a count.
+  const { data, error } = await supabase
+    .from('document_processing_events')
+    .delete()
+    .lt('created_at', cutoff)
+    .select();
+
+  if (error) {
+    logger.error('analyticsService: failed to delete old processing events', {
+      error: error.message,
+      days,
+      cutoff,
+    });
+    throw new Error(`failed to delete processing events older than ${days} days — ${error.message}`);
+  }
+
+  return data ? data.length : 0;
+}