feat(02-03): create alertService with deduplication and email

- evaluateAndAlert() iterates ProbeResults and skips healthy probes
- Maps 'down' -> 'service_down', 'degraded' -> 'service_degraded'
- Deduplication via AlertEventModel.findRecentByService with configurable cooldown
- Creates alert_events row before sending email (suppression skips both)
- Recipient read from process.env.EMAIL_WEEKLY_RECIPIENT (never hardcoded)
- createTransporter() called inside function scope (Firebase Secret timing fix)
- Email failures caught and logged, never re-thrown
This commit is contained in:
admin
2026-02-24 14:28:20 -05:00
parent 520b6b1fe2
commit 91f609cf92
6 changed files with 613 additions and 2 deletions

View File

@@ -0,0 +1,102 @@
-- ============================================================
-- CLEANUP OLD DATA - Run in Supabase SQL Editor
-- ============================================================
-- Removes stale data that accumulates over time without
-- impacting application functionality.
--
-- SAFE TO RUN: All deleted data is either intermediate
-- processing artifacts or analytics logs. Core document
-- data (documents, document_chunks, analysis_data) is
-- never touched by DELETE statements.
--
-- Skips tables that don't exist yet (safe for any state).
--
-- RECOMMENDATION: Run the check_table_sizes.sql query first
-- to see how much data will be affected.
-- ============================================================
DO $$
DECLARE
deleted bigint;
BEGIN
-- 1. Processing jobs: completed/failed older than 30 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'processing_jobs') THEN
DELETE FROM processing_jobs WHERE status IN ('completed', 'failed') AND completed_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'processing_jobs: deleted % rows', deleted;
END IF;
-- 2. Execution events: older than 30 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'execution_events') THEN
DELETE FROM execution_events WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'execution_events: deleted % rows', deleted;
END IF;
-- 3. Session events: older than 30 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'session_events') THEN
DELETE FROM session_events WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'session_events: deleted % rows', deleted;
END IF;
-- 4. Performance metrics: older than 90 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'performance_metrics') THEN
DELETE FROM performance_metrics WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'performance_metrics: deleted % rows', deleted;
END IF;
-- 5. Vector similarity searches: older than 90 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'vector_similarity_searches') THEN
DELETE FROM vector_similarity_searches WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'vector_similarity_searches: deleted % rows', deleted;
END IF;
-- 6. Service health checks: older than 30 days (INFR-01)
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'service_health_checks') THEN
DELETE FROM service_health_checks WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'service_health_checks: deleted % rows', deleted;
END IF;
-- 7. Alert events: resolved older than 30 days (INFR-01)
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'alert_events') THEN
DELETE FROM alert_events WHERE status = 'resolved' AND created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'alert_events: deleted % rows', deleted;
END IF;
-- 8. Agent executions: older than 90 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'agent_executions') THEN
DELETE FROM agent_executions WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'agent_executions: deleted % rows', deleted;
END IF;
-- 9. Processing quality metrics: older than 90 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'processing_quality_metrics') THEN
DELETE FROM processing_quality_metrics WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'processing_quality_metrics: deleted % rows', deleted;
END IF;
-- 10. Agentic RAG sessions: completed older than 90 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'agentic_rag_sessions') THEN
DELETE FROM agentic_rag_sessions WHERE status IN ('completed', 'failed') AND created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'agentic_rag_sessions: deleted % rows', deleted;
END IF;
-- 11. Null out extracted_text for completed documents older than 30 days
IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'documents') THEN
UPDATE documents SET extracted_text = NULL
WHERE status = 'completed' AND analysis_data IS NOT NULL AND extracted_text IS NOT NULL AND created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted = ROW_COUNT;
RAISE NOTICE 'documents extracted_text nulled: % rows', deleted;
END IF;
RAISE NOTICE '--- CLEANUP COMPLETE ---';
END $$;