Files
cim_summary/backend/sql/setup_pg_cron_cleanup.sql
admin 91f609cf92 feat(02-03): create alertService with deduplication and email
- evaluateAndAlert() iterates ProbeResults and skips healthy probes
- Maps 'down' -> 'service_down', 'degraded' -> 'service_degraded'
- Deduplication via AlertEventModel.findRecentByService with configurable cooldown
- Creates alert_events row before sending email (suppression skips both)
- Recipient read from process.env.EMAIL_WEEKLY_RECIPIENT (never hardcoded)
- createTransporter() called inside function scope (Firebase Secret timing fix)
- Email failures caught and logged, never re-thrown
2026-02-24 14:28:20 -05:00

146 lines
5.5 KiB
PL/PgSQL

-- ============================================================
-- ALTERNATIVE: PG_CRON AUTOMATED CLEANUP
-- ============================================================
-- NOTE: The primary cleanup runs as a Firebase scheduled
-- function (cleanupOldData in index.ts). This pg_cron
-- approach is an ALTERNATIVE if you prefer database-level
-- scheduling instead.
--
-- Supabase includes pg_cron. This script creates scheduled
-- jobs that automatically enforce retention policies.
--
-- PREREQUISITE: pg_cron extension must be enabled.
-- Go to Supabase Dashboard → Database → Extensions → enable pg_cron
--
-- SCHEDULE: Runs daily at 03:00 UTC (off-peak)
-- ============================================================
-- Enable the pg_cron extension (if not already enabled).
-- Idempotent: IF NOT EXISTS makes re-running this script safe.
CREATE EXTENSION IF NOT EXISTS pg_cron;
-- Grant usage on the cron schema to the postgres role so it can
-- call cron.schedule()/cron.unschedule() (required on Supabase,
-- where pg_cron objects are owned by supabase_admin).
GRANT USAGE ON SCHEMA cron TO postgres;
-- ============================================================
-- Create the cleanup function
-- ============================================================
-- Deletes expired rows per table-specific retention policies and returns
-- a jsonb summary mapping each table name to the number of rows affected,
-- e.g. {"processing_jobs": 12, "execution_events": 340, ...}.
--
-- Retention windows:
--   30 days: processing_jobs (terminal), execution_events, session_events,
--            service_health_checks, alert_events (resolved),
--            documents.extracted_text (nulled, not deleted)
--   90 days: performance_metrics, vector_similarity_searches,
--            agent_executions, processing_quality_metrics,
--            agentic_rag_sessions (terminal)
CREATE OR REPLACE FUNCTION public.cleanup_old_data()
RETURNS jsonb
LANGUAGE plpgsql
SECURITY DEFINER
-- SECURITY DEFINER functions must pin search_path; otherwise the
-- unqualified table names below resolve through the CALLER's search_path,
-- letting an attacker shadow a table and run DML as the function owner.
-- pg_temp last so temp objects can never shadow public tables.
SET search_path = public, pg_temp
AS $$
DECLARE
result jsonb := '{}'::jsonb;  -- accumulated per-table deletion counts
deleted_count bigint;
BEGIN
-- 1. Processing jobs: completed/failed older than 30 days
-- NOTE(review): rows whose completed_at is NULL (e.g. a failed job that
-- never set it) are retained forever by this predicate — confirm
-- completed_at is always populated for terminal statuses.
DELETE FROM processing_jobs
WHERE status IN ('completed', 'failed')
AND completed_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('processing_jobs', deleted_count);
-- 2. Execution events: older than 30 days
DELETE FROM execution_events
WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('execution_events', deleted_count);
-- 3. Session events: older than 30 days
DELETE FROM session_events
WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('session_events', deleted_count);
-- 4. Performance metrics: older than 90 days
DELETE FROM performance_metrics
WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('performance_metrics', deleted_count);
-- 5. Vector similarity searches: older than 90 days
DELETE FROM vector_similarity_searches
WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('vector_similarity_searches', deleted_count);
-- 6. Service health checks: older than 30 days (INFR-01)
DELETE FROM service_health_checks
WHERE created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('service_health_checks', deleted_count);
-- 7. Alert events: resolved older than 30 days (INFR-01).
-- Unresolved/active alerts are kept indefinitely.
DELETE FROM alert_events
WHERE status = 'resolved'
AND created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('alert_events', deleted_count);
-- 8. Agent executions: older than 90 days
DELETE FROM agent_executions
WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('agent_executions', deleted_count);
-- 9. Processing quality metrics: older than 90 days
DELETE FROM processing_quality_metrics
WHERE created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('processing_quality_metrics', deleted_count);
-- 10. Agentic RAG sessions: completed/failed older than 90 days.
-- In-flight sessions (any other status) are never removed here.
DELETE FROM agentic_rag_sessions
WHERE status IN ('completed', 'failed')
AND created_at < NOW() - INTERVAL '90 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('agentic_rag_sessions', deleted_count);
-- 11. Null out extracted_text for completed documents older than 30 days.
-- The document row survives; only the bulky raw text is dropped, and only
-- once analysis_data exists (i.e. the text has served its purpose).
UPDATE documents
SET extracted_text = NULL
WHERE status = 'completed'
AND analysis_data IS NOT NULL
AND extracted_text IS NOT NULL
AND created_at < NOW() - INTERVAL '30 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
result := result || jsonb_build_object('documents_text_nulled', deleted_count);
RETURN result;
END;
$$;
-- ============================================================
-- Schedule the cron job: daily at 03:00 UTC
-- ============================================================
-- NOTE(review): pg_cron treats the job name as a key — scheduling
-- with an existing name should update that job rather than create a
-- duplicate; confirm against the pg_cron version deployed on Supabase.
SELECT cron.schedule(
'daily-cleanup-old-data', -- job name (used as the key for unschedule/update)
'0 3 * * *', -- cron expression: 3 AM UTC daily (off-peak)
$$SELECT public.cleanup_old_data()$$
);
-- ============================================================
-- Verify the job was created
-- ============================================================
-- Explicit column list instead of SELECT * so the output stays stable
-- across pg_cron versions and shows only the fields worth checking.
SELECT jobid, jobname, schedule, command, active
FROM cron.job
WHERE jobname = 'daily-cleanup-old-data';
-- ============================================================
-- MANAGEMENT COMMANDS (for reference)
-- ============================================================
-- View all scheduled jobs:
-- SELECT * FROM cron.job;
-- View recent job runs and results:
-- SELECT * FROM cron.job_run_details ORDER BY start_time DESC LIMIT 20;
-- Run cleanup manually (to test):
-- SELECT public.cleanup_old_data();
-- Unschedule the job:
-- SELECT cron.unschedule('daily-cleanup-old-data');
-- Change schedule to weekly (Sundays at 3 AM):
-- SELECT cron.unschedule('daily-cleanup-old-data');
-- SELECT cron.schedule('weekly-cleanup-old-data', '0 3 * * 0', $$SELECT public.cleanup_old_data()$$);