-- ============================================================
-- ALTERNATIVE: PG_CRON AUTOMATED CLEANUP
-- ============================================================
-- NOTE: The primary cleanup runs as a Firebase scheduled
-- function (cleanupOldData in index.ts). This pg_cron
-- approach is an ALTERNATIVE if you prefer database-level
-- scheduling instead.
--
-- Supabase includes pg_cron. This script creates a scheduled
-- job that automatically enforces the retention policies.
--
-- PREREQUISITE: pg_cron extension must be enabled.
-- Go to Supabase Dashboard → Database → Extensions → enable pg_cron
--
-- SCHEDULE: Runs daily at 03:00 UTC (off-peak)
-- ============================================================

-- Enable the pg_cron extension (if not already enabled)
CREATE EXTENSION IF NOT EXISTS pg_cron;

-- Grant usage to postgres role (required on Supabase)
GRANT USAGE ON SCHEMA cron TO postgres;

-- ============================================================
-- Create the cleanup function
-- ============================================================
-- Applies every retention rule in one call and returns a jsonb
-- object mapping each rule's key to the number of rows it
-- affected, e.g. {"processing_jobs": 12, ..., "documents_text_nulled": 3}.
--
-- Security notes:
--   * The function is SECURITY DEFINER, so it runs with the
--     privileges of its owner. The search_path is therefore pinned
--     to the empty string and every table is schema-qualified, so a
--     caller cannot redirect the statements to look-alike objects.
--   * NOTE(review): the tables are assumed to live in the "public"
--     schema (same schema as this function) -- adjust the qualifiers
--     if they live elsewhere.
CREATE OR REPLACE FUNCTION public.cleanup_old_data()
RETURNS jsonb
LANGUAGE plpgsql
SECURITY DEFINER
SET search_path = ''
AS $$
DECLARE
    result jsonb := '{}'::jsonb;
    -- Row count of the most recent DELETE/UPDATE statement.
    affected_rows bigint;
    -- Retention cutoffs. NOW() is fixed for the whole transaction,
    -- so every rule below is evaluated against the same instant.
    cutoff_30_days CONSTANT timestamptz := NOW() - INTERVAL '30 days';
    cutoff_90_days CONSTANT timestamptz := NOW() - INTERVAL '90 days';
BEGIN
    -- 1. Processing jobs: completed/failed older than 30 days
    --    (rows whose completed_at is NULL never match and are kept)
    DELETE FROM public.processing_jobs
    WHERE status IN ('completed', 'failed')
      AND completed_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('processing_jobs', affected_rows);

    -- 2. Execution events: older than 30 days
    DELETE FROM public.execution_events
    WHERE created_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('execution_events', affected_rows);

    -- 3. Session events: older than 30 days
    DELETE FROM public.session_events
    WHERE created_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('session_events', affected_rows);

    -- 4. Performance metrics: older than 90 days
    DELETE FROM public.performance_metrics
    WHERE created_at < cutoff_90_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('performance_metrics', affected_rows);

    -- 5. Vector similarity searches: older than 90 days
    DELETE FROM public.vector_similarity_searches
    WHERE created_at < cutoff_90_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('vector_similarity_searches', affected_rows);

    -- 6. Service health checks: older than 30 days (INFR-01)
    DELETE FROM public.service_health_checks
    WHERE created_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('service_health_checks', affected_rows);

    -- 7. Alert events: resolved and created more than 30 days ago (INFR-01)
    DELETE FROM public.alert_events
    WHERE status = 'resolved'
      AND created_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('alert_events', affected_rows);

    -- 8. Agent executions: older than 90 days
    DELETE FROM public.agent_executions
    WHERE created_at < cutoff_90_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('agent_executions', affected_rows);

    -- 9. Processing quality metrics: older than 90 days
    DELETE FROM public.processing_quality_metrics
    WHERE created_at < cutoff_90_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('processing_quality_metrics', affected_rows);

    -- 10. Agentic RAG sessions: completed/failed older than 90 days
    DELETE FROM public.agentic_rag_sessions
    WHERE status IN ('completed', 'failed')
      AND created_at < cutoff_90_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('agentic_rag_sessions', affected_rows);

    -- 11. Null out extracted_text for completed documents older than 30 days.
    --     Only rows that already have analysis_data are touched; the
    --     document rows themselves are kept.
    UPDATE public.documents
    SET extracted_text = NULL
    WHERE status = 'completed'
      AND analysis_data IS NOT NULL
      AND extracted_text IS NOT NULL
      AND created_at < cutoff_30_days;
    GET DIAGNOSTICS affected_rows = ROW_COUNT;
    result := result || jsonb_build_object('documents_text_nulled', affected_rows);

    RETURN result;
END;
$$;

-- ============================================================
-- Restrict who may execute the cleanup function
-- ============================================================
-- Functions are executable by PUBLIC by default. Because this one
-- is SECURITY DEFINER and deletes data, only its owner (which runs
-- the pg_cron job) and explicitly granted roles should call it.
REVOKE ALL ON FUNCTION public.cleanup_old_data() FROM PUBLIC;

-- NOTE(review): Supabase projects normally grant EXECUTE on functions
-- in "public" to the API roles below, which would expose this function
-- as an RPC endpoint -- confirm for your project. The role-existence
-- guard keeps the script runnable on databases without these roles.
DO $revoke$
DECLARE
    api_role text;
BEGIN
    FOREACH api_role IN ARRAY ARRAY['anon', 'authenticated'] LOOP
        IF EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = api_role) THEN
            EXECUTE format(
                'REVOKE ALL ON FUNCTION public.cleanup_old_data() FROM %I',
                api_role
            );
        END IF;
    END LOOP;
END;
$revoke$;

-- ============================================================
-- Schedule the cron job: daily at 03:00 UTC
-- ============================================================
SELECT cron.schedule(
    'daily-cleanup-old-data',                -- job name
    '0 3 * * *',                             -- cron expression: 3 AM UTC daily
    $$SELECT public.cleanup_old_data()$$
);

-- ============================================================
-- Verify the job was created
-- ============================================================
SELECT
    jobid,
    jobname,
    schedule,
    command,
    active
FROM cron.job
WHERE jobname = 'daily-cleanup-old-data';

-- ============================================================
-- MANAGEMENT COMMANDS (for reference)
-- ============================================================

-- View all scheduled jobs:
--   SELECT * FROM cron.job;

-- View recent job runs and results:
--   SELECT * FROM cron.job_run_details ORDER BY start_time DESC LIMIT 20;

-- Run cleanup manually (to test):
--   SELECT public.cleanup_old_data();

-- Unschedule the job:
--   SELECT cron.unschedule('daily-cleanup-old-data');

-- Change schedule to weekly (Sundays at 3 AM):
--   SELECT cron.unschedule('daily-cleanup-old-data');
--   SELECT cron.schedule('weekly-cleanup-old-data', '0 3 * * 0', $$SELECT public.cleanup_old_data()$$);