-- ============================================================ -- CLEANUP OLD DATA - Run in Supabase SQL Editor -- ============================================================ -- Removes stale data that accumulates over time without -- impacting application functionality. -- -- SAFE TO RUN: All deleted data is either intermediate -- processing artifacts or analytics logs. Core document -- data (documents, document_chunks, analysis_data) is -- never touched by DELETE statements. -- -- Skips tables that don't exist yet (safe for any state). -- -- RECOMMENDATION: Run the check_table_sizes.sql query first -- to see how much data will be affected. -- ============================================================ DO $$ DECLARE deleted bigint; BEGIN -- 1. Processing jobs: completed/failed older than 30 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'processing_jobs') THEN DELETE FROM processing_jobs WHERE status IN ('completed', 'failed') AND completed_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'processing_jobs: deleted % rows', deleted; END IF; -- 2. Execution events: older than 30 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'execution_events') THEN DELETE FROM execution_events WHERE created_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'execution_events: deleted % rows', deleted; END IF; -- 3. Session events: older than 30 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'session_events') THEN DELETE FROM session_events WHERE created_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'session_events: deleted % rows', deleted; END IF; -- 4. Performance metrics: older than 90 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'performance_metrics') THEN DELETE FROM performance_metrics WHERE created_at < NOW() - INTERVAL '90 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'performance_metrics: deleted % rows', deleted; END IF; -- 5. Vector similarity searches: older than 90 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'vector_similarity_searches') THEN DELETE FROM vector_similarity_searches WHERE created_at < NOW() - INTERVAL '90 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'vector_similarity_searches: deleted % rows', deleted; END IF; -- 6. Service health checks: older than 30 days (INFR-01) IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'service_health_checks') THEN DELETE FROM service_health_checks WHERE created_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'service_health_checks: deleted % rows', deleted; END IF; -- 7. Alert events: resolved older than 30 days (INFR-01) IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'alert_events') THEN DELETE FROM alert_events WHERE status = 'resolved' AND created_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'alert_events: deleted % rows', deleted; END IF; -- 8. Agent executions: older than 90 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'agent_executions') THEN DELETE FROM agent_executions WHERE created_at < NOW() - INTERVAL '90 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'agent_executions: deleted % rows', deleted; END IF; -- 9. Processing quality metrics: older than 90 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'processing_quality_metrics') THEN DELETE FROM processing_quality_metrics WHERE created_at < NOW() - INTERVAL '90 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'processing_quality_metrics: deleted % rows', deleted; END IF; -- 10. Agentic RAG sessions: completed older than 90 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'agentic_rag_sessions') THEN DELETE FROM agentic_rag_sessions WHERE status IN ('completed', 'failed') AND created_at < NOW() - INTERVAL '90 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'agentic_rag_sessions: deleted % rows', deleted; END IF; -- 11. Null out extracted_text for completed documents older than 30 days IF EXISTS (SELECT 1 FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relname = 'documents') THEN UPDATE documents SET extracted_text = NULL WHERE status = 'completed' AND analysis_data IS NOT NULL AND extracted_text IS NOT NULL AND created_at < NOW() - INTERVAL '30 days'; GET DIAGNOSTICS deleted = ROW_COUNT; RAISE NOTICE 'documents extracted_text nulled: % rows', deleted; END IF; RAISE NOTICE '--- CLEANUP COMPLETE ---'; END $$;