diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 0f47008..5197d62 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -62,7 +62,11 @@ Plans: 2. `GET /admin/analytics` returns processing summary (upload counts, success/failure rates, avg processing time) sourced from Supabase, not in-memory state 3. `GET /admin/alerts` and `POST /admin/alerts/:id/acknowledge` function correctly and are blocked to non-admin users 4. Document processing in `jobProcessorService.ts` and `llmService.ts` emits analytics events at stage transitions without any change to existing processing behavior -**Plans**: TBD +**Plans:** 2 plans + +Plans: +- [ ] 03-01-PLAN.md — Admin auth middleware + admin routes (health, analytics, alerts endpoints) +- [ ] 03-02-PLAN.md — Analytics instrumentation in jobProcessorService ### Phase 4: Frontend **Goal**: The admin can see live service health, processing metrics, and active alerts directly in the application UI diff --git a/.planning/phases/03-api-layer/03-01-PLAN.md b/.planning/phases/03-api-layer/03-01-PLAN.md new file mode 100644 index 0000000..ac71d80 --- /dev/null +++ b/.planning/phases/03-api-layer/03-01-PLAN.md @@ -0,0 +1,283 @@ +--- +phase: 03-api-layer +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - backend/src/middleware/requireAdmin.ts + - backend/src/services/analyticsService.ts + - backend/src/routes/admin.ts + - backend/src/index.ts +autonomous: true +requirements: + - INFR-02 + - HLTH-01 + - ANLY-02 + +must_haves: + truths: + - "GET /admin/health returns current health status for all four services when called by admin" + - "GET /admin/analytics returns processing summary (uploads, success/failure, avg time) for a configurable time range" + - "GET /admin/alerts returns active alert events" + - "POST /admin/alerts/:id/acknowledge marks an alert as acknowledged" + - "Non-admin authenticated users receive 404 on all admin endpoints" + - "Unauthenticated requests receive 401 on admin endpoints" + artifacts: + - path: "backend/src/middleware/requireAdmin.ts" + provides: "Admin email check middleware returning 404 for non-admin" + exports: ["requireAdminEmail"] + - path: "backend/src/routes/admin.ts" + provides: "Admin router with health, analytics, alerts endpoints" + exports: ["default"] + - path: "backend/src/services/analyticsService.ts" + provides: "getAnalyticsSummary function using Postgres pool for aggregate queries" + exports: ["getAnalyticsSummary", "AnalyticsSummary"] + key_links: + - from: "backend/src/routes/admin.ts" + to: "backend/src/middleware/requireAdmin.ts" + via: "router.use(requireAdminEmail)" + pattern: "requireAdminEmail" + - from: "backend/src/routes/admin.ts" + to: "backend/src/models/HealthCheckModel.ts" + via: "HealthCheckModel.findLatestByService()" + pattern: "findLatestByService" + - from: "backend/src/routes/admin.ts" + to: "backend/src/services/analyticsService.ts" + via: "getAnalyticsSummary(range)" + pattern: "getAnalyticsSummary" + - from: "backend/src/index.ts" + to: "backend/src/routes/admin.ts" + via: "app.use('/admin', adminRoutes)" + pattern: "app\\.use.*admin" +--- + + +Create admin-authenticated HTTP endpoints exposing health status, alerts, and processing analytics. + +Purpose: Enables the admin to query service health, view active alerts, acknowledge alerts, and see processing analytics through protected API routes. This is the data access layer that Phase 4 (frontend) will consume. + +Output: Four working admin endpoints behind Firebase Auth + admin email verification, plus the `getAnalyticsSummary()` query function. + + + +@/home/jonathan/.claude/get-shit-done/workflows/execute-plan.md +@/home/jonathan/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/03-api-layer/03-RESEARCH.md + +@backend/src/middleware/firebaseAuth.ts +@backend/src/models/HealthCheckModel.ts +@backend/src/models/AlertEventModel.ts +@backend/src/services/analyticsService.ts +@backend/src/services/healthProbeService.ts +@backend/src/routes/monitoring.ts +@backend/src/index.ts + + + + + + Task 1: Create requireAdmin middleware and getAnalyticsSummary function + + backend/src/middleware/requireAdmin.ts + backend/src/services/analyticsService.ts + + +**1. Create `backend/src/middleware/requireAdmin.ts`:** + +Create the admin email check middleware. This runs AFTER `verifyFirebaseToken` in the middleware chain, so `req.user` is already populated. + +```typescript +import { Response, NextFunction } from 'express'; +import { FirebaseAuthenticatedRequest } from './firebaseAuth'; +import { logger } from '../utils/logger'; + +export function requireAdminEmail( + req: FirebaseAuthenticatedRequest, + res: Response, + next: NextFunction +): void { + // Read inside function, not at module level — Firebase Secrets not available at module load time + const adminEmail = process.env['ADMIN_EMAIL'] ?? process.env['EMAIL_WEEKLY_RECIPIENT']; + + if (!adminEmail) { + logger.warn('requireAdminEmail: neither ADMIN_EMAIL nor EMAIL_WEEKLY_RECIPIENT is configured — denying all admin access'); + res.status(404).json({ error: 'Not found' }); + return; + } + + const userEmail = req.user?.email; + + if (!userEmail || userEmail !== adminEmail) { + // 404 — do not reveal admin routes exist (per locked decision) + logger.warn('requireAdminEmail: access denied', { + uid: req.user?.uid ?? 'unauthenticated', + email: userEmail ?? 'none', + path: req.path, + }); + res.status(404).json({ error: 'Not found' }); + return; + } + + next(); +} +``` + +Key constraints: +- Return 404 (not 403) for non-admin users — per locked decision, do not reveal admin routes exist +- Read env vars inside function body, not module level (Firebase Secrets timing, matches alertService pattern) +- Fail closed: if no admin email configured, deny all access with logged warning + +**2. Add `getAnalyticsSummary()` to `backend/src/services/analyticsService.ts`:** + +Add below the existing `deleteProcessingEventsOlderThan` function. Use `getPostgresPool()` (from `../config/supabase`) for aggregate SQL — Supabase JS client does not support COUNT/AVG. + +```typescript +import { getPostgresPool } from '../config/supabase'; + +export interface AnalyticsSummary { + range: string; + totalUploads: number; + succeeded: number; + failed: number; + successRate: number; + avgProcessingMs: number | null; + generatedAt: string; +} + +function parseRange(range: string): string { + if (/^\d+h$/.test(range)) return range.replace('h', ' hours'); + if (/^\d+d$/.test(range)) return range.replace('d', ' days'); + return '24 hours'; // fallback default +} + +export async function getAnalyticsSummary(range: string = '24h'): Promise { + const interval = parseRange(range); + const pool = getPostgresPool(); + + const { rows } = await pool.query<{ + total_uploads: string; + succeeded: string; + failed: string; + avg_processing_ms: string | null; + }>(` + SELECT + COUNT(*) FILTER (WHERE event_type = 'upload_started') AS total_uploads, + COUNT(*) FILTER (WHERE event_type = 'completed') AS succeeded, + COUNT(*) FILTER (WHERE event_type = 'failed') AS failed, + AVG(duration_ms) FILTER (WHERE event_type = 'completed') AS avg_processing_ms + FROM document_processing_events + WHERE created_at >= NOW() - $1::interval + `, [interval]); + + const row = rows[0]!; + const total = parseInt(row.total_uploads, 10); + const succeeded = parseInt(row.succeeded, 10); + const failed = parseInt(row.failed, 10); + + return { + range, + totalUploads: total, + succeeded, + failed, + successRate: total > 0 ? succeeded / total : 0, + avgProcessingMs: row.avg_processing_ms ? parseFloat(row.avg_processing_ms) : null, + generatedAt: new Date().toISOString(), + }; +} +``` + +Note: Use `$1::interval` cast for parameterized interval — PostgreSQL requires explicit cast for interval parameters. + + + cd /home/jonathan/Coding/cim_summary/backend && npx tsc --noEmit 2>&1 | head -30 + Check that requireAdmin.ts exports requireAdminEmail and analyticsService.ts exports getAnalyticsSummary + + requireAdminEmail middleware returns 404 for non-admin users and calls next() for admin. getAnalyticsSummary queries document_processing_events with configurable time range and returns structured summary. + + + + Task 2: Create admin routes and mount in Express app + + backend/src/routes/admin.ts + backend/src/index.ts + + +**1. Create `backend/src/routes/admin.ts`:** + +Follow the exact pattern from `routes/monitoring.ts`. Apply `verifyFirebaseToken` + `requireAdminEmail` + `addCorrelationId` as router-level middleware. Use the `{ success, data, correlationId }` envelope pattern. + +Service names MUST match what healthProbeService writes (confirmed from codebase): `'document_ai'`, `'llm_api'`, `'supabase'`, `'firebase_auth'`. + +**Endpoints:** + +**GET /health** — Returns latest health check for all four services. +- Use `Promise.all(SERVICE_NAMES.map(name => HealthCheckModel.findLatestByService(name)))`. +- For each result, map to `{ service, status, checkedAt, latencyMs, errorMessage }`. If `findLatestByService` returns null, use `status: 'unknown'`. +- Return `{ success: true, data: [...], correlationId }`. + +**GET /analytics** — Returns processing summary. +- Accept `?range=24h` query param (default: `'24h'`). +- Validate range matches `/^\d+[hd]$/` — return 400 if invalid. +- Call `getAnalyticsSummary(range)`. +- Return `{ success: true, data: summary, correlationId }`. + +**GET /alerts** — Returns active alerts. +- Call `AlertEventModel.findActive()` (no arguments = all active alerts). +- Return `{ success: true, data: alerts, correlationId }`. + +**POST /alerts/:id/acknowledge** — Acknowledge an alert. +- Call `AlertEventModel.acknowledge(req.params.id)`. +- If error message includes 'not found', return 404. +- Return `{ success: true, data: updatedAlert, correlationId }`. + +All error handlers follow the same pattern: `logger.error(...)` then `res.status(500).json({ success: false, error: 'Human-readable message', correlationId })`. + +**2. Mount in `backend/src/index.ts`:** + +Add import: `import adminRoutes from './routes/admin';` + +Add route registration alongside existing routes (after the `app.use('/api/audit', auditRoutes);` line): +```typescript +app.use('/admin', adminRoutes); +``` + +The `/admin` prefix is unique — no conflicts with existing routes. + + + cd /home/jonathan/Coding/cim_summary/backend && npx tsc --noEmit 2>&1 | head -30 + Verify admin.ts has 4 route handlers, index.ts mounts /admin + + Four admin endpoints (GET /health, GET /analytics, GET /alerts, POST /alerts/:id/acknowledge) are mounted behind Firebase Auth + admin email check. Non-admin users get 404. Response envelope matches existing codebase pattern. + + + + + +1. `npx tsc --noEmit` passes with no errors +2. `backend/src/middleware/requireAdmin.ts` exists and exports `requireAdminEmail` +3. `backend/src/routes/admin.ts` exists and exports default Router with 4 endpoints +4. `backend/src/services/analyticsService.ts` exports `getAnalyticsSummary` and `AnalyticsSummary` +5. `backend/src/index.ts` imports and mounts admin routes at `/admin` +6. Admin routes use `verifyFirebaseToken` + `requireAdminEmail` middleware chain +7. Service names in health endpoint match healthProbeService: `document_ai`, `llm_api`, `supabase`, `firebase_auth` + + + +- TypeScript compiles without errors +- All four admin endpoints defined with correct HTTP methods and paths +- Admin auth middleware returns 404 for non-admin, next() for admin +- Analytics summary uses getPostgresPool() for aggregate SQL, not Supabase JS client +- Response envelope matches `{ success, data, correlationId }` pattern +- No `console.log` — all logging via Winston logger + + + +After completion, create `.planning/phases/03-api-layer/03-01-SUMMARY.md` + diff --git a/.planning/phases/03-api-layer/03-02-PLAN.md b/.planning/phases/03-api-layer/03-02-PLAN.md new file mode 100644 index 0000000..fd815c6 --- /dev/null +++ b/.planning/phases/03-api-layer/03-02-PLAN.md @@ -0,0 +1,149 @@ +--- +phase: 03-api-layer +plan: 02 +type: execute +wave: 1 +depends_on: [] +files_modified: + - backend/src/services/jobProcessorService.ts +autonomous: true +requirements: + - ANLY-02 + +must_haves: + truths: + - "Document processing emits upload_started event after job is marked as processing" + - "Document processing emits completed event with duration_ms after job succeeds" + - "Document processing emits failed event with duration_ms and error_message when job fails" + - "Analytics instrumentation does not change existing processing behavior or error handling" + artifacts: + - path: "backend/src/services/jobProcessorService.ts" + provides: "Analytics instrumentation at 3 lifecycle points in processJob()" + contains: "recordProcessingEvent" + key_links: + - from: "backend/src/services/jobProcessorService.ts" + to: "backend/src/services/analyticsService.ts" + via: "import and call recordProcessingEvent()" + pattern: "recordProcessingEvent" +--- + + +Instrument the document processing pipeline with fire-and-forget analytics events at key lifecycle points. + +Purpose: Enables the analytics endpoint (Plan 03-01) to report real processing data. Without instrumentation, `document_processing_events` table stays empty and `GET /admin/analytics` returns zeros. + +Output: Three `recordProcessingEvent()` calls in `jobProcessorService.processJob()` — one at job start, one at completion, one at failure. + + + +@/home/jonathan/.claude/get-shit-done/workflows/execute-plan.md +@/home/jonathan/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/03-api-layer/03-RESEARCH.md + +@backend/src/services/jobProcessorService.ts +@backend/src/services/analyticsService.ts + + + + + + Task 1: Add analytics instrumentation to processJob lifecycle + + backend/src/services/jobProcessorService.ts + + +**1. Add import at top of file:** + +```typescript +import { recordProcessingEvent } from './analyticsService'; +``` + +**2. Emit `upload_started` after `markAsProcessing` (line ~133):** + +After `await ProcessingJobModel.markAsProcessing(jobId);` and `jobStatusUpdated = true;`, add: + +```typescript +// Analytics: job processing started (fire-and-forget, void return) +recordProcessingEvent({ + document_id: job.document_id, + user_id: job.user_id, + event_type: 'upload_started', +}); +``` + +Place this BEFORE the timeout setup block (before `const processingTimeout = ...`). + +**3. Emit `completed` after `markAsCompleted` (line ~329):** + +After `const processingTime = Date.now() - startTime;` and the `logger.info('Job completed successfully', ...)` call, add: + +```typescript +// Analytics: job completed (fire-and-forget, void return) +recordProcessingEvent({ + document_id: job.document_id, + user_id: job.user_id, + event_type: 'completed', + duration_ms: processingTime, +}); +``` + +**4. Emit `failed` in catch block (line ~355-368):** + +After `const processingTime = Date.now() - startTime;` and `logger.error('Job processing failed', ...)`, but BEFORE the `try { await ProcessingJobModel.markAsFailed(...)` block, add: + +```typescript +// Analytics: job failed (fire-and-forget, void return) +// Guard with job check — job is null if findById failed before assignment +if (job) { + recordProcessingEvent({ + document_id: job.document_id, + user_id: job.user_id, + event_type: 'failed', + duration_ms: processingTime, + error_message: errorMessage, + }); +} +``` + +**Critical constraints:** +- `recordProcessingEvent` returns `void` (not `Promise`) — do NOT use `await`. This is the fire-and-forget guarantee (PITFALL-6, STATE.md decision). +- Do NOT wrap in try/catch — the function internally catches all errors and logs them. +- Do NOT modify any existing code around the instrumentation points — add lines, don't change lines. +- Guard `job` in catch block — it can be null if `findById` threw before assignment. +- Use `event_type: 'upload_started'` (not `'processing_started'`) — per locked decision, key milestones only: upload started, processing complete, processing failed. + + + cd /home/jonathan/Coding/cim_summary/backend && npx tsc --noEmit 2>&1 | head -30 && npx vitest run --reporter=verbose 2>&1 | tail -20 + Verify 3 recordProcessingEvent calls exist in jobProcessorService.ts, none use await + + processJob() emits upload_started after markAsProcessing, completed with duration after markAsCompleted, and failed with duration+error in catch block. All calls are fire-and-forget (no await). Existing processing logic unchanged — no behavior modification. + + + + + +1. `npx tsc --noEmit` passes with no errors +2. `npx vitest run` — all existing tests pass (no regressions) +3. `grep -c 'recordProcessingEvent' backend/src/services/jobProcessorService.ts` returns 3 +4. `grep 'await recordProcessingEvent' backend/src/services/jobProcessorService.ts` returns nothing (no accidental await) +5. `recordProcessingEvent` import exists at top of file + + + +- TypeScript compiles without errors +- All existing tests pass (zero regressions) +- Three recordProcessingEvent calls at correct lifecycle points +- No await on recordProcessingEvent (fire-and-forget preserved) +- job null-guard in catch block prevents runtime errors +- No changes to existing processing logic + + + +After completion, create `.planning/phases/03-api-layer/03-02-SUMMARY.md` +