feat(02-02): install nodemailer and create healthProbeService
- Install nodemailer + @types/nodemailer (needed by Plan 03)
- Create healthProbeService.ts with 4 probers: document_ai, llm_api, supabase, firebase_auth
- Each probe makes a real authenticated API call
- Each probe returns structured ProbeResult with status, latency_ms, error_message
- LLM probe uses cheapest model (claude-haiku-4-5) with max_tokens 5
- Supabase probe uses getPostgresPool().query('SELECT 1') not PostgREST
- Firebase Auth probe distinguishes expected vs unexpected errors
- runAllProbes orchestrator uses Promise.allSettled for fault isolation
- Results persisted via HealthCheckModel.create() after each probe
This commit is contained in:
21
backend/package-lock.json
generated
21
backend/package-lock.json
generated
@@ -26,6 +26,7 @@
|
||||
"joi": "^17.11.0",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"morgan": "^1.10.0",
|
||||
"nodemailer": "^8.0.1",
|
||||
"openai": "^5.10.2",
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pdf-parse": "^1.1.1",
|
||||
@@ -44,6 +45,7 @@
|
||||
"@types/jsonwebtoken": "^9.0.5",
|
||||
"@types/morgan": "^1.9.9",
|
||||
"@types/node": "^20.9.0",
|
||||
"@types/nodemailer": "^7.0.11",
|
||||
"@types/pdf-parse": "^1.1.4",
|
||||
"@types/pg": "^8.10.7",
|
||||
"@types/uuid": "^10.0.0",
|
||||
@@ -2183,6 +2185,16 @@
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/nodemailer": {
|
||||
"version": "7.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@types/nodemailer/-/nodemailer-7.0.11.tgz",
|
||||
"integrity": "sha512-E+U4RzR2dKrx+u3N4DlsmLaDC6mMZOM/TPROxA0UAPiTgI0y4CEFBmZE+coGWTjakDriRsXG368lNk1u9Q0a2g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/normalize-package-data": {
|
||||
"version": "2.4.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz",
|
||||
@@ -6419,6 +6431,15 @@
|
||||
"node": ">= 6.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/nodemailer": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-8.0.1.tgz",
|
||||
"integrity": "sha512-5kcldIXmaEjZcHR6F28IKGSgpmZHaF1IXLWFTG+Xh3S+Cce4MiakLtWY+PlBU69fLbRa8HlaGIrC/QolUpHkhg==",
|
||||
"license": "MIT-0",
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/normalize-package-data": {
|
||||
"version": "2.5.0",
|
||||
"resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz",
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.57.0",
|
||||
"@google-cloud/documentai": "^9.3.0",
|
||||
"@google-cloud/functions-framework": "^3.4.0",
|
||||
"@google-cloud/storage": "^7.16.0",
|
||||
"@supabase/supabase-js": "^2.53.0",
|
||||
"@types/pdfkit": "^0.17.2",
|
||||
@@ -58,6 +59,7 @@
|
||||
"joi": "^17.11.0",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"morgan": "^1.10.0",
|
||||
"nodemailer": "^8.0.1",
|
||||
"openai": "^5.10.2",
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pdf-parse": "^1.1.1",
|
||||
@@ -67,8 +69,7 @@
|
||||
"uuid": "^11.1.0",
|
||||
"winston": "^3.11.0",
|
||||
"zod": "^3.25.76",
|
||||
"zod-to-json-schema": "^3.24.6",
|
||||
"@google-cloud/functions-framework": "^3.4.0"
|
||||
"zod-to-json-schema": "^3.24.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bcryptjs": "^2.4.6",
|
||||
@@ -77,6 +78,7 @@
|
||||
"@types/jsonwebtoken": "^9.0.5",
|
||||
"@types/morgan": "^1.9.9",
|
||||
"@types/node": "^20.9.0",
|
||||
"@types/nodemailer": "^7.0.11",
|
||||
"@types/pdf-parse": "^1.1.4",
|
||||
"@types/pg": "^8.10.7",
|
||||
"@types/uuid": "^10.0.0",
|
||||
@@ -84,9 +86,9 @@
|
||||
"@typescript-eslint/parser": "^6.10.0",
|
||||
"@vitest/coverage-v8": "^2.1.0",
|
||||
"eslint": "^8.53.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"ts-node-dev": "^2.0.0",
|
||||
"typescript": "^5.2.2",
|
||||
"ts-node": "^10.9.2",
|
||||
"vitest": "^2.1.0"
|
||||
}
|
||||
}
|
||||
|
||||
248
backend/src/services/healthProbeService.ts
Normal file
248
backend/src/services/healthProbeService.ts
Normal file
@@ -0,0 +1,248 @@
|
||||
import { DocumentProcessorServiceClient } from '@google-cloud/documentai';
|
||||
import Anthropic from '@anthropic-ai/sdk';
|
||||
import admin from 'firebase-admin';
|
||||
import { getPostgresPool } from '../config/supabase';
|
||||
import { HealthCheckModel } from '../models/HealthCheckModel';
|
||||
import { config } from '../config/env';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Structured outcome of a single external-service health probe.
 * One ProbeResult is produced per probed service per run and persisted
 * via HealthCheckModel.create().
 */
export interface ProbeResult {
  // Stable identifier of the probed service, e.g. 'document_ai', 'llm_api',
  // 'supabase', 'firebase_auth'.
  service_name: string;
  // 'healthy' = fast success; 'degraded' = slow or rate-limited; 'down' = failed.
  status: 'healthy' | 'degraded' | 'down';
  // Wall-clock duration of the probe call in milliseconds.
  latency_ms: number;
  // Populated when the probe errored (status 'degraded' or 'down').
  error_message?: string;
  // Optional extra context, e.g. how firebase_auth established liveness.
  probe_details?: Record<string, unknown>;
}
|
||||
|
||||
// =============================================================================
|
||||
// Individual Probers (private)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Probe Document AI by listing processors.
|
||||
* Latency > 2000ms = 'degraded'. Errors = 'down'.
|
||||
*/
|
||||
async function probeDocumentAI(): Promise<ProbeResult> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const client = new DocumentProcessorServiceClient();
|
||||
const projectId = config.googleCloud.projectId;
|
||||
const location = config.googleCloud.documentAiLocation || 'us';
|
||||
const parent = `projects/${projectId}/locations/${location}`;
|
||||
|
||||
await client.listProcessors({ parent });
|
||||
|
||||
const latency_ms = Date.now() - start;
|
||||
return {
|
||||
service_name: 'document_ai',
|
||||
status: latency_ms > 2000 ? 'degraded' : 'healthy',
|
||||
latency_ms,
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
service_name: 'document_ai',
|
||||
status: 'down',
|
||||
latency_ms: Date.now() - start,
|
||||
error_message: err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Probe LLM API with the cheapest model (claude-haiku-4-5) and max_tokens 5.
|
||||
* Latency > 5000ms = 'degraded'. 429 errors = 'degraded' (rate limit). Other errors = 'down'.
|
||||
*/
|
||||
async function probeLLM(): Promise<ProbeResult> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
|
||||
await client.messages.create({
|
||||
model: 'claude-haiku-4-5',
|
||||
max_tokens: 5,
|
||||
messages: [{ role: 'user', content: 'Hi' }],
|
||||
});
|
||||
|
||||
const latency_ms = Date.now() - start;
|
||||
return {
|
||||
service_name: 'llm_api',
|
||||
status: latency_ms > 5000 ? 'degraded' : 'healthy',
|
||||
latency_ms,
|
||||
};
|
||||
} catch (err) {
|
||||
const latency_ms = Date.now() - start;
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
|
||||
// 429 = rate limit — service is alive but degraded
|
||||
if (message.includes('429') || message.includes('rate limit') || message.includes('Too Many Requests')) {
|
||||
return {
|
||||
service_name: 'llm_api',
|
||||
status: 'degraded',
|
||||
latency_ms,
|
||||
error_message: message,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
service_name: 'llm_api',
|
||||
status: 'down',
|
||||
latency_ms,
|
||||
error_message: message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Probe Supabase using a direct PostgreSQL connection (not PostgREST).
|
||||
* Latency > 2000ms = 'degraded'. Errors = 'down'.
|
||||
*/
|
||||
async function probeSupabase(): Promise<ProbeResult> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const pool = getPostgresPool();
|
||||
await pool.query('SELECT 1');
|
||||
|
||||
const latency_ms = Date.now() - start;
|
||||
return {
|
||||
service_name: 'supabase',
|
||||
status: latency_ms > 2000 ? 'degraded' : 'healthy',
|
||||
latency_ms,
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
service_name: 'supabase',
|
||||
status: 'down',
|
||||
latency_ms: Date.now() - start,
|
||||
error_message: err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Probe Firebase Auth by calling verifyIdToken with an invalid token.
|
||||
* This ALWAYS throws — we distinguish healthy vs down by the error type:
|
||||
* - Errors containing 'argument', 'INVALID', or 'Decoding' = SDK alive = 'healthy'
|
||||
* - Other errors (network, config) = 'down'
|
||||
*/
|
||||
async function probeFirebaseAuth(): Promise<ProbeResult> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
// This will always throw — we probe by detecting the expected error type
|
||||
await admin.auth().verifyIdToken('invalid-token-probe-check');
|
||||
|
||||
// Should never reach here, but if it does treat as healthy
|
||||
return {
|
||||
service_name: 'firebase_auth',
|
||||
status: 'healthy',
|
||||
latency_ms: Date.now() - start,
|
||||
};
|
||||
} catch (err) {
|
||||
const latency_ms = Date.now() - start;
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
|
||||
// Expected errors from Firebase SDK when token is invalid but SDK is working:
|
||||
// - "Decoding Firebase ID token failed"
|
||||
// - "argument" (invalid argument)
|
||||
// - "INVALID" (invalid token format)
|
||||
const isExpectedError =
|
||||
message.includes('argument') ||
|
||||
message.includes('INVALID') ||
|
||||
message.includes('Decoding') ||
|
||||
message.includes('Firebase ID token') ||
|
||||
message.includes('invalid-token');
|
||||
|
||||
if (isExpectedError) {
|
||||
return {
|
||||
service_name: 'firebase_auth',
|
||||
status: 'healthy',
|
||||
latency_ms,
|
||||
probe_details: { verification_type: 'expected_token_rejection' },
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
service_name: 'firebase_auth',
|
||||
status: 'down',
|
||||
latency_ms,
|
||||
error_message: message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Orchestrator
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Run all 4 probes concurrently. Each probe is wrapped in an individual try/catch
|
||||
* so a single probe failure does not prevent others from running.
|
||||
*
|
||||
* Results are persisted to Supabase via HealthCheckModel.create() after each probe.
|
||||
*/
|
||||
async function runAllProbes(): Promise<ProbeResult[]> {
|
||||
logger.info('healthProbeService: starting all probes');
|
||||
|
||||
const probeRunners = [
|
||||
() => probeDocumentAI(),
|
||||
() => probeLLM(),
|
||||
() => probeSupabase(),
|
||||
() => probeFirebaseAuth(),
|
||||
];
|
||||
|
||||
const results: ProbeResult[] = [];
|
||||
|
||||
// Run all probes concurrently, isolating failures
|
||||
const settled = await Promise.allSettled(probeRunners.map((run) => run()));
|
||||
|
||||
for (const outcome of settled) {
|
||||
let result: ProbeResult;
|
||||
|
||||
if (outcome.status === 'fulfilled') {
|
||||
result = outcome.value;
|
||||
} else {
|
||||
// A probe threw unexpectedly outside its own try/catch — create a down result
|
||||
result = {
|
||||
service_name: 'unknown',
|
||||
status: 'down',
|
||||
latency_ms: 0,
|
||||
error_message: outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason),
|
||||
};
|
||||
}
|
||||
|
||||
results.push(result);
|
||||
|
||||
// Persist each result individually; failures here must not abort the loop
|
||||
try {
|
||||
await HealthCheckModel.create({
|
||||
service_name: result.service_name,
|
||||
status: result.status,
|
||||
latency_ms: result.latency_ms,
|
||||
error_message: result.error_message,
|
||||
probe_details: result.probe_details,
|
||||
});
|
||||
} catch (persistErr) {
|
||||
logger.error('healthProbeService: failed to persist probe result', {
|
||||
service_name: result.service_name,
|
||||
error: persistErr instanceof Error ? persistErr.message : String(persistErr),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const summary = results.reduce<Record<string, string>>((acc, r) => {
|
||||
acc[r.service_name] = r.status;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
logger.info('healthProbeService: all probes complete', { summary });
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Exports
|
||||
// =============================================================================
|
||||
|
||||
export const healthProbeService = { runAllProbes };
|
||||
Reference in New Issue
Block a user