Major release with significant performance improvements and new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
172 lines
6.0 KiB
TypeScript
172 lines
6.0 KiB
TypeScript
#!/usr/bin/env ts-node
|
||
/**
|
||
* Monitor system status - jobs, documents, and processing
|
||
*/
|
||
|
||
import dotenv from 'dotenv';
|
||
dotenv.config();
|
||
|
||
import { getPostgresPool } from '../config/supabase';
|
||
import { DocumentModel } from '../models/DocumentModel';
|
||
import { ProcessingJobModel } from '../models/ProcessingJobModel';
|
||
|
||
async function monitorSystem() {
|
||
console.log('🔍 Monitoring System Status...\n');
|
||
|
||
const pool = getPostgresPool();
|
||
|
||
try {
|
||
// Job status summary
|
||
const jobStatuses = await pool.query(`
|
||
SELECT status, COUNT(*) as count
|
||
FROM processing_jobs
|
||
GROUP BY status
|
||
ORDER BY status;
|
||
`);
|
||
|
||
console.log('📊 PROCESSING JOBS STATUS:');
|
||
if (jobStatuses.rows.length === 0) {
|
||
console.log(' No jobs found');
|
||
} else {
|
||
jobStatuses.rows.forEach(row => {
|
||
console.log(` ${row.status}: ${row.count}`);
|
||
});
|
||
}
|
||
|
||
// Recent jobs
|
||
const recentJobs = await pool.query(`
|
||
SELECT
|
||
id,
|
||
document_id,
|
||
status,
|
||
attempts,
|
||
max_attempts,
|
||
created_at,
|
||
started_at,
|
||
completed_at,
|
||
error
|
||
FROM processing_jobs
|
||
ORDER BY created_at DESC
|
||
LIMIT 10;
|
||
`);
|
||
|
||
console.log('\n📋 RECENT JOBS (last 10):');
|
||
if (recentJobs.rows.length === 0) {
|
||
console.log(' No jobs found');
|
||
} else {
|
||
recentJobs.rows.forEach(job => {
|
||
const id = job.id.substring(0, 8);
|
||
const docId = job.document_id.substring(0, 8);
|
||
const created = job.created_at ? new Date(job.created_at).toLocaleString() : 'N/A';
|
||
const started = job.started_at ? new Date(job.started_at).toLocaleString() : '-';
|
||
const completed = job.completed_at ? new Date(job.completed_at).toLocaleString() : '-';
|
||
const error = job.error ? ` | Error: ${job.error.substring(0, 50)}` : '';
|
||
|
||
console.log(` ${id}... | doc:${docId}... | ${job.status} | attempts: ${job.attempts}/${job.max_attempts}`);
|
||
console.log(` Created: ${created} | Started: ${started} | Completed: ${completed}${error}`);
|
||
});
|
||
}
|
||
|
||
// Stuck jobs (pending for more than 5 minutes)
|
||
const stuckJobs = await pool.query(`
|
||
SELECT id, document_id, status, created_at
|
||
FROM processing_jobs
|
||
WHERE status = 'pending'
|
||
AND created_at < NOW() - INTERVAL '5 minutes'
|
||
ORDER BY created_at ASC;
|
||
`);
|
||
|
||
if (stuckJobs.rows.length > 0) {
|
||
console.log(`\n⚠️ STUCK JOBS (pending > 5 minutes): ${stuckJobs.rows.length}`);
|
||
stuckJobs.rows.forEach(job => {
|
||
const age = Math.round((Date.now() - new Date(job.created_at).getTime()) / 1000 / 60);
|
||
console.log(` ${job.id.substring(0, 8)}... | doc:${job.document_id.substring(0, 8)}... | pending for ${age} minutes`);
|
||
});
|
||
}
|
||
|
||
// Processing jobs (started but not completed)
|
||
const processingJobs = await pool.query(`
|
||
SELECT id, document_id, status, started_at
|
||
FROM processing_jobs
|
||
WHERE status = 'processing'
|
||
ORDER BY started_at DESC;
|
||
`);
|
||
|
||
if (processingJobs.rows.length > 0) {
|
||
console.log(`\n⏳ PROCESSING JOBS (currently running): ${processingJobs.rows.length}`);
|
||
processingJobs.rows.forEach(job => {
|
||
const duration = job.started_at
|
||
? Math.round((Date.now() - new Date(job.started_at).getTime()) / 1000 / 60)
|
||
: 0;
|
||
console.log(` ${job.id.substring(0, 8)}... | doc:${job.document_id.substring(0, 8)}... | running for ${duration} minutes`);
|
||
});
|
||
}
|
||
|
||
// Recent documents
|
||
const recentDocs = await pool.query(`
|
||
SELECT
|
||
id,
|
||
original_file_name,
|
||
status,
|
||
analysis_data IS NOT NULL as has_analysis,
|
||
generated_summary IS NOT NULL as has_summary,
|
||
created_at,
|
||
processing_completed_at
|
||
FROM documents
|
||
WHERE status IN ('processing_llm', 'processing', 'completed', 'failed')
|
||
ORDER BY created_at DESC
|
||
LIMIT 10;
|
||
`);
|
||
|
||
console.log('\n📄 RECENT DOCUMENTS (last 10):');
|
||
if (recentDocs.rows.length === 0) {
|
||
console.log(' No documents found');
|
||
} else {
|
||
recentDocs.rows.forEach(doc => {
|
||
const id = doc.id.substring(0, 8);
|
||
const name = doc.original_file_name || 'unnamed';
|
||
const created = doc.created_at ? new Date(doc.created_at).toLocaleString() : 'N/A';
|
||
const completed = doc.processing_completed_at ? new Date(doc.processing_completed_at).toLocaleString() : '-';
|
||
const analysis = doc.has_analysis ? '✅' : '❌';
|
||
const summary = doc.has_summary ? '✅' : '❌';
|
||
|
||
console.log(` ${id}... | ${name.substring(0, 40)}`);
|
||
console.log(` Status: ${doc.status} | Analysis: ${analysis} | Summary: ${summary}`);
|
||
console.log(` Created: ${created} | Completed: ${completed}`);
|
||
});
|
||
}
|
||
|
||
// Documents stuck in processing
|
||
const stuckDocs = await pool.query(`
|
||
SELECT id, original_file_name, status, created_at
|
||
FROM documents
|
||
WHERE status IN ('processing_llm', 'processing')
|
||
AND created_at < NOW() - INTERVAL '10 minutes'
|
||
ORDER BY created_at ASC;
|
||
`);
|
||
|
||
if (stuckDocs.rows.length > 0) {
|
||
console.log(`\n⚠️ STUCK DOCUMENTS (processing > 10 minutes): ${stuckDocs.rows.length}`);
|
||
stuckDocs.rows.forEach(doc => {
|
||
const age = Math.round((Date.now() - new Date(doc.created_at).getTime()) / 1000 / 60);
|
||
console.log(` ${doc.id.substring(0, 8)}... | ${doc.original_file_name || 'unnamed'} | ${doc.status} for ${age} minutes`);
|
||
});
|
||
}
|
||
|
||
console.log('\n✅ Monitoring complete');
|
||
console.log('\n💡 To check Firebase logs:');
|
||
console.log(' firebase functions:log --only processDocumentJobs --limit 50');
|
||
console.log(' firebase functions:log --only api --limit 50');
|
||
|
||
await pool.end();
|
||
|
||
} catch (error) {
|
||
console.error('❌ Error monitoring system:', error instanceof Error ? error.message : String(error));
|
||
await pool.end();
|
||
process.exit(1);
|
||
}
|
||
}
|
||
|
||
monitorSystem().catch(console.error);
|
||
|