Files
cim_summary/backend/src/scripts/check-database-failures.ts
admin 9c916d12f4 feat: Production release v2.0.0 - Simple Document Processor
Major release with significant performance improvements and new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
2025-11-09 21:07:22 -05:00

162 lines
5.0 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env ts-node
/**
* Script to check database for failed or stuck documents
*
* This script queries the documents table to find:
* - Documents stuck in 'uploading', 'processing', 'processing_llm' or 'extracting_text' status (not updated in the last hour)
* - Documents with 'failed' status and their error messages
* - Patterns in failure types
*/
import { DocumentModel } from '../models/DocumentModel';
import { config } from '../config/env';
import { logger } from '../utils/logger';
/**
 * Shape describing one status bucket: the status label, a count, and the
 * documents carried with it.
 *
 * NOTE(review): not referenced by any code in the visible part of this script.
 */
interface DocumentStatus {
  /** Status label. */
  status: string;
  /** Numeric count (presumably the number of entries in `documents` — confirm). */
  count: number;
  /** Document records; their shape is not declared in this file. */
  documents: Array<any>;
}
/**
 * Shape describing one failure pattern: a pattern string, a count, and
 * example strings.
 *
 * NOTE(review): not referenced by any code in the visible part of this script.
 */
interface FailurePattern {
  /** Text identifying the pattern. */
  errorPattern: string;
  /** Numeric count (presumably the number of matching failures — confirm). */
  count: number;
  /** Example strings for this pattern. */
  examples: Array<string>;
}
/**
 * Loads documents via `DocumentModel.findAll(1000, 0)`, groups them by status,
 * prints a status summary, and reports stuck and failed documents to the console.
 *
 * A document counts as "stuck" when its status is one of the in-progress
 * statuses listed in `stuckStatuses` and its `updated_at` is more than one
 * hour old. A missing `updated_at` is treated as epoch 0, so it counts as stuck.
 *
 * Failed documents are bucketed by a crude "pattern": the first three
 * lower-cased words of the error message that are longer than five characters.
 *
 * Grouping uses `Map` rather than plain objects so that keys which collide with
 * `Object.prototype` members (e.g. a status or error keyword of "constructor")
 * are grouped like any other key instead of throwing.
 *
 * @returns Totals plus the per-status grouping (`statusGroups` is a plain
 *          object keyed by status, each value being the documents in that status).
 * @throws Re-throws any error raised while loading or analysing documents,
 *         after logging it to the console and to `logger`.
 */
async function checkStuckDocuments(): Promise<{
  totalDocuments: number;
  statusGroups: { [key: string]: any[] };
  stuckCount: number;
  failedCount: number;
}> {
  console.log('\n📊 Checking for Stuck Documents...\n');
  try {
    // Arguments are presumably (limit, offset) — confirm against DocumentModel.
    // If so, every total reported below is capped at 1000 documents.
    const allDocuments = await DocumentModel.findAll(1000, 0);

    // Group by status. Documents without a status fall into 'unknown'.
    const groupsByStatus = new Map<string, any[]>();
    for (const doc of allDocuments) {
      const status = String(doc.status || 'unknown');
      const bucket = groupsByStatus.get(status);
      if (bucket) {
        bucket.push(doc);
      } else {
        groupsByStatus.set(status, [doc]);
      }
    }

    // In-progress statuses that should not stay unchanged for long.
    const stuckStatuses = ['uploading', 'processing', 'processing_llm', 'extracting_text'];
    const oneHourAgo = Date.now() - 60 * 60 * 1000;
    let stuckCount = 0;

    console.log('Status Summary:');
    for (const [status, docs] of groupsByStatus) {
      console.log(` ${status}: ${docs.length} documents`);
      if (!stuckStatuses.includes(status)) {
        continue;
      }
      const stuckDocs = docs.filter((doc) => {
        const updatedAt = doc.updated_at ? new Date(doc.updated_at).getTime() : 0;
        return updatedAt < oneHourAgo;
      });
      stuckCount += stuckDocs.length;
      if (stuckDocs.length > 0) {
        console.log(` ⚠️ ${stuckDocs.length} documents stuck (not updated in last hour)`);
        // Only preview the first five to keep the output readable.
        for (const doc of stuckDocs.slice(0, 5)) {
          const updatedAt = doc.updated_at ? new Date(doc.updated_at).toISOString() : 'unknown';
          console.log(` - ${doc.id}: Updated ${updatedAt}`);
        }
      }
    }

    // Check failed documents
    const failedDocs = groupsByStatus.get('failed') || [];
    if (failedDocs.length > 0) {
      console.log(`\n❌ Failed Documents: ${failedDocs.length} total\n`);

      // Bucket error messages by their first three "significant" words.
      const messagesByPattern = new Map<string, string[]>();
      for (const doc of failedDocs) {
        const errorMsg = doc.error_message || 'Unknown error';
        // Given the length > 5 requirement, 'failed' is the only listed stop
        // word that can actually match; the shorter ones are already excluded.
        const keyWords = errorMsg
          .toLowerCase()
          .split(/\s+/)
          .filter((word: string) => word.length > 5 && !['failed', 'error', 'the', 'and', 'for'].includes(word))
          .slice(0, 3)
          .join(' ');
        const messages = messagesByPattern.get(keyWords);
        if (messages) {
          messages.push(errorMsg);
        } else {
          messagesByPattern.set(keyWords, [errorMsg]);
        }
      }

      console.log('Error Patterns:');
      // Most frequent first, top ten only.
      const sortedPatterns = [...messagesByPattern.entries()]
        .sort((a, b) => b[1].length - a[1].length)
        .slice(0, 10);
      for (const [pattern, examples] of sortedPatterns) {
        console.log(` "${pattern}": ${examples.length} occurrences`);
        console.log(` Example: ${examples[0].substring(0, 100)}...`);
      }
    }

    return {
      totalDocuments: allDocuments.length,
      // Object.fromEntries defines own properties, so unusual keys are preserved.
      statusGroups: Object.fromEntries(groupsByStatus),
      stuckCount,
      failedCount: failedDocs.length
    };
  } catch (error) {
    console.error('Error checking database:', error);
    logger.error('Database check failed', { error });
    throw error;
  }
}
/**
 * CLI entry point: runs the database check, prints a summary block, and
 * terminates the process.
 *
 * Exit code is 0 when no stuck or failed documents were reported, and 1 when
 * any were found or when the check itself threw.
 */
async function main() {
  const divider = '='.repeat(60);
  console.log('🔍 Database Failure Diagnostic Tool');
  console.log(divider);
  try {
    const { totalDocuments, stuckCount, failedCount } = await checkStuckDocuments();

    console.log('\n' + divider);
    console.log('SUMMARY');
    console.log(divider);
    console.log(`Total Documents: ${totalDocuments}`);
    console.log(`Stuck Documents: ${stuckCount}`);
    console.log(`Failed Documents: ${failedCount}`);
    console.log(divider);

    const issuesFound = stuckCount > 0 || failedCount > 0;
    if (!issuesFound) {
      console.log('\n✅ No issues found.');
      process.exit(0);
    } else {
      console.log('\n⚠ Issues found. Review the details above.');
      process.exit(1);
    }
  } catch (error) {
    // The check already logged its own failure; report it here and exit non-zero.
    console.error('\n💥 Diagnostic tool encountered an error:', error);
    process.exit(1);
  }
}
// Only start the diagnostic when this file is the process entry script;
// importing it from another module just exposes checkStuckDocuments.
if (module === require.main) {
  // main() handles its own errors and exits the process, so the promise is
  // intentionally not awaited.
  void main();
}
export { checkStuckDocuments };