Files
cim_summary/backend/src/scripts/track-new-doc.ts
admin 9c916d12f4 feat: Production release v2.0.0 - Simple Document Processor
Major release with significant performance improvements and new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
2025-11-09 21:07:22 -05:00

155 lines
5.3 KiB
TypeScript
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env ts-node
/**
* Track the new document processing status in real-time
*/
import { getSupabaseServiceClient } from '../config/supabase';
// Hard-coded ID of the document this script monitors. It is matched against
// `documents.id` and against `document_id` in the job and chunk tables, so
// edit this value to track a different document.
const DOCUMENT_ID = 'c343a6ae-cfda-445e-9a4c-fb25cd1c5a81';
/**
 * Render a millisecond duration as `<minutes>m <seconds>s`.
 *
 * The duration is rounded to whole seconds first and the minutes are then
 * floored, so the two components always describe the same instant (105 s is
 * `1m 45s`, never `2m 45s`). Negative durations are clamped to zero.
 */
function formatElapsed(elapsedMs: number): string {
  const totalSeconds = Math.max(0, Math.round(elapsedMs / 1000));
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}

/** Shorten `text` to at most `maxLength` characters, appending `...` when it was cut. */
function truncateForLog(text: string, maxLength = 150): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Poll the `documents` row identified by `DOCUMENT_ID` every 3 seconds and
 * print its processing state to the console.
 *
 * A full status block (document status, latest processing job, chunk and
 * embedding counts, analysis data, summary, errors) is printed on the first
 * check and whenever the status value changes; every other check only
 * rewrites a single heartbeat line.
 *
 * The returned promise resolves as soon as the timer is installed. The
 * process is terminated from inside the timer callback: exit code 0 once the
 * status is `completed` or `failed` (or on Ctrl+C), exit code 1 when a poll
 * throws. If the document row cannot be read, polling stops without exiting.
 */
async function trackNewDoc(): Promise<void> {
  const supabase = getSupabaseServiceClient();

  console.log('\n🔍 Tracking New Document Processing');
  console.log('═'.repeat(80));
  console.log(`📄 Document ID: ${DOCUMENT_ID}`);
  console.log('🔄 Updates every 3 seconds');
  console.log(' Press Ctrl+C to stop\n');
  console.log('═'.repeat(80));

  let previousStatus: string | null = null;
  let checkCount = 0;
  // Guards against overlapping polls: setInterval does not wait for an async
  // callback, so a poll slower than the interval would otherwise run
  // concurrently with the next one and race on previousStatus and the output.
  let pollInFlight = false;

  const monitorInterval = setInterval(async () => {
    if (pollInFlight) {
      return;
    }
    pollInFlight = true;
    checkCount++;

    try {
      // Get document status
      const { data: document, error: docError } = await supabase
        .from('documents')
        .select('*')
        .eq('id', DOCUMENT_ID)
        .single();

      if (docError || !document) {
        console.log(`\n❌ [${new Date().toLocaleTimeString()}] Document not found`);
        clearInterval(monitorInterval);
        return;
      }

      const statusChanged = previousStatus !== document.status;
      if (!statusChanged && checkCount !== 1) {
        // Just show a heartbeat; \r rewrites the same console line each time.
        process.stdout.write(`\r⏱ [${new Date().toLocaleTimeString()}] Monitoring... (${checkCount} checks) - Status: ${document.status}`);
        return;
      }

      // The job and chunk details are only printed in the full status block,
      // so they are fetched here rather than on every heartbeat. The three
      // queries are independent of each other and run in parallel.
      const [{ data: jobs }, { count: chunkCount }, { count: embeddingCount }] = await Promise.all([
        supabase
          .from('processing_jobs')
          .select('*')
          .eq('document_id', DOCUMENT_ID)
          .order('created_at', { ascending: false })
          .limit(1),
        supabase
          .from('document_chunks')
          .select('*', { count: 'exact', head: true })
          .eq('document_id', DOCUMENT_ID),
        supabase
          .from('document_chunks')
          .select('*', { count: 'exact', head: true })
          .eq('document_id', DOCUMENT_ID)
          .not('embedding', 'is', null),
      ]);
      const latestJob = jobs?.[0];
      const now = Date.now();

      console.log(`\n📊 [${new Date().toLocaleTimeString()}] Status Update:`);
      console.log(` Status: ${document.status}`);
      console.log(` File: ${document.original_file_name || 'Unknown'}`);
      if (document.updated_at) {
        console.log(` Last Updated: ${formatElapsed(now - new Date(document.updated_at).getTime())} ago`);
      } else {
        // Without a timestamp there is no meaningful age to report.
        console.log(' Last Updated: Unknown');
      }

      if (latestJob) {
        console.log(` Job Status: ${latestJob.status} (attempt ${latestJob.attempts || 1})`);
        if (latestJob.started_at) {
          console.log(` Job Running: ${formatElapsed(now - new Date(latestJob.started_at).getTime())}`);
        }
        if (latestJob.error) {
          console.log(` ❌ Job Error: ${truncateForLog(latestJob.error)}`);
        }
      }

      console.log(` Chunks: ${chunkCount || 0} (${embeddingCount || 0} embedded)`);

      if (document.analysis_data) {
        const keys = Object.keys(document.analysis_data);
        console.log(` ✅ Analysis Data: ${keys.length} keys`);
        if (keys.length === 0) {
          console.log(` ⚠️ WARNING: Analysis data is empty object!`);
        }
      } else {
        console.log(` ⏳ Analysis Data: Not yet available`);
      }

      if (document.generated_summary) {
        console.log(` ✅ Summary: ${document.generated_summary.length} characters`);
      } else {
        console.log(` ⏳ Summary: Not yet available`);
      }

      if (document.error) {
        console.log(` ❌ Document Error: ${truncateForLog(document.error)}`);
      }

      previousStatus = document.status;

      // Check if processing is complete or failed
      if (document.status === 'completed' || document.status === 'failed') {
        console.log(`\n${document.status === 'completed' ? '✅' : '❌'} Processing ${document.status}!`);
        if (document.status === 'completed') {
          console.log(' Document successfully processed.');
        } else {
          console.log(` Error: ${document.error || 'Unknown error'}`);
        }
        clearInterval(monitorInterval);
        process.exit(0);
      }
    } catch (error) {
      console.error(`\n❌ Error: ${error}`);
      clearInterval(monitorInterval);
      process.exit(1);
    } finally {
      pollInFlight = false;
    }
  }, 3000);

  // Handle Ctrl+C
  process.on('SIGINT', () => {
    console.log('\n\n👋 Stopping monitoring...');
    clearInterval(monitorInterval);
    process.exit(0);
  });
}
// Start monitoring only when this file is the process entry point, not when
// another module imports it for the trackNewDoc export.
const invokedDirectly = require.main === module;
if (invokedDirectly) {
  trackNewDoc().catch((fatal) => {
    // Report a failure that escaped trackNewDoc and exit non-zero.
    console.error('Fatal error:', fatal);
    process.exit(1);
  });
}
export { trackNewDoc };