Major release with significant performance improvements and a new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
155 lines
5.3 KiB
TypeScript
Executable File
155 lines
5.3 KiB
TypeScript
Executable File
#!/usr/bin/env ts-node
|
||
|
||
/**
|
||
* Track the new document processing status in real-time
|
||
*/
|
||
|
||
import { getSupabaseServiceClient } from '../config/supabase';
|
||
|
||
// ID of the document this script tracks; trackNewDoc() uses it to filter its
// queries on the `documents`, `processing_jobs` and `document_chunks` tables.
const DOCUMENT_ID = 'c343a6ae-cfda-445e-9a4c-fb25cd1c5a81';
|
||
|
||
/**
 * Formats the time elapsed between `timestamp` and `nowMs` as `<m>m <s>s`.
 *
 * Minutes and seconds are both derived from one floored second count, so the
 * two parts always agree (90 seconds is `1m 30s`, never `2m 30s`).
 *
 * @param timestamp - Date string to measure from; may be missing.
 * @param nowMs - Reference time in epoch milliseconds.
 * @returns The formatted duration, or `null` when `timestamp` is missing or
 *   cannot be parsed as a date.
 */
function formatElapsedSince(
  timestamp: string | null | undefined,
  nowMs: number,
): string | null {
  if (!timestamp) {
    return null;
  }
  const startedMs = new Date(timestamp).getTime();
  if (Number.isNaN(startedMs)) {
    return null;
  }
  // Clamp at zero so a timestamp slightly in the future does not print a negative age.
  const totalSeconds = Math.max(0, Math.floor((nowMs - startedMs) / 1000));
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}

/**
 * Renders a value for a single log line, cutting it off after `maxLength`
 * characters and appending `...` when it was shortened.
 *
 * Non-string values are stringified first so that calling this on an error
 * payload that is not a plain string cannot throw.
 */
function truncateForLog(value: unknown, maxLength = 150): string {
  const text = typeof value === 'string' ? value : JSON.stringify(value) ?? String(value);
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Polls the status of a document every 3 seconds and prints progress to the console.
 *
 * A full status report is printed on the first check and whenever the
 * document's `status` changes; otherwise a single-line heartbeat is rewritten
 * in place. The process exits with code 0 once the status is `completed` or
 * `failed` (or on Ctrl+C) and with code 1 if a check throws. If the document
 * row cannot be loaded, polling simply stops.
 *
 * The returned promise resolves as soon as polling has been scheduled; it does
 * not wait for processing to finish.
 *
 * @param documentId - ID of the document to track. Defaults to `DOCUMENT_ID`.
 */
async function trackNewDoc(documentId: string = DOCUMENT_ID): Promise<void> {
  const supabase = getSupabaseServiceClient();

  console.log('\n🔍 Tracking New Document Processing');
  console.log('═'.repeat(80));
  console.log(`📄 Document ID: ${documentId}`);
  console.log('🔄 Updates every 3 seconds');
  console.log(' Press Ctrl+C to stop\n');
  console.log('═'.repeat(80));

  let previousStatus: string | null = null;
  let checkCount = 0;
  // setInterval does not wait for an async callback, so a slow round of
  // queries could otherwise overlap with the next tick and print duplicates.
  let checkInFlight = false;

  const monitorInterval = setInterval(async () => {
    if (checkInFlight) {
      return;
    }
    checkInFlight = true;
    checkCount++;

    try {
      // Get document status
      const { data: doc, error: docError } = await supabase
        .from('documents')
        .select('*')
        .eq('id', documentId)
        .single();

      if (docError || !doc) {
        // Keep the original message but surface the underlying error, if any.
        const detail = docError?.message ? ` (${docError.message})` : '';
        console.log(`\n❌ [${new Date().toLocaleTimeString()}] Document not found${detail}`);
        clearInterval(monitorInterval);
        return;
      }

      // Get latest job
      const { data: jobs, error: jobsError } = await supabase
        .from('processing_jobs')
        .select('*')
        .eq('document_id', documentId)
        .order('created_at', { ascending: false })
        .limit(1);

      const latestJob = jobs?.[0];

      // Get chunk counts (head: true requests only the count, not the rows)
      const { count: chunkCount, error: chunkError } = await supabase
        .from('document_chunks')
        .select('*', { count: 'exact', head: true })
        .eq('document_id', documentId);

      const { count: embeddingCount, error: embeddingError } = await supabase
        .from('document_chunks')
        .select('*', { count: 'exact', head: true })
        .eq('document_id', documentId)
        .not('embedding', 'is', null);

      // Status change detection
      const statusChanged = previousStatus !== doc.status;
      if (statusChanged || checkCount === 1) {
        const now = Date.now();
        const updatedAge = formatElapsedSince(doc.updated_at, now);

        console.log(`\n📊 [${new Date().toLocaleTimeString()}] Status Update:`);
        console.log(` Status: ${doc.status}`);
        console.log(` File: ${doc.original_file_name || 'Unknown'}`);
        console.log(` Last Updated: ${updatedAge ? `${updatedAge} ago` : 'unknown'}`);

        if (latestJob) {
          console.log(` Job Status: ${latestJob.status} (attempt ${latestJob.attempts || 1})`);
          const jobAge = formatElapsedSince(latestJob.started_at, now);
          if (jobAge) {
            console.log(` Job Running: ${jobAge}`);
          }
          if (latestJob.error) {
            console.log(` ❌ Job Error: ${truncateForLog(latestJob.error)}`);
          }
        }

        console.log(` Chunks: ${chunkCount ?? 0} (${embeddingCount ?? 0} embedded)`);

        // The job/chunk queries are informational only: report the first
        // failure instead of silently showing empty values, but keep polling.
        const secondaryError = jobsError ?? chunkError ?? embeddingError;
        if (secondaryError) {
          console.log(` ⚠️ Query warning: ${truncateForLog(secondaryError.message)}`);
        }

        if (doc.analysis_data) {
          const keys = Object.keys(doc.analysis_data);
          console.log(` ✅ Analysis Data: ${keys.length} keys`);
          if (keys.length === 0) {
            console.log(` ⚠️ WARNING: Analysis data is empty object!`);
          }
        } else {
          console.log(` ⏳ Analysis Data: Not yet available`);
        }

        if (doc.generated_summary) {
          console.log(` ✅ Summary: ${doc.generated_summary.length} characters`);
        } else {
          console.log(` ⏳ Summary: Not yet available`);
        }

        if (doc.error) {
          console.log(` ❌ Document Error: ${truncateForLog(doc.error)}`);
        }

        previousStatus = doc.status;

        // Check if processing is complete or failed
        if (doc.status === 'completed' || doc.status === 'failed') {
          console.log(`\n${doc.status === 'completed' ? '✅' : '❌'} Processing ${doc.status}!`);
          if (doc.status === 'completed') {
            console.log(' Document successfully processed.');
          } else {
            console.log(` Error: ${doc.error || 'Unknown error'}`);
          }
          clearInterval(monitorInterval);
          process.exit(0);
        }
      } else {
        // Just show a heartbeat; `\r` rewrites the same terminal line each tick.
        process.stdout.write(`\r⏱️ [${new Date().toLocaleTimeString()}] Monitoring... (${checkCount} checks) - Status: ${doc.status}`);
      }
    } catch (error) {
      console.error(`\n❌ Error: ${error}`);
      clearInterval(monitorInterval);
      process.exit(1);
    } finally {
      checkInFlight = false;
    }
  }, 3000);

  // Handle Ctrl+C
  process.on('SIGINT', () => {
    console.log('\n\n👋 Stopping monitoring...');
    clearInterval(monitorInterval);
    process.exit(0);
  });
}
|
||
|
||
// Start tracking only when this file is the program being run; when it is
// loaded by another module, just expose trackNewDoc.
if (require.main === module) {
  // Report a rejected tracking promise and exit with a failure code.
  const reportFatal = (reason: unknown): void => {
    console.error('Fatal error:', reason);
    process.exit(1);
  };

  trackNewDoc().then(undefined, reportFatal);
}

export { trackNewDoc };
|
||
|