Major release with significant performance improvements and new processing strategy.

## Core Changes
- Implemented simple_full_document processing strategy (default)
- Full document → LLM approach: 1-2 passes, ~5-6 minutes processing time
- Achieved 100% completeness with 2 API calls (down from 5+)
- Removed redundant Document AI passes for faster processing

## Financial Data Extraction
- Enhanced deterministic financial table parser
- Improved FY3/FY2/FY1/LTM identification from varying CIM formats
- Automatic merging of parser results with LLM extraction

## Code Quality & Infrastructure
- Cleaned up debug logging (removed emoji markers from production code)
- Fixed Firebase Secrets configuration (using modern defineSecret approach)
- Updated OpenAI API key
- Resolved deployment conflicts (secrets vs environment variables)
- Added .env files to Firebase ignore list

## Deployment
- Firebase Functions v2 deployment successful
- All 7 required secrets verified and configured
- Function URL: https://api-y56ccs6wva-uc.a.run.app

## Performance Improvements
- Processing time: ~5-6 minutes (down from 23+ minutes)
- API calls: 1-2 (down from 5+)
- Completeness: 100% achievable
- LLM Model: claude-3-7-sonnet-latest

## Breaking Changes
- Default processing strategy changed to 'simple_full_document'
- RAG processor available as alternative strategy 'document_ai_agentic_rag'

## Files Changed
- 36 files changed, 5642 insertions(+), 4451 deletions(-)
- Removed deprecated documentation files
- Cleaned up unused services and models

This release represents a major refactoring focused on speed, accuracy, and maintainability.
106 lines
3.3 KiB
TypeScript
106 lines
3.3 KiB
TypeScript
#!/usr/bin/env ts-node
|
|
|
|
/**
|
|
* Script to check currently processing documents and their status
|
|
*/
|
|
|
|
import { getSupabaseServiceClient } from '../config/supabase';
|
|
import '../config/firebase';
|
|
|
|
/**
 * Prints a report of documents that are still in a processing state.
 *
 * For each status in the processing list, queries the `documents` table for up
 * to 10 rows (most recently updated first) and logs their details. A row whose
 * `updated_at` is more than 10 minutes old is flagged as stuck. Afterwards a
 * one-line summary of the 10 most recently updated documents, regardless of
 * status, is logged.
 *
 * A failed query is logged and skipped so the rest of the report still prints.
 * Rows with a missing or unparseable `updated_at` are reported as "age unknown"
 * instead of being measured from the Unix epoch, and rows with a missing `id`
 * or `status` no longer abort the summary listing.
 *
 * @returns {Promise<void>} Resolves once the whole report has been printed.
 * @throws Rethrows any exception raised while building the report, after
 *   logging it.
 */
async function checkCurrentProcessing() {
  console.log('\n🔍 Checking Currently Processing Documents...\n');

  try {
    const supabase = getSupabaseServiceClient();

    // Check documents in various processing statuses
    const processingStatuses = ['processing', 'uploading', 'processing_llm', 'extracting_text'];

    for (const status of processingStatuses) {
      const { data, error } = await supabase
        .from('documents')
        .select('*')
        .eq('status', status)
        .order('updated_at', { ascending: false })
        .limit(10);

      if (error) {
        // Keep going: one failing status query should not hide the others.
        console.error(`Error querying ${status}:`, error);
        continue;
      }

      if (!data || data.length === 0) {
        continue;
      }

      console.log(`\n📄 Documents with status "${status}": ${data.length}`);
      console.log('─'.repeat(80));

      const now = Date.now();
      for (const doc of data) {
        // A missing or unparseable timestamp yields NaN, which maps to a null
        // age so it is never mistaken for "not updated since 1970".
        const updatedMs = doc.updated_at ? new Date(doc.updated_at).getTime() : Number.NaN;
        const ageMinutes = Number.isNaN(updatedMs)
          ? null
          : Math.round((now - updatedMs) / 1000 / 60);
        const ageText = ageMinutes === null ? 'age unknown' : `${ageMinutes} minutes ago`;

        console.log(`\n ID: ${doc.id}`);
        console.log(` File: ${doc.original_file_name}`);
        console.log(` Status: ${doc.status}`);
        console.log(` Updated: ${doc.updated_at} (${ageText})`);
        console.log(` Created: ${doc.created_at}`);
        if (doc.error_message) {
          console.log(` Error: ${doc.error_message}`);
        }
        if (doc.file_path) {
          console.log(` File Path: ${doc.file_path}`);
        }

        // Check if stuck (only when the age is actually known)
        if (ageMinutes !== null && ageMinutes > 10) {
          console.log(` ⚠️ STUCK: Not updated in ${ageMinutes} minutes`);
        }
      }
    }

    // Also check most recent documents regardless of status
    console.log('\n\n📋 Most Recent Documents (Last 10):');
    console.log('─'.repeat(80));

    const { data: recentDocs, error: recentError } = await supabase
      .from('documents')
      .select('*')
      .order('updated_at', { ascending: false })
      .limit(10);

    if (recentError) {
      console.error('Error querying recent documents:', recentError);
    } else if (recentDocs) {
      const now = Date.now();
      for (const doc of recentDocs) {
        const updatedMs = doc.updated_at ? new Date(doc.updated_at).getTime() : Number.NaN;
        const ageMinutes = Number.isNaN(updatedMs)
          ? null
          : Math.round((now - updatedMs) / 1000 / 60);
        const ageText =
          ageMinutes === null ? 'age unknown' : `${ageMinutes.toString().padStart(4)} min ago`;

        // Coerce to strings first: a null id/status must not abort the listing.
        const idText = doc.id == null ? 'unknown' : String(doc.id);
        const statusText = doc.status == null ? 'unknown' : String(doc.status);

        console.log(
          `\n ${idText.substring(0, 8)}... - ${statusText.padEnd(15)} - ${ageText} - ${doc.original_file_name}`,
        );
        if (doc.error_message) {
          console.log(` Error: ${String(doc.error_message).substring(0, 100)}`);
        }
      }
    }

    console.log('\n');
  } catch (error) {
    console.error('❌ Error:', error);
    throw error;
  }
}
|
|
|
|
// Entry point: only runs the report when this file is launched as a script,
// not when it is imported. Exit code is 0 on success and 1 on failure.
if (require.main === module) {
  checkCurrentProcessing().then(
    () => process.exit(0),
    (error) => {
      console.error('Fatal error:', error);
      process.exit(1);
    },
  );
}
|
|
|
|
export { checkCurrentProcessing };
|
|
|