Files
cim_summary/debug-text-extraction.js
Jon 185c780486
Some checks failed
CI/CD Pipeline / Backend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Frontend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Build Backend (push) Has been cancelled
CI/CD Pipeline / Build Frontend (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Performance Tests (push) Has been cancelled
CI/CD Pipeline / Dependency Updates (push) Has been cancelled
🚀 Update to Claude 3.7 latest and fix LLM processing issues
- Updated Anthropic API to latest version (2024-01-01)
- Set Claude 3.7 Sonnet Latest as primary model
- Removed deprecated Opus 3.5 references
- Fixed LLM response validation and JSON parsing
- Improved error handling and logging
- Updated model configurations and pricing
- Enhanced document processing reliability
- Fixed TypeScript type issues
- Updated environment configuration
2025-08-17 17:31:56 -04:00

120 lines
3.0 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Debug script to test text extraction components
const https = require('https');
const fs = require('fs');
/**
 * Prints a static troubleshooting checklist for the Document AI text-extraction
 * problem and returns a hard-coded diagnosis.
 *
 * Every value printed here is a literal in this function: nothing contacts
 * Document AI or GCS, reads the environment, or touches the filesystem.
 *
 * @returns {Promise<{status: string, issue?: string, recommendation?: string,
 *   priority?: string, error?: string}>} On success, the fixed `DIAGNOSED`
 *   summary. If anything inside the guarded section throws, resolves to
 *   `{ status: 'FAILED', error }`, where `error` is the thrown value's
 *   `message` when it has one and its string form otherwise.
 */
async function debugTextExtraction() {
  // The banner is printed outside the try block, so a failure here rejects the
  // returned promise instead of producing a FAILED result.
  console.log('🔍 Debugging Document AI Text Extraction...');
  console.log('===============================================');

  try {
    // 1. PDF creation step. No PDF is generated or written here (that would
    //    need a PDF library); the second line is an informational placeholder.
    console.log('\n1⃣ Testing PDF Creation...');
    console.log('📄 Test PDF content created (basic structure)');

    // 2. Service configuration: hard-coded testing-environment values.
    console.log('\n2⃣ Checking Service Configuration...');
    const configurationLines = [
      '🔧 Testing Environment Configuration:',
      ' - GCS Bucket: cim-processor-testing-uploads',
      ' - Document AI Processor: 575027767a9291f6',
      ' - Location: us-central1',
      ' - Project: cim-summarizer-testing',
    ];
    for (const line of configurationLines) {
      console.log(line);
    }

    // 3. Alternative solutions to investigate.
    console.log('\n3⃣ Testing Alternative Solutions...');
    const alternativeLines = [
      '📋 Possible Solutions:',
      '1. Bypass Document AI and use pdf-parse only',
      '2. Check GCS bucket permissions',
      '3. Verify service account credentials',
      '4. Test with a simpler PDF document',
      '5. Add direct text input option',
    ];
    for (const line of alternativeLines) {
      console.log(line);
    }

    // 4. Immediate workarounds, numbered at print time.
    console.log('\n4⃣ Immediate Workaround Options...');
    const workarounds = [
      'Add text input field to bypass PDF parsing',
      'Use pre-extracted text for testing',
      'Fix GCS permissions for the testing bucket',
      'Create a simpler Document AI processor',
      'Add better error handling and logging',
    ];
    workarounds.forEach((solution, index) => {
      console.log(` ${index + 1}. ${solution}`);
    });

    // 5. Recommended quick fix.
    console.log('\n5⃣ Quick Fix Implementation...');
    const quickFixLines = [
      '🚀 Recommended immediate action:',
      ' Add a text input option to bypass PDF parsing temporarily',
      ' This allows testing the agents while fixing Document AI',
    ];
    for (const line of quickFixLines) {
      console.log(line);
    }

    return {
      status: 'DIAGNOSED',
      issue: 'Document AI + PDF parsing both failing',
      recommendation: 'Add text input bypass option',
      priority: 'HIGH',
    };
  } catch (error) {
    console.error('❌ Debug failed:', error);
    // JavaScript can throw any value, not just Error instances. Reading
    // `.message` blindly yields `undefined` for strings and throws for
    // null/undefined, so fall back to the value's string form.
    const hasMessage = error != null && error.message !== undefined;
    const message = hasMessage ? error.message : String(error);
    return { status: 'FAILED', error: message };
  }
}
/**
 * Script entry point: runs the diagnostic and prints the summary object it
 * resolves to. Any rejection, or an error raised while printing, is sent to
 * console.error.
 */
async function runDebugScript() {
  try {
    const outcome = await debugTextExtraction();
    console.log('\n🏁 Debug Result:', outcome);
  } catch (failure) {
    console.error(failure);
  }
}

runDebugScript();