Some checks failed
CI/CD Pipeline / Backend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Frontend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Build Backend (push) Has been cancelled
CI/CD Pipeline / Build Frontend (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Performance Tests (push) Has been cancelled
CI/CD Pipeline / Dependency Updates (push) Has been cancelled
- Updated Anthropic API to latest version (2024-01-01)
- Set Claude 3.7 Sonnet Latest as primary model
- Removed deprecated Opus 3.5 references
- Fixed LLM response validation and JSON parsing
- Improved error handling and logging
- Updated model configurations and pricing
- Enhanced document processing reliability
- Fixed TypeScript type issues
- Updated environment configuration
120 lines
3.0 KiB
JavaScript
120 lines
3.0 KiB
JavaScript
// Debug script to test text extraction components
const https = require('https');
const fs = require('fs');

async function debugTextExtraction() {
|
||
console.log('🔍 Debugging Document AI Text Extraction...');
|
||
console.log('===============================================');
|
||
|
||
try {
|
||
// 1. Check if we can create a simple test PDF
|
||
console.log('\n1️⃣ Testing PDF Creation...');
|
||
|
||
// Create a simple test PDF content (in a real scenario, we'd need a PDF library)
|
||
const testContent = `%PDF-1.4
|
||
1 0 obj
|
||
<<
|
||
/Type /Catalog
|
||
/Pages 2 0 R
|
||
>>
|
||
endobj
|
||
2 0 obj
|
||
<<
|
||
/Type /Pages
|
||
/Kids [3 0 R]
|
||
/Count 1
|
||
>>
|
||
endobj
|
||
3 0 obj
|
||
<<
|
||
/Type /Page
|
||
/Parent 2 0 R
|
||
/MediaBox [0 0 612 792]
|
||
/Contents 4 0 R
|
||
>>
|
||
endobj
|
||
4 0 obj
|
||
<<
|
||
/Length 44
|
||
>>
|
||
stream
|
||
BT
|
||
/F1 12 Tf
|
||
72 720 Td
|
||
(Test Document for Extraction) Tj
|
||
ET
|
||
endstream
|
||
endobj
|
||
xref
|
||
0 5
|
||
0000000000 65535 f
|
||
0000000009 00000 n
|
||
0000000074 00000 n
|
||
0000000120 00000 n
|
||
0000000179 00000 n
|
||
trailer
|
||
<<
|
||
/Size 5
|
||
/Root 1 0 R
|
||
>>
|
||
startxref
|
||
267
|
||
%%EOF`;
|
||
|
||
console.log('📄 Test PDF content created (basic structure)');
|
||
|
||
// 2. Check service configuration
|
||
console.log('\n2️⃣ Checking Service Configuration...');
|
||
console.log('🔧 Testing Environment Configuration:');
|
||
console.log(' - GCS Bucket: cim-processor-testing-uploads');
|
||
console.log(' - Document AI Processor: 575027767a9291f6');
|
||
console.log(' - Location: us-central1');
|
||
console.log(' - Project: cim-summarizer-testing');
|
||
|
||
// 3. Test alternatives
|
||
console.log('\n3️⃣ Testing Alternative Solutions...');
|
||
|
||
console.log('📋 Possible Solutions:');
|
||
console.log('1. Bypass Document AI and use pdf-parse only');
|
||
console.log('2. Check GCS bucket permissions');
|
||
console.log('3. Verify service account credentials');
|
||
console.log('4. Test with a simpler PDF document');
|
||
console.log('5. Add direct text input option');
|
||
|
||
// 4. Provide immediate workaround
|
||
console.log('\n4️⃣ Immediate Workaround Options...');
|
||
|
||
const workarounds = [
|
||
'Add text input field to bypass PDF parsing',
|
||
'Use pre-extracted text for testing',
|
||
'Fix GCS permissions for the testing bucket',
|
||
'Create a simpler Document AI processor',
|
||
'Add better error handling and logging'
|
||
];
|
||
|
||
workarounds.forEach((solution, i) => {
|
||
console.log(` ${i+1}. ${solution}`);
|
||
});
|
||
|
||
// 5. Quick fix suggestion
|
||
console.log('\n5️⃣ Quick Fix Implementation...');
|
||
console.log('🚀 Recommended immediate action:');
|
||
console.log(' Add a text input option to bypass PDF parsing temporarily');
|
||
console.log(' This allows testing the agents while fixing Document AI');
|
||
|
||
return {
|
||
status: 'DIAGNOSED',
|
||
issue: 'Document AI + PDF parsing both failing',
|
||
recommendation: 'Add text input bypass option',
|
||
priority: 'HIGH'
|
||
};
|
||
|
||
} catch (error) {
|
||
console.error('❌ Debug failed:', error);
|
||
return { status: 'FAILED', error: error.message };
|
||
}
|
||
}
|
||
|
||
// Run the diagnostic and print the summary object it resolves with; an
// unexpected rejection is logged to stderr rather than left unhandled.
debugTextExtraction()
  .then((result) => {
    console.log('\n🏁 Debug Result:', result);
  })
  .catch(console.error);