cim_summary/backend/test-complete-pipeline.js

#!/usr/bin/env node

// Test the complete document processing pipeline with the existing PDF
const fs = require('fs');
const path = require('path');

/**
 * Runs the document processing pipeline end to end against a PDF that already
 * exists in the testing GCS bucket, logging progress and a result summary.
 *
 * Steps: configure the testing environment variables, register ts-node so the
 * TypeScript processor can be required directly, check the processor's
 * connection, download the PDF from the bucket, and pass its bytes to
 * `processor.processDocument`.
 *
 * Side effects: overwrites several `process.env` entries and writes to the
 * console.
 *
 * @returns {Promise<object>} The value returned by `processDocument`, or
 *   `{ success: false, error: string }` when any step throws. Failures are
 *   caught and reported through the returned object instead of rejecting.
 */
async function testCompletePipeline() {
  console.log('🧪 Testing Complete Document Processing Pipeline...');
  console.log('==================================================');

  try {
    // 1. Use the existing PDF in the bucket
    console.log('\n1️⃣ Testing with existing PDF...');

    const existingPdfPath = 'uploads/3uZ0RWdJVDQ6PxDGA2uAn4bU8QO2/1755371605515_2025-04-23_Stax_Holding_Company__LLC_Confidential_Information_Presentation_for_Stax_Holding_Company__LLC_-_April_2025-1.pdf';
    console.log('📄 Test file:', existingPdfPath);

    // 2. Import the documentAiProcessor with correct environment
    console.log('\n2️⃣ Loading Document AI Processor...');

    // Single source of truth for the bucket: used both for the environment
    // variable and for the direct download below, so the two cannot drift.
    const bucketName = 'cim-processor-testing-uploads';

    // The environment must be set before the processor module is required.
    // The key file is resolved against this script's directory (like the
    // relative require below) so the script works from any working directory.
    // NOTE(review): assumes the key file sits next to this script — confirm.
    process.env.NODE_ENV = 'testing';
    process.env.GOOGLE_APPLICATION_CREDENTIALS = path.resolve(__dirname, 'serviceAccountKey-testing.json');
    process.env.GCLOUD_PROJECT_ID = 'cim-summarizer-testing';
    process.env.DOCUMENT_AI_LOCATION = 'us';
    process.env.DOCUMENT_AI_PROCESSOR_ID = '575027767a9291f6';
    process.env.GCS_BUCKET_NAME = bucketName;

    console.log('✅ Environment configured');

    // Register ts-node in transpile-only mode so the .ts source can be
    // required from this plain JavaScript script.
    const { register } = require('ts-node');
    register({
      transpileOnly: true,
      compilerOptions: {
        module: 'commonjs',
        target: 'es2020'
      }
    });

    console.log('\n3️⃣ Testing Document AI Text Extraction...');

    // Import the processor
    const { DocumentAiProcessor } = require('./src/services/documentAiProcessor.ts');
    const processor = new DocumentAiProcessor();

    // Test the connection first so configuration problems fail fast.
    const connectionTest = await processor.testConnection();
    console.log('🔗 Connection test:', connectionTest);

    if (!connectionTest.success) {
      throw new Error(`Connection failed: ${connectionTest.error}`);
    }

    // 4. Download the existing file for testing
    console.log('\n4️⃣ Downloading test file from GCS...');
    const { Storage } = require('@google-cloud/storage');
    const storage = new Storage();
    const file = storage.bucket(bucketName).file(existingPdfPath);

    const [fileBuffer] = await file.download();
    console.log(`📄 Downloaded file: ${fileBuffer.length} bytes`);

    // 5. Process the document
    console.log('\n5️⃣ Processing document through pipeline...');
    const crypto = require('crypto');
    // Fresh random IDs stand in for a real document and user.
    const testDocId = crypto.randomUUID();
    const testUserId = crypto.randomUUID();

    const result = await processor.processDocument(
      testDocId,
      testUserId,
      fileBuffer,
      'test-document.pdf',
      'application/pdf'
    );

    const { content } = result;
    // Build the preview only from real text so a missing `content` is not
    // printed as the literal string "undefined...".
    const contentPreview = typeof content === 'string' && content.length > 0
      ? `${content.substring(0, 200)}...`
      : '(no text content)';

    console.log('\n📊 Processing Results:');
    console.log('======================');
    console.log('✅ Success:', result.success);
    console.log('📝 Content length:', content?.length ?? 0);
    console.log('🔍 Content preview:', contentPreview);
    console.log('📋 Metadata:', JSON.stringify(result.metadata, null, 2));

    if (result.error) {
      console.log('❌ Error:', result.error);
    }

    return result;

  } catch (error) {
    // Anything can be thrown in JavaScript; normalise it so the returned
    // object always carries a string message.
    const message = error instanceof Error ? error.message : String(error);
    console.error('❌ Pipeline test failed:', error);
    console.error('Stack:', error?.stack);
    return { success: false, error: message };
  }
}

// Entry point: run the pipeline test, print the verdict, and mirror it in the
// process exit status so shells and CI can detect a failed run.
// `process.exitCode` is set instead of calling `process.exit()` so pending
// console output is not cut off.
testCompletePipeline()
  .then((result) => {
    console.log('\n🏁 Final Pipeline Test Result:');
    console.log('Success:', result.success);
    if (result.success) {
      console.log('🎉 The agents are working! The complete pipeline is functional.');
    } else {
      console.log('❌ Pipeline still has issues:', result.error);
      process.exitCode = 1;
    }
  })
  .catch((error) => {
    // Reached only if the summary logging above throws; still a failed run.
    console.error(error);
    process.exitCode = 1;
  });