// cim_summary/backend/test-full-pipeline.js

const path = require('path');
const { createClient } = require('@supabase/supabase-js');
const Anthropic = require('@anthropic-ai/sdk');

// Read configuration from the .env file that sits next to this script.
const envFilePath = path.join(__dirname, '.env');
require('dotenv').config({ path: envFilePath });

console.log('🔧 Testing Full Processing Pipeline...\n');

/**
 * Ordered strategies for pulling a JSON object out of raw LLM output.
 *
 * Each `extract` returns a candidate JSON string, or `null` when the strategy
 * finds nothing to try. One strategy is used per parse attempt, so a later
 * strategy still gets its turn when an earlier candidate fails `JSON.parse`.
 */
const JSON_EXTRACTION_METHODS = [
  {
    label: 'Method 1 (regex)',
    // Greedy match: spans from the first "{" to the last "}" in the text.
    extract(text) {
      const match = text.match(/\{[\s\S]*\}/);
      return match ? match[0] : null;
    },
  },
  {
    label: 'Method 2 (code blocks)',
    // Object inside the first fenced code block (optionally tagged "json").
    extract(text) {
      const match = text.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
      return match ? match[1] : null;
    },
  },
  {
    label: 'Method 3 (content trimming)',
    // Last resort: the trimmed text, cut down to start at the first "{" and
    // end at the last "}" when those characters are present.
    extract(text) {
      let candidate = text.trim();
      if (!candidate.startsWith('{')) {
        const firstBrace = candidate.indexOf('{');
        if (firstBrace !== -1) {
          candidate = candidate.substring(firstBrace);
        }
      }
      if (!candidate.endsWith('}')) {
        const lastBrace = candidate.lastIndexOf('}');
        if (lastBrace !== -1) {
          candidate = candidate.substring(0, lastBrace + 1);
        }
      }
      return candidate.length > 0 ? candidate : null;
    },
  },
];

/**
 * Runs the end-to-end smoke test: queries the `documents` table through
 * Supabase, sends a sample CIM to the Anthropic Messages API, extracts and
 * parses the JSON reply, and reports whether the result would be storable.
 *
 * Reads SUPABASE_URL, SUPABASE_SERVICE_KEY and ANTHROPIC_API_KEY from
 * `process.env`. Progress and failures are written to the console.
 *
 * @returns {Promise<void>} Resolves when the run ends; errors are caught and
 *   logged rather than rethrown.
 */
async function testFullPipeline() {
  try {
    // Step 1: Test Supabase connection
    console.log('📊 Step 1: Testing Supabase connection...');
    const supabase = createClient(
      process.env.SUPABASE_URL,
      process.env.SUPABASE_SERVICE_KEY,
      {
        auth: {
          persistSession: false,
          autoRefreshToken: false,
        }
      }
    );

    // Only the error matters here; the returned rows are not inspected.
    const { error: testError } = await supabase
      .from('documents')
      .select('id, original_file_name, status, analysis_data')
      .limit(1);

    if (testError) {
      console.error('❌ Supabase connection failed:', testError);
      return;
    }
    console.log('✅ Supabase connection successful');

    // Step 2: Test LLM service
    console.log('\n🤖 Step 2: Testing LLM service...');
    const anthropic = new Anthropic({
      apiKey: process.env.ANTHROPIC_API_KEY,
    });

    // Create the exact prompt that should be used
    const sampleCIMText = `
    CONFIDENTIAL INFORMATION MEMORANDUM

    COMPANY: Sample Manufacturing Corp.
    INDUSTRY: Industrial Manufacturing
    LOCATION: Cleveland, OH
    EMPLOYEES: 150
    REVENUE: $25M (2023), $28M (2024)
    EBITDA: $4.2M (2023), $4.8M (2024)

    BUSINESS DESCRIPTION:
    Sample Manufacturing Corp. is a leading manufacturer of precision industrial components serving the automotive and aerospace industries. The company has been in business for 25 years and operates from a 50,000 sq ft facility in Cleveland, OH.

    KEY PRODUCTS:
    - Precision machined parts (60% of revenue)
    - Assembly services (25% of revenue)
    - Engineering consulting (15% of revenue)

    CUSTOMERS:
    - Top 5 customers represent 45% of revenue
    - Long-term contracts with major automotive OEMs
    - Growing aerospace segment

    FINANCIAL PERFORMANCE:
    FY 2022: Revenue $22M, EBITDA $3.8M
    FY 2023: Revenue $25M, EBITDA $4.2M
    FY 2024: Revenue $28M, EBITDA $4.8M

    MANAGEMENT:
    CEO: John Smith (15 years experience)
    CFO: Sarah Johnson (10 years experience)
    COO: Mike Davis (12 years experience)

    REASON FOR SALE:
    Founder looking to retire and seeking strategic partner for growth.
    `;

    // Create the system prompt (exact copy from the service)
    const systemPrompt = `You are an expert investment analyst at BPCP (Blue Point Capital Partners) reviewing a Confidential Information Memorandum (CIM). Your task is to analyze CIM documents and return a comprehensive, structured JSON object that follows the BPCP CIM Review Template format EXACTLY.

CRITICAL REQUIREMENTS:
1. **JSON OUTPUT ONLY**: Your entire response MUST be a single, valid JSON object. Do not include any text or explanation before or after the JSON object.

2. **BPCP TEMPLATE FORMAT**: The JSON object MUST follow the BPCP CIM Review Template structure exactly as specified.

3. **COMPLETE ALL FIELDS**: You MUST provide a value for every field. Use "Not specified in CIM" for any information that is not available in the document.

4. **NO PLACEHOLDERS**: Do not use placeholders like "..." or "TBD". Use "Not specified in CIM" instead.

5. **PROFESSIONAL ANALYSIS**: The content should be high-quality and suitable for BPCP's investment committee.

6. **BPCP FOCUS**: Focus on companies in 5+MM EBITDA range in consumer and industrial end markets, with emphasis on M&A, technology & data usage, supply chain and human capital optimization.

7. **BPCP PREFERENCES**: BPCP prefers companies which are founder/family-owned and within driving distance of Cleveland and Charlotte.

8. **EXACT FIELD NAMES**: Use the exact field names and descriptions from the BPCP CIM Review Template.

9. **FINANCIAL DATA**: For financial metrics, use actual numbers if available, otherwise use "Not specified in CIM".

10. **VALID JSON**: Ensure your response is valid JSON that can be parsed without errors.

ANALYSIS QUALITY REQUIREMENTS:
- **Financial Precision**: Extract exact financial figures, percentages, and growth rates. Calculate CAGR where possible.
- **Competitive Intelligence**: Identify specific competitors, market positions, and competitive advantages.
- **Risk Assessment**: Evaluate both stated and implied risks, including operational, financial, and market risks.
- **Growth Drivers**: Identify specific revenue growth drivers, market expansion opportunities, and operational improvements.
- **Management Quality**: Assess management experience, track record, and post-transaction intentions.
- **Value Creation**: Identify specific value creation levers that align with BPCP's expertise.
- **Due Diligence Focus**: Highlight areas requiring deeper investigation and specific questions for management.

DOCUMENT ANALYSIS APPROACH:
- Read the entire document carefully, paying special attention to financial tables, charts, and appendices
- Cross-reference information across different sections for consistency
- Extract both explicit statements and implicit insights
- Focus on quantitative data while providing qualitative context
- Identify any inconsistencies or areas requiring clarification
- Consider industry context and market dynamics when evaluating opportunities and risks`;

    // Create the user prompt (exact copy from the service)
    const userPrompt = `Please analyze the following CIM document and extract information according to this schema:

{
  "dealOverview": {
    "targetCompanyName": "Target Company Name",
    "industrySector": "Industry/Sector",
    "geography": "Geography (HQ & Key Operations)",
    "dealSource": "Deal Source",
    "transactionType": "Transaction Type",
    "dateCIMReceived": "Date CIM Received",
    "dateReviewed": "Date Reviewed",
    "reviewers": "Reviewer(s)",
    "cimPageCount": "CIM Page Count",
    "statedReasonForSale": "Stated Reason for Sale (if provided)",
    "employeeCount": "Number of employees (if stated in document)"
  },
  "businessDescription": {
    "coreOperationsSummary": "Core Operations Summary (3-5 sentences)",
    "keyProductsServices": "Key Products/Services & Revenue Mix (Est. % if available)",
    "uniqueValueProposition": "Unique Value Proposition (UVP) / Why Customers Buy",
    "customerBaseOverview": {
      "keyCustomerSegments": "Key Customer Segments/Types",
      "customerConcentrationRisk": "Customer Concentration Risk (Top 5 and/or Top 10 Customers as % Revenue - if stated/inferable)",
      "typicalContractLength": "Typical Contract Length / Recurring Revenue % (if applicable)"
    },
    "keySupplierOverview": {
      "dependenceConcentrationRisk": "Dependence/Concentration Risk"
    }
  },
  "marketIndustryAnalysis": {
    "estimatedMarketSize": "Estimated Market Size (TAM/SAM - if provided)",
    "estimatedMarketGrowthRate": "Estimated Market Growth Rate (% CAGR - Historical & Projected)",
    "keyIndustryTrends": "Key Industry Trends & Drivers (Tailwinds/Headwinds)",
    "competitiveLandscape": {
      "keyCompetitors": "Key Competitors Identified",
      "targetMarketPosition": "Target's Stated Market Position/Rank",
      "basisOfCompetition": "Basis of Competition"
    },
    "barriersToEntry": "Barriers to Entry / Competitive Moat (Stated/Inferred)"
  },
  "financialSummary": {
    "financials": {
      "fy3": {
        "revenue": "Revenue for FY-3",
        "revenueGrowth": "Revenue growth % for FY-3",
        "grossProfit": "Gross profit for FY-3",
        "grossMargin": "Gross margin % for FY-3",
        "ebitda": "EBITDA for FY-3",
        "ebitdaMargin": "EBITDA margin % for FY-3"
      },
      "fy2": {
        "revenue": "Revenue for FY-2",
        "revenueGrowth": "Revenue growth % for FY-2",
        "grossProfit": "Gross profit for FY-2",
        "grossMargin": "Gross margin % for FY-2",
        "ebitda": "EBITDA for FY-2",
        "ebitdaMargin": "EBITDA margin % for FY-2"
      },
      "fy1": {
        "revenue": "Revenue for FY-1",
        "revenueGrowth": "Revenue growth % for FY-1",
        "grossProfit": "Gross profit for FY-1",
        "grossMargin": "Gross margin % for FY-1",
        "ebitda": "EBITDA for FY-1",
        "ebitdaMargin": "EBITDA margin % for FY-1"
      },
      "ltm": {
        "revenue": "Revenue for LTM",
        "revenueGrowth": "Revenue growth % for LTM",
        "grossProfit": "Gross profit for LTM",
        "grossMargin": "Gross margin % for LTM",
        "ebitda": "EBITDA for LTM",
        "ebitdaMargin": "EBITDA margin % for LTM"
      }
    },
    "qualityOfEarnings": "Quality of earnings/adjustments impression",
    "revenueGrowthDrivers": "Revenue growth drivers (stated)",
    "marginStabilityAnalysis": "Margin stability/trend analysis",
    "capitalExpenditures": "Capital expenditures (LTM % of revenue)",
    "workingCapitalIntensity": "Working capital intensity impression",
    "freeCashFlowQuality": "Free cash flow quality impression"
  },
  "managementTeamOverview": {
    "keyLeaders": "Key Leaders Identified (CEO, CFO, COO, Head of Sales, etc.)",
    "managementQualityAssessment": "Initial Assessment of Quality/Experience (Based on Bios)",
    "postTransactionIntentions": "Management's Stated Post-Transaction Role/Intentions (if mentioned)",
    "organizationalStructure": "Organizational Structure Overview (Impression)"
  },
  "preliminaryInvestmentThesis": {
    "keyAttractions": "Key Attractions / Strengths (Why Invest?)",
    "potentialRisks": "Potential Risks / Concerns (Why Not Invest?)",
    "valueCreationLevers": "Initial Value Creation Levers (How PE Adds Value)",
    "alignmentWithFundStrategy": "Alignment with Fund Strategy"
  },
  "keyQuestionsNextSteps": {
    "criticalQuestions": "Critical Questions Arising from CIM Review",
    "missingInformation": "Key Missing Information / Areas for Diligence Focus",
    "preliminaryRecommendation": "Preliminary Recommendation",
    "rationaleForRecommendation": "Rationale for Recommendation (Brief)",
    "proposedNextSteps": "Proposed Next Steps"
  }
}

CIM Document Text:
${sampleCIMText}

CRITICAL INSTRUCTIONS:
1. Respond with ONLY a single, valid JSON object
2. Do not include any explanatory text, markdown formatting, or code blocks
3. Do not include code block markers
4. Ensure all field names match exactly with the schema
5. Use "Not specified in CIM" for missing information
6. Ensure the JSON is properly formatted and can be parsed without errors

Your response should start with "{" and end with "}".`;

    console.log('🔄 Making LLM API call...');
    const response = await anthropic.messages.create({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 4000,
      temperature: 0.1,
      system: systemPrompt,
      messages: [
        {
          role: 'user',
          content: userPrompt
        }
      ]
    });

    // Guard the first content block once instead of dereferencing
    // response.content[0].text at every use.
    const firstBlock = Array.isArray(response.content) ? response.content[0] : undefined;
    if (!firstBlock || typeof firstBlock.text !== 'string') {
      console.error('❌ LLM response did not contain a text content block');
      return;
    }
    const responseText = firstBlock.text;

    console.log('✅ LLM API call successful!');
    console.log('   Response length:', responseText.length);
    console.log('   Response preview:', responseText.substring(0, 500) + '...');
    if (response.stop_reason === 'max_tokens') {
      // A reply cut off at the token limit is a likely cause of parse failures below.
      console.log('   ⚠️ Response stopped at max_tokens; the JSON may be truncated');
    }

    // Step 3: Test JSON parsing. Each attempt uses a different extraction
    // method; repeating the same extraction on the same text could never
    // change the outcome.
    console.log('\n🔍 Step 3: Testing JSON parsing...');
    let analysisData = {};
    let parseSuccess = false;

    const totalAttempts = JSON_EXTRACTION_METHODS.length;
    // Methods can yield the same string (e.g. Methods 1 and 3); parse each once.
    const triedCandidates = new Set();

    for (const [index, method] of JSON_EXTRACTION_METHODS.entries()) {
      const attempt = index + 1;
      console.log(`   Attempt ${attempt}/${totalAttempts}...`);

      const jsonString = method.extract(responseText);
      if (jsonString === null) {
        console.log(`   ${method.label}: no JSON found`);
        continue;
      }
      if (triedCandidates.has(jsonString)) {
        console.log(`   ${method.label}: same candidate as an earlier attempt, skipping`);
        continue;
      }
      triedCandidates.add(jsonString);
      console.log(`   ${method.label}: JSON found`);

      try {
        const parsed = JSON.parse(jsonString);
        // Method 3 can hand over non-object JSON (e.g. a bare string or null).
        if (parsed === null || typeof parsed !== 'object') {
          throw new Error('Parsed JSON is not an object');
        }

        analysisData = parsed;
        console.log('   ✅ JSON parsing successful');
        console.log('   Analysis data keys:', Object.keys(analysisData));

        parseSuccess = true;
        break;
      } catch (parseError) {
        console.log(`   ❌ JSON parsing failed on attempt ${attempt}:`, parseError.message);
      }
    }

    if (!parseSuccess) {
      console.log('   ❌ All JSON parsing attempts failed');
    }

    // Step 4: Test database storage
    console.log('\n💾 Step 4: Testing database storage...');
    if (parseSuccess && Object.keys(analysisData).length > 0) {
      console.log('   ✅ Analysis data is valid, would be stored in database');
      console.log('   Sample data:', JSON.stringify(analysisData, null, 2).substring(0, 1000) + '...');
    } else {
      console.log('   ❌ Analysis data is empty or invalid');
    }

    console.log('\n✅ Full pipeline test completed!');

  } catch (error) {
    console.error('❌ Pipeline test failed:', error.message);
    console.error('   Error details:', error);
  }
}

// Script entry point: start the run. The returned promise is deliberately not awaited.
void testFullPipeline();