Files
cim_summary/backend/enhanced-llm-process.js
Jon c67dab22b4 Add comprehensive CIM processing features and UI improvements
- Add new database migrations for analysis data and job tracking
- Implement enhanced document processing service with LLM integration
- Add processing progress and queue status components
- Create testing guides and utility scripts for CIM processing
- Update frontend components for better user experience
- Add environment configuration and backup files
- Implement job queue service and upload progress tracking
2025-07-27 20:25:46 -04:00

348 lines
14 KiB
JavaScript

const { Pool } = require('pg');
const fs = require('fs');
const pdfParse = require('pdf-parse');
const Anthropic = require('@anthropic-ai/sdk');
// Load environment variables
require('dotenv').config();
const pool = new Pool({
connectionString: 'postgresql://postgres:password@localhost:5432/cim_processor'
});
// Initialize Anthropic client
const anthropic = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
});
async function processWithEnhancedLLM(text) {
console.log('🤖 Processing with Enhanced BPCP CIM Review Template...');
try {
const prompt = `You are an expert investment analyst at BPCP (Blue Point Capital Partners) reviewing a Confidential Information Memorandum (CIM).
Your task is to analyze the following CIM document and create a comprehensive BPCP CIM Review Template following the exact structure and format specified below.
Please provide your analysis in the following JSON format that matches the BPCP CIM Review Template:
{
"dealOverview": {
"targetCompanyName": "Company name",
"industrySector": "Primary industry/sector",
"geography": "HQ & Key Operations location",
"dealSource": "How the deal was sourced",
"transactionType": "Type of transaction (e.g., LBO, Growth Equity, etc.)",
"dateCIMReceived": "Date CIM was received",
"dateReviewed": "Date reviewed (today's date)",
"reviewers": "Name(s) of reviewers",
"cimPageCount": "Number of pages in CIM",
"statedReasonForSale": "Reason for sale if provided"
},
"businessDescription": {
"coreOperationsSummary": "3-5 sentence summary of core operations",
"keyProductsServices": "Key products/services and revenue mix (estimated % if available)",
"uniqueValueProposition": "Why customers buy from this company",
"customerBaseOverview": {
"keyCustomerSegments": "Key customer segments/types",
"customerConcentrationRisk": "Top 5 and/or Top 10 customers as % revenue",
"typicalContractLength": "Typical contract length / recurring revenue %"
},
"keySupplierOverview": {
"dependenceConcentrationRisk": "Supplier dependence/concentration risk if critical"
}
},
"marketIndustryAnalysis": {
"estimatedMarketSize": "TAM/SAM if provided",
"estimatedMarketGrowthRate": "Market growth rate (% CAGR - historical & projected)",
"keyIndustryTrends": "Key industry trends & drivers (tailwinds/headwinds)",
"competitiveLandscape": {
"keyCompetitors": "Key competitors identified",
"targetMarketPosition": "Target's stated market position/rank",
"basisOfCompetition": "Basis of competition"
},
"barriersToEntry": "Barriers to entry / competitive moat"
},
"financialSummary": {
"financials": {
"fy3": {
"revenue": "Revenue amount",
"revenueGrowth": "Revenue growth %",
"grossProfit": "Gross profit amount",
"grossMargin": "Gross margin %",
"ebitda": "EBITDA amount",
"ebitdaMargin": "EBITDA margin %"
},
"fy2": {
"revenue": "Revenue amount",
"revenueGrowth": "Revenue growth %",
"grossProfit": "Gross profit amount",
"grossMargin": "Gross margin %",
"ebitda": "EBITDA amount",
"ebitdaMargin": "EBITDA margin %"
},
"fy1": {
"revenue": "Revenue amount",
"revenueGrowth": "Revenue growth %",
"grossProfit": "Gross profit amount",
"grossMargin": "Gross margin %",
"ebitda": "EBITDA amount",
"ebitdaMargin": "EBITDA margin %"
},
"ltm": {
"revenue": "Revenue amount",
"revenueGrowth": "Revenue growth %",
"grossProfit": "Gross profit amount",
"grossMargin": "Gross margin %",
"ebitda": "EBITDA amount",
"ebitdaMargin": "EBITDA margin %"
}
},
"qualityOfEarnings": "Quality of earnings/adjustments impression",
"revenueGrowthDrivers": "Revenue growth drivers (stated)",
"marginStabilityAnalysis": "Margin stability/trend analysis",
"capitalExpenditures": "Capital expenditures (LTM % of revenue)",
"workingCapitalIntensity": "Working capital intensity impression",
"freeCashFlowQuality": "Free cash flow quality impression"
},
"managementTeamOverview": {
"keyLeaders": "Key leaders identified (CEO, CFO, COO, etc.)",
"managementQualityAssessment": "Initial assessment of quality/experience",
"postTransactionIntentions": "Management's stated post-transaction role/intentions",
"organizationalStructure": "Organizational structure overview"
},
"preliminaryInvestmentThesis": {
"keyAttractions": "Key attractions/strengths (why invest?)",
"potentialRisks": "Potential risks/concerns (why not invest?)",
"valueCreationLevers": "Initial value creation levers (how PE adds value)",
"alignmentWithFundStrategy": "Alignment with BPCP fund strategy (5+MM EBITDA, consumer/industrial, M&A, technology, supply chain optimization, founder/family-owned, Cleveland/Charlotte proximity)"
},
"keyQuestionsNextSteps": {
"criticalQuestions": "Critical questions arising from CIM review",
"missingInformation": "Key missing information/areas for diligence focus",
"preliminaryRecommendation": "Preliminary recommendation (Proceed/Pass/More Info)",
"rationaleForRecommendation": "Rationale for recommendation",
"proposedNextSteps": "Proposed next steps"
}
}
CIM Document Content:
${text.substring(0, 20000)}
Please provide your analysis in valid JSON format only. Fill in all fields based on the information available in the CIM. If information is not available, use "Not specified" or "Not provided in CIM". Be thorough and professional in your analysis.`;
console.log('📤 Sending request to Anthropic Claude...');
const message = await anthropic.messages.create({
model: "claude-3-5-sonnet-20241022",
max_tokens: 4000,
temperature: 0.3,
system: "You are an expert investment analyst at BPCP. Provide comprehensive analysis in valid JSON format only, following the exact BPCP CIM Review Template structure.",
messages: [
{
role: "user",
content: prompt
}
]
});
console.log('✅ Received response from Anthropic Claude');
const responseText = message.content[0].text;
console.log('📋 Raw response length:', responseText.length, 'characters');
try {
const analysis = JSON.parse(responseText);
return analysis;
} catch (parseError) {
console.log('⚠️ Failed to parse JSON, using fallback analysis');
return {
dealOverview: {
targetCompanyName: "Company Name",
industrySector: "Industry",
geography: "Location",
dealSource: "Not specified",
transactionType: "Not specified",
dateCIMReceived: new Date().toISOString().split('T')[0],
dateReviewed: new Date().toISOString().split('T')[0],
reviewers: "Analyst",
cimPageCount: "Multiple",
statedReasonForSale: "Not specified"
},
businessDescription: {
coreOperationsSummary: "Document analysis completed",
keyProductsServices: "Not specified",
uniqueValueProposition: "Not specified",
customerBaseOverview: {
keyCustomerSegments: "Not specified",
customerConcentrationRisk: "Not specified",
typicalContractLength: "Not specified"
},
keySupplierOverview: {
dependenceConcentrationRisk: "Not specified"
}
},
marketIndustryAnalysis: {
estimatedMarketSize: "Not specified",
estimatedMarketGrowthRate: "Not specified",
keyIndustryTrends: "Not specified",
competitiveLandscape: {
keyCompetitors: "Not specified",
targetMarketPosition: "Not specified",
basisOfCompetition: "Not specified"
},
barriersToEntry: "Not specified"
},
financialSummary: {
financials: {
fy3: { revenue: "Not specified", revenueGrowth: "Not specified", grossProfit: "Not specified", grossMargin: "Not specified", ebitda: "Not specified", ebitdaMargin: "Not specified" },
fy2: { revenue: "Not specified", revenueGrowth: "Not specified", grossProfit: "Not specified", grossMargin: "Not specified", ebitda: "Not specified", ebitdaMargin: "Not specified" },
fy1: { revenue: "Not specified", revenueGrowth: "Not specified", grossProfit: "Not specified", grossMargin: "Not specified", ebitda: "Not specified", ebitdaMargin: "Not specified" },
ltm: { revenue: "Not specified", revenueGrowth: "Not specified", grossProfit: "Not specified", grossMargin: "Not specified", ebitda: "Not specified", ebitdaMargin: "Not specified" }
},
qualityOfEarnings: "Not specified",
revenueGrowthDrivers: "Not specified",
marginStabilityAnalysis: "Not specified",
capitalExpenditures: "Not specified",
workingCapitalIntensity: "Not specified",
freeCashFlowQuality: "Not specified"
},
managementTeamOverview: {
keyLeaders: "Not specified",
managementQualityAssessment: "Not specified",
postTransactionIntentions: "Not specified",
organizationalStructure: "Not specified"
},
preliminaryInvestmentThesis: {
keyAttractions: "Document reviewed",
potentialRisks: "Analysis completed",
valueCreationLevers: "Not specified",
alignmentWithFundStrategy: "Not specified"
},
keyQuestionsNextSteps: {
criticalQuestions: "Review document for specific details",
missingInformation: "Validate financial information",
preliminaryRecommendation: "More Information Required",
rationaleForRecommendation: "Document analysis completed but requires manual review",
proposedNextSteps: "Conduct detailed financial and operational diligence"
}
};
}
} catch (error) {
console.error('❌ Error calling Anthropic API:', error.message);
throw error;
}
}
async function enhancedLLMProcess() {
try {
console.log('🚀 Starting Enhanced BPCP CIM Review Template Processing');
console.log('========================================================');
console.log('🔑 Using Anthropic API Key:', process.env.ANTHROPIC_API_KEY ? '✅ Configured' : '❌ Missing');
// Find the STAX CIM document
const docResult = await pool.query(`
SELECT id, original_file_name, status, user_id, file_path
FROM documents
WHERE original_file_name = 'stax-cim-test.pdf'
ORDER BY created_at DESC
LIMIT 1
`);
if (docResult.rows.length === 0) {
console.log('❌ No STAX CIM document found');
return;
}
const document = docResult.rows[0];
console.log(`📄 Document: ${document.original_file_name}`);
console.log(`📁 File: ${document.file_path}`);
// Check if file exists
if (!fs.existsSync(document.file_path)) {
console.log('❌ File not found');
return;
}
console.log('✅ File found, extracting text...');
// Extract text from PDF
const dataBuffer = fs.readFileSync(document.file_path);
const pdfData = await pdfParse(dataBuffer);
console.log(`📊 Extracted ${pdfData.text.length} characters from ${pdfData.numpages} pages`);
// Update document status
await pool.query(`
UPDATE documents
SET status = 'processing_llm',
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
`, [document.id]);
console.log('🔄 Status updated to processing_llm');
// Process with enhanced LLM
console.log('🤖 Starting Enhanced BPCP CIM Review Template analysis...');
const llmResult = await processWithEnhancedLLM(pdfData.text);
console.log('✅ Enhanced LLM processing completed!');
console.log('📋 Results Summary:');
console.log('- Company:', llmResult.dealOverview.targetCompanyName);
console.log('- Industry:', llmResult.dealOverview.industrySector);
console.log('- Geography:', llmResult.dealOverview.geography);
console.log('- Transaction Type:', llmResult.dealOverview.transactionType);
console.log('- CIM Pages:', llmResult.dealOverview.cimPageCount);
console.log('- Recommendation:', llmResult.keyQuestionsNextSteps.preliminaryRecommendation);
// Create a comprehensive summary for the database
const summary = `${llmResult.dealOverview.targetCompanyName} - ${llmResult.dealOverview.industrySector} company in ${llmResult.dealOverview.geography}. ${llmResult.businessDescription.coreOperationsSummary}`;
// Update document with results
await pool.query(`
UPDATE documents
SET status = 'completed',
generated_summary = $1,
analysis_data = $2,
updated_at = CURRENT_TIMESTAMP
WHERE id = $3
`, [summary, JSON.stringify(llmResult), document.id]);
console.log('💾 Results saved to database');
// Update processing jobs
await pool.query(`
UPDATE processing_jobs
SET status = 'completed',
progress = 100,
completed_at = CURRENT_TIMESTAMP
WHERE document_id = $1
`, [document.id]);
console.log('🎉 Enhanced BPCP CIM Review Template processing completed!');
console.log('');
console.log('📊 Next Steps:');
console.log('1. Go to http://localhost:3000');
console.log('2. Login with user1@example.com / user123');
console.log('3. Check the Documents tab');
console.log('4. Click on the STAX CIM document');
console.log('5. You should now see the full BPCP CIM Review Template');
console.log('');
console.log('🔍 Template Sections Generated:');
console.log('✅ (A) Deal Overview');
console.log('✅ (B) Business Description');
console.log('✅ (C) Market & Industry Analysis');
console.log('✅ (D) Financial Summary');
console.log('✅ (E) Management Team Overview');
console.log('✅ (F) Preliminary Investment Thesis');
console.log('✅ (G) Key Questions & Next Steps');
} catch (error) {
console.error('❌ Error during processing:', error.message);
console.error('Full error:', error);
} finally {
await pool.end();
}
}
enhancedLLMProcess();