- Add new database migrations for analysis data and job tracking
- Implement enhanced document processing service with LLM integration
- Add processing progress and queue status components
- Create testing guides and utility scripts for CIM processing
- Update frontend components for better user experience
- Add environment configuration and backup files
- Implement job queue service and upload progress tracking
231 lines
6.9 KiB
JavaScript
231 lines
6.9 KiB
JavaScript
// Dependencies: Postgres client pool, filesystem access, PDF text
// extraction, and the Anthropic SDK used for CIM analysis.
const { Pool } = require('pg');
const fs = require('fs');
const pdfParse = require('pdf-parse');
const Anthropic = require('@anthropic-ai/sdk');

// Load environment variables (ANTHROPIC_API_KEY, DATABASE_URL) from .env.
require('dotenv').config();

// Database connection pool. Read the connection string from the environment
// so credentials are never hard-coded; the localhost value is a development
// fallback only. (?? keeps an explicitly-set empty string from being used.)
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ??
    'postgresql://postgres:password@localhost:5432/cim_processor',
});

// Initialize Anthropic client; used by processWithLLM() below.
const anthropic = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY,
});
|
/**
 * Analyze extracted CIM text with Anthropic Claude.
 *
 * @param {string} text - Raw text extracted from the PDF. Only the first
 *   15,000 characters are sent, to bound prompt size.
 * @returns {Promise<object>} Structured analysis (summary, companyName,
 *   industry, financials, risks, opportunities, ...). If the model response
 *   cannot be parsed as JSON, a placeholder fallback object is returned
 *   instead so the caller can still mark the document completed.
 * @throws Re-throws any Anthropic API error (auth, network, rate limit).
 */
async function processWithLLM(text) {
  console.log('🤖 Processing with Anthropic Claude...');

  try {
    const prompt = `You are an expert investment analyst reviewing a Confidential Information Memorandum (CIM).

Please analyze the following CIM document and provide a comprehensive summary and analysis in the following JSON format:

{
  "summary": "A concise 2-3 sentence summary of the company and investment opportunity",
  "companyName": "The company name",
  "industry": "Primary industry/sector",
  "revenue": "Annual revenue (if available)",
  "ebitda": "EBITDA (if available)",
  "employees": "Number of employees (if available)",
  "founded": "Year founded (if available)",
  "location": "Primary location/headquarters",
  "keyMetrics": {
    "metric1": "value1",
    "metric2": "value2"
  },
  "financials": {
    "revenue": ["year1", "year2", "year3"],
    "ebitda": ["year1", "year2", "year3"],
    "margins": ["year1", "year2", "year3"]
  },
  "risks": [
    "Risk factor 1",
    "Risk factor 2",
    "Risk factor 3"
  ],
  "opportunities": [
    "Opportunity 1",
    "Opportunity 2",
    "Opportunity 3"
  ],
  "investmentThesis": "Key investment thesis points",
  "keyQuestions": [
    "Important question 1",
    "Important question 2"
  ]
}

CIM Document Content:
${text.substring(0, 15000)}

Please provide your analysis in valid JSON format only.`;

    const message = await anthropic.messages.create({
      model: "claude-3-5-sonnet-20241022",
      max_tokens: 2000,
      temperature: 0.3,
      system: "You are an expert investment analyst. Provide analysis in valid JSON format only.",
      messages: [
        {
          role: "user",
          content: prompt
        }
      ]
    });

    // The Messages API returns an array of typed content blocks; pick the
    // first text block rather than assuming content[0] is text.
    const responseText =
      message.content.find((block) => block.type === 'text')?.text ?? '';

    // Models frequently wrap JSON in markdown code fences despite the
    // instructions, so slice out the outermost {...} before parsing.
    const start = responseText.indexOf('{');
    const end = responseText.lastIndexOf('}');
    const candidate =
      start !== -1 && end > start
        ? responseText.slice(start, end + 1)
        : responseText;

    try {
      return JSON.parse(candidate);
    } catch (parseError) {
      // Keep the pipeline moving with a clearly-placeholder analysis.
      console.log('⚠️ Failed to parse JSON, using fallback analysis');
      return {
        summary: "Document analysis completed",
        companyName: "Company Name",
        industry: "Industry",
        revenue: "Not specified",
        ebitda: "Not specified",
        employees: "Not specified",
        founded: "Not specified",
        location: "Not specified",
        keyMetrics: {
          "Document Type": "CIM",
          "Pages": "Multiple"
        },
        financials: {
          revenue: ["Not specified", "Not specified", "Not specified"],
          ebitda: ["Not specified", "Not specified", "Not specified"],
          margins: ["Not specified", "Not specified", "Not specified"]
        },
        risks: [
          "Analysis completed",
          "Document reviewed"
        ],
        opportunities: [
          "Document contains investment information",
          "Ready for review"
        ],
        investmentThesis: "Document analysis completed",
        keyQuestions: [
          "Review document for specific details",
          "Validate financial information"
        ]
      };
    }

  } catch (error) {
    console.error('❌ Error calling Anthropic API:', error.message);
    throw error;
  }
}
|
|
|
|
/**
 * Process every document in 'uploaded' status: extract PDF text, run the
 * LLM analysis, and persist the results back to the database.
 *
 * For each document the lifecycle is:
 *   uploaded -> processing_llm -> completed (or error on failure).
 * A failure on one document is recorded and does not stop the remaining
 * documents from being processed. The pool is closed on exit either way.
 *
 * @returns {Promise<void>}
 */
async function processUploadedDocs() {
  try {
    console.log('🚀 Processing All Uploaded Documents');
    console.log('====================================');

    // Find all documents with 'uploaded' status
    const uploadedDocs = await pool.query(`
      SELECT id, original_file_name, status, file_path, created_at
      FROM documents
      WHERE status = 'uploaded'
      ORDER BY created_at DESC
    `);

    console.log(`📋 Found ${uploadedDocs.rows.length} documents to process:`);
    uploadedDocs.rows.forEach(doc => {
      console.log(`  - ${doc.original_file_name} (${doc.status})`);
    });

    if (uploadedDocs.rows.length === 0) {
      console.log('✅ No documents need processing');
      return;
    }

    // Process each document sequentially (keeps DB/API load predictable).
    for (const document of uploadedDocs.rows) {
      console.log(`\n🔄 Processing: ${document.original_file_name}`);

      try {
        // Check if file exists on disk before touching its DB status.
        if (!fs.existsSync(document.file_path)) {
          console.log(`❌ File not found: ${document.file_path}`);
          continue;
        }

        // Update status to processing
        await pool.query(`
          UPDATE documents
          SET status = 'processing_llm',
              updated_at = CURRENT_TIMESTAMP
          WHERE id = $1
        `, [document.id]);

        console.log('📄 Extracting text from PDF...');

        // Extract text from PDF
        const dataBuffer = fs.readFileSync(document.file_path);
        const pdfData = await pdfParse(dataBuffer);

        console.log(`📊 Extracted ${pdfData.text.length} characters from ${pdfData.numpages} pages`);

        // Process with LLM
        console.log('🤖 Starting AI analysis...');
        const llmResult = await processWithLLM(pdfData.text);

        console.log('✅ AI analysis completed!');
        // Guard: the LLM result may lack a string summary; don't crash the
        // loop over a log line.
        console.log(`📋 Summary: ${String(llmResult.summary ?? '').substring(0, 100)}...`);

        // Update document with results
        await pool.query(`
          UPDATE documents
          SET status = 'completed',
              generated_summary = $1,
              updated_at = CURRENT_TIMESTAMP
          WHERE id = $2
        `, [llmResult.summary, document.id]);

        // Update processing jobs
        await pool.query(`
          UPDATE processing_jobs
          SET status = 'completed',
              progress = 100,
              completed_at = CURRENT_TIMESTAMP
          WHERE document_id = $1
        `, [document.id]);

        console.log('💾 Results saved to database');

      } catch (error) {
        console.error(`❌ Error processing ${document.original_file_name}:`, error.message);

        // Mark as failed. This update is guarded separately: if recording
        // the failure itself throws (e.g. transient DB error), we log it
        // and move on to the next document instead of aborting the loop.
        try {
          await pool.query(`
            UPDATE documents
            SET status = 'error',
                error_message = $1,
                updated_at = CURRENT_TIMESTAMP
            WHERE id = $2
          `, [error.message, document.id]);
        } catch (updateError) {
          console.error(`❌ Failed to record error status for ${document.original_file_name}:`, updateError.message);
        }
      }
    }

    console.log('\n🎉 Processing completed!');
    console.log('📊 Next Steps:');
    console.log('1. Go to http://localhost:3000');
    console.log('2. Login with user1@example.com / user123');
    console.log('3. Check the Documents tab');
    console.log('4. All uploaded documents should now show as "Completed"');

  } catch (error) {
    console.error('❌ Error during processing:', error.message);
  } finally {
    // Always release the pool so the Node process can exit cleanly.
    await pool.end();
  }
}
|
|
|
|
// Entry point. processUploadedDocs() catches its own processing errors, but
// the awaited pool.end() in its finally block can still reject — attach a
// .catch so the script never dies on an unhandled promise rejection.
processUploadedDocs().catch((err) => {
  console.error('❌ Unhandled error:', err.message);
  process.exitCode = 1;
});
|