Files
cim_summary/backend/src/services/pdfGenerationService.ts

631 lines
17 KiB
TypeScript

// Resolve puppeteer at load time. If it cannot be loaded (the case this
// fallback is intended for is the test environment), substitute an inert
// stand-in that exposes only the calls this module makes.
let puppeteer: any;
try {
  puppeteer = require('puppeteer');
} catch (error) {
  // Shared do-nothing async method used for every stubbed call that yields no value.
  const noop = async (): Promise<void> => {};

  // Each newPage() call hands back a fresh stub page.
  const createStubPage = () => ({
    setContent: noop,
    pdf: noop,
    close: noop,
    evaluate: async () => ({ title: 'Test', url: 'test://' }),
    goto: noop,
  });

  // Each launch() call hands back a fresh stub browser.
  const createStubBrowser = () => ({
    newPage: async () => createStubPage(),
    close: noop,
  });

  puppeteer = {
    launch: async () => createStubBrowser(),
  };
}
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import { logger } from '../utils/logger';
/**
 * Per-call PDF settings. Every field is optional; the service spreads the
 * supplied values over its own defaults before handing them to `page.pdf()`.
 */
export interface PDFGenerationOptions {
  /** Paper size of the generated document. */
  format?: 'A4' | 'Letter';
  /**
   * Page margins. When supplied, all four sides are required. Values are
   * length strings (the service's own defaults use the form `'1in'`).
   */
  margin?: Record<'top' | 'right' | 'bottom' | 'left', string>;
  /** Markup for the page header. */
  headerTemplate?: string;
  /** Markup for the page footer. */
  footerTemplate?: string;
  /** Whether a header and footer are printed on each page. */
  displayHeaderFooter?: boolean;
  /** Whether background colors and images are printed. */
  printBackground?: boolean;
}
/**
 * Renders PDFs through a single, lazily launched headless browser.
 *
 * Sources can be markdown, an HTML file, a URL, or structured CIM review
 * data. Methods that return `boolean` or `Buffer | null` report every failure
 * (including browser launch and page creation) through that return value and
 * the logger; only `generateCIMReviewPDF` throws.
 */
class PDFGenerationService {
  /** Shared browser instance; created by getBrowser(), released by close(). */
  private browser: any = null;

  /**
   * Launch currently in progress, if any. Concurrent first callers share it
   * so that only one browser process is ever started.
   */
  private browserLaunch: Promise<any> | null = null;

  /** Baseline PDF settings; per-call options are spread over these. */
  private readonly defaultOptions: PDFGenerationOptions = {
    format: 'A4',
    margin: {
      top: '1in',
      right: '1in',
      bottom: '1in',
      left: '1in',
    },
    displayHeaderFooter: true,
    printBackground: true,
  };

  /**
   * Return the shared browser, launching it on first use.
   *
   * Callers that arrive while a launch is still in flight await that same
   * launch instead of starting (and leaking) additional browsers. A failed
   * launch is not cached, so the next call retries.
   */
  private async getBrowser(): Promise<any> {
    if (this.browser) {
      return this.browser;
    }
    if (!this.browserLaunch) {
      // Promise.resolve().then(...) also converts a synchronous throw from
      // launch() into a rejection handled by the same path.
      const launch: Promise<any> = Promise.resolve().then(() =>
        puppeteer.launch({
          headless: 'new',
          args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--no-first-run',
            '--no-zygote',
            '--disable-gpu',
          ],
        })
      );
      this.browserLaunch = launch;
      const forgetLaunch = (): void => {
        if (this.browserLaunch === launch) {
          this.browserLaunch = null;
        }
      };
      // Registered before any caller awaits `launch`, so `this.browser` is
      // set by the time those callers resume.
      launch.then((browser: any) => {
        this.browser = browser;
        forgetLaunch();
      }, forgetLaunch);
    }
    return this.browserLaunch;
  }

  /**
   * Run `task` against a fresh page and always close the page afterwards.
   *
   * A failure while closing the page is logged but never replaces the task's
   * own result or error.
   */
  private async withPage<T>(task: (page: any) => Promise<T>): Promise<T> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();
    try {
      return await task(page);
    } finally {
      try {
        await page.close();
      } catch (closeError) {
        logger.error('Failed to close PDF page', closeError);
      }
    }
  }

  /** Print `page` to `outputPath`, creating the parent directory if needed. */
  private async writePDF(
    page: any,
    outputPath: string,
    options: PDFGenerationOptions
  ): Promise<void> {
    // Recursive mkdir succeeds silently when the directory already exists.
    await fs.promises.mkdir(path.dirname(outputPath), { recursive: true });
    await page.pdf({
      ...this.defaultOptions,
      ...options,
      path: outputPath,
    });
  }

  /**
   * Load a complete HTML document into a page and return the printed PDF.
   *
   * @throws if rendering fails or the browser yields no PDF data.
   */
  private async renderHTMLToBuffer(
    html: string,
    options: PDFGenerationOptions
  ): Promise<Buffer> {
    return this.withPage(async (page) => {
      await page.setContent(html, {
        waitUntil: 'networkidle0',
      });
      const raw = await page.pdf({
        ...this.defaultOptions,
        ...options,
      });
      if (raw == null) {
        throw new Error('Failed to generate PDF buffer');
      }
      logger.info('PDF buffer generated successfully');
      // Depending on the Puppeteer release, pdf() yields a Buffer or a plain
      // Uint8Array; normalise so the declared return type holds.
      return Buffer.isBuffer(raw) ? raw : Buffer.from(raw);
    });
  }

  /**
   * Build a `file:` URL for a local path. Relative paths are resolved and
   * characters such as `#`, `?`, `%` and spaces are percent-encoded.
   */
  private toFileURL(filePath: string): string {
    return pathToFileURL(filePath).href;
  }

  /** Escape text for safe inclusion in HTML element content or attributes. */
  private escapeHTML(text: string): string {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /** Apply inline markdown: `**bold**` first, then `*italic*`. */
  private renderInlineMarkdown(text: string): string {
    return text
      .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
      .replace(/\*(.*?)\*/g, '<em>$1</em>');
  }

  /**
   * Convert the supported markdown subset to an HTML fragment, line by line:
   * `#`..`######` headings, `- ` bullets (consecutive bullets share one
   * `<ul>`), and one `<p>` per remaining non-blank line. Lines that already
   * start with a block-level HTML tag (e.g. an embedded `<table>`) are passed
   * through unwrapped. Raw HTML is deliberately not escaped so that embedded
   * tables keep working.
   */
  private renderMarkdownBody(markdown: string): string {
    const rawHtmlLine =
      /^\s*<\/?(?:table|thead|tbody|tfoot|tr|th|td|caption|colgroup|col|div|ul|ol|li|p|h[1-6]|hr|blockquote|pre)\b/i;
    const out: string[] = [];
    let inList = false;
    const closeList = (): void => {
      if (inList) {
        out.push('</ul>');
        inList = false;
      }
    };

    for (const rawLine of markdown.split(/\r?\n/)) {
      const line = rawLine.trimEnd();
      if (line.trim() === '') {
        closeList();
        continue;
      }

      const heading = /^(#{1,6}) (.*)$/.exec(line);
      if (heading) {
        const [, hashes = '#', text = ''] = heading;
        closeList();
        out.push(`<h${hashes.length}>${this.renderInlineMarkdown(text)}</h${hashes.length}>`);
        continue;
      }

      const bullet = /^- (.*)$/.exec(line);
      if (bullet) {
        const [, text = ''] = bullet;
        if (!inList) {
          out.push('<ul>');
          inList = true;
        }
        out.push(`<li>${this.renderInlineMarkdown(text)}</li>`);
        continue;
      }

      closeList();
      const rendered = this.renderInlineMarkdown(line);
      out.push(rawHtmlLine.test(line) ? rendered : `<p>${rendered}</p>`);
    }
    closeList();
    return out.join('\n');
  }

  /**
   * Convert markdown to a complete, styled HTML document with the
   * "CIM Review Summary" header (including the generation date) and the
   * confidentiality footer.
   */
  private markdownToHTML(markdown: string): string {
    const body = this.renderMarkdownBody(markdown);
    return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>CIM Review Summary</title>
<style>
@page {
  margin: 0.75in;
  size: A4;
}
body {
  font-family: 'Times New Roman', serif;
  font-size: 10pt;
  line-height: 1.4;
  color: #2c3e50;
  margin: 0;
  padding: 0;
}
h1 {
  font-size: 18pt;
  font-weight: bold;
  color: #1a365d;
  text-align: center;
  margin-bottom: 8pt;
  border-bottom: 2pt solid #2c5282;
  padding-bottom: 8pt;
}
h2 {
  font-size: 14pt;
  font-weight: bold;
  color: #2d3748;
  margin-top: 20pt;
  margin-bottom: 8pt;
  border-bottom: 1pt solid #cbd5e0;
  padding-bottom: 4pt;
  page-break-after: avoid;
}
h3 {
  font-size: 12pt;
  font-weight: bold;
  color: #4a5568;
  margin-top: 16pt;
  margin-bottom: 6pt;
  page-break-after: avoid;
}
p {
  margin-bottom: 8pt;
  text-align: justify;
}
ul {
  margin-bottom: 8pt;
  margin-left: 20pt;
}
li {
  margin-bottom: 3pt;
  text-align: justify;
}
strong {
  font-weight: bold;
  color: #2d3748;
}
.header {
  text-align: center;
  margin-bottom: 20pt;
  padding-bottom: 12pt;
  border-bottom: 1pt solid #e2e8f0;
}
.header h1 {
  margin-bottom: 4pt;
}
.header p {
  font-size: 9pt;
  color: #718096;
  margin: 0;
}
.footer {
  text-align: center;
  margin-top: 20pt;
  padding-top: 12pt;
  border-top: 1pt solid #e2e8f0;
  font-size: 8pt;
  color: #718096;
}
.section {
  margin-bottom: 16pt;
  page-break-inside: avoid;
}
.financial-table {
  width: 100%;
  border-collapse: collapse;
  margin: 8pt 0;
  font-size: 9pt;
}
.financial-table th,
.financial-table td {
  border: 1pt solid #cbd5e0;
  padding: 4pt;
  text-align: left;
}
.financial-table th {
  background-color: #f7fafc;
  font-weight: bold;
  color: #2d3748;
}
.page-break {
  page-break-before: always;
}
.avoid-break {
  page-break-inside: avoid;
}
</style>
</head>
<body>
<div class="header">
<h1>CIM Review Summary</h1>
<p>Generated on ${new Date().toLocaleDateString()}</p>
</div>
<div class="content">
${body}
</div>
<div class="footer">
<p>BPCP CIM Document Processor | Confidential</p>
</div>
</body>
</html>
`;
  }

  /**
   * Generate a PDF file from markdown content.
   *
   * @param markdown Markdown source (see renderMarkdownBody for the subset).
   * @param outputPath Destination file; its directory is created if missing.
   * @param options Overrides spread over the service defaults.
   * @returns `true` on success, `false` on any failure (which is logged).
   */
  async generatePDFFromMarkdown(
    markdown: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    try {
      await this.withPage(async (page) => {
        await page.setContent(this.markdownToHTML(markdown), {
          waitUntil: 'networkidle0',
        });
        await this.writePDF(page, outputPath, options);
      });
      logger.info(`PDF generated successfully: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation failed: ${outputPath}`, error);
      return false;
    }
  }

  /**
   * Generate a PDF from markdown and return it in memory.
   *
   * @returns The PDF bytes, or `null` on any failure (which is logged).
   */
  async generatePDFBuffer(
    markdown: string,
    options: PDFGenerationOptions = {}
  ): Promise<Buffer | null> {
    try {
      return await this.renderHTMLToBuffer(this.markdownToHTML(markdown), options);
    } catch (error) {
      logger.error('PDF buffer generation failed', error);
      return null;
    }
  }

  /**
   * Generate a PDF file from a local HTML file.
   *
   * @param htmlPath Path to the HTML file; relative paths are resolved.
   * @returns `true` on success, `false` on any failure (which is logged).
   */
  async generatePDFFromHTML(
    htmlPath: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    try {
      await this.withPage(async (page) => {
        await page.goto(this.toFileURL(htmlPath), {
          waitUntil: 'networkidle0',
        });
        await this.writePDF(page, outputPath, options);
      });
      logger.info(`PDF generated from HTML: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from HTML failed: ${outputPath}`, error);
      return false;
    }
  }

  /**
   * Generate a PDF file from a URL. Navigation is given 30 seconds.
   *
   * @returns `true` on success, `false` on any failure (which is logged).
   */
  async generatePDFFromURL(
    url: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    try {
      await this.withPage(async (page) => {
        await page.goto(url, {
          waitUntil: 'networkidle0',
          timeout: 30000,
        });
        await this.writePDF(page, outputPath, options);
      });
      logger.info(`PDF generated from URL: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from URL failed: ${outputPath}`, error);
      return false;
    }
  }

  /**
   * Cheap sanity check that a file looks like a PDF: it must be at least
   * 100 bytes long and start with the `%PDF` magic number. Only the first
   * four bytes are read.
   *
   * @returns `false` when the check fails or the file cannot be read.
   */
  async validatePDF(filePath: string): Promise<boolean> {
    try {
      const handle = await fs.promises.open(filePath, 'r');
      try {
        const { size } = await handle.stat();
        if (size < 100) {
          return false;
        }
        const header = Buffer.alloc(4);
        const { bytesRead } = await handle.read(header, 0, 4, 0);
        return bytesRead === 4 && header.toString('ascii') === '%PDF';
      } finally {
        await handle.close();
      }
    } catch (error) {
      logger.error(`PDF validation failed: ${filePath}`, error);
      return false;
    }
  }

  /**
   * Get PDF metadata.
   *
   * NOTE: placeholder implementation. The page is opened, but the returned
   * title, url and pageCount are fixed values, not read from the document.
   *
   * @returns The placeholder metadata, or `null` if the file cannot be opened.
   */
  async getPDFMetadata(filePath: string): Promise<any> {
    try {
      return await this.withPage(async (page) => {
        await page.goto(this.toFileURL(filePath), {
          waitUntil: 'networkidle0',
        });
        return page.evaluate(() => {
          return {
            title: 'PDF Document',
            url: 'file://',
            pageCount: 1, // This would need to be calculated differently
          };
        });
      });
    } catch (error) {
      logger.error(`Failed to get PDF metadata: ${filePath}`, error);
      return null;
    }
  }

  /**
   * Generate the CIM Review PDF from analysis data.
   *
   * The report HTML is a complete document and is rendered as-is; it is not
   * passed through the markdown converter.
   *
   * @throws if the HTML cannot be built or the PDF cannot be rendered.
   */
  async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
    try {
      const html = this.generateCIMReviewHTML(analysisData);
      return await this.renderHTMLToBuffer(html, {
        format: 'A4',
        margin: {
          top: '0.5in',
          right: '0.5in',
          bottom: '0.5in',
          left: '0.5in',
        },
        displayHeaderFooter: true,
        printBackground: true,
      });
    } catch (error) {
      logger.error('Failed to generate CIM Review PDF', error);
      throw error;
    }
  }

  /** True for values that should not be rendered: null, undefined, ''. */
  private isBlank(value: unknown): boolean {
    return value === null || value === undefined || value === '';
  }

  /**
   * Turn an arbitrary value into escaped display text. Arrays are joined
   * with ", " and objects are shown as JSON rather than "[object Object]".
   */
  private formatValue(value: unknown): string {
    if (Array.isArray(value)) {
      return value.map((item) => this.formatValue(item)).join(', ');
    }
    if (value !== null && typeof value === 'object') {
      return this.escapeHTML(JSON.stringify(value));
    }
    return this.escapeHTML(String(value));
  }

  /** Render one "Label: value" row. */
  private renderField(name: string, value: unknown): string {
    return `
<div class="field">
<span class="field-label">${this.escapeHTML(this.formatFieldName(name))}:</span>
<span class="field-value">${this.formatValue(value)}</span>
</div>
`;
  }

  /** Render the FY3/FY2/FY1/LTM table; periods without data are omitted. */
  private renderFinancialTable(financials: Record<string, any>): string {
    const cell = (value: unknown): string =>
      this.isBlank(value) ? '-' : this.formatValue(value);
    const rows = ['fy3', 'fy2', 'fy1', 'ltm']
      .filter((period) => financials[period])
      .map((period) => {
        const data = financials[period];
        return `
<tr>
<td>${period.toUpperCase()}</td>
<td>${cell(data?.revenue)}</td>
<td>${cell(data?.revenueGrowth)}</td>
<td>${cell(data?.ebitda)}</td>
<td>${cell(data?.ebitdaMargin)}</td>
</tr>
`;
      })
      .join('');
    return (
      `<h3>Financial Data</h3>` +
      `<table class="financial-table">` +
      `<tr><th>Period</th><th>Revenue</th><th>Growth</th><th>EBITDA</th><th>Margin</th></tr>` +
      rows +
      `</table>`
    );
  }

  /** Render one key of a section: financial table, nested group, or field. */
  private renderEntry(key: string, value: unknown): string {
    if (this.isBlank(value)) {
      return '';
    }
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      // Must be tested before the generic nested-object case, otherwise the
      // table is never produced.
      if (key === 'financials') {
        return this.renderFinancialTable(value as Record<string, any>);
      }
      const fields = Object.entries(value)
        .filter(([, subValue]) => !this.isBlank(subValue))
        .map(([subKey, subValue]) => this.renderField(subKey, subValue))
        .join('');
      return `<h3>${this.escapeHTML(this.formatFieldName(key))}</h3>${fields}`;
    }
    return this.renderField(key, value);
  }

  /** Render one titled report section. */
  private renderSection(title: string, data: unknown): string {
    let body: string;
    if (data !== null && typeof data === 'object' && !Array.isArray(data)) {
      body = Object.entries(data)
        .map(([key, value]) => this.renderEntry(key, value))
        .join('');
    } else {
      // A section given as plain text or a list is shown as a single value
      // instead of being enumerated character by character or by index.
      body = `<div class="field"><span class="field-value">${this.formatValue(data)}</span></div>`;
    }
    return `<div class="section"><h2>${this.escapeHTML(title)}</h2>${body}</div>`;
  }

  /**
   * Build the complete CIM Review report document from analysis data.
   *
   * Sections whose data is missing are skipped. All values are HTML-escaped.
   */
  private generateCIMReviewHTML(analysisData: any): string {
    const sections = [
      { title: 'Deal Overview', data: analysisData.dealOverview },
      { title: 'Business Description', data: analysisData.businessDescription },
      { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis },
      { title: 'Financial Summary', data: analysisData.financialSummary },
      { title: 'Management Team Overview', data: analysisData.managementTeamOverview },
      { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis },
      { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps },
    ];
    const body = sections
      .filter((section) => section.data)
      .map((section) => this.renderSection(section.title, section.data))
      .join('');
    return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>CIM Review Report</title>
<style>
body { font-family: Arial, sans-serif; line-height: 1.6; margin: 0; padding: 20px; }
h1 { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
h2 { color: #34495e; margin-top: 30px; margin-bottom: 15px; }
h3 { color: #7f8c8d; margin-top: 20px; margin-bottom: 10px; }
.section { margin-bottom: 25px; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #2c3e50; }
.field-value { margin-left: 10px; }
.financial-table { width: 100%; border-collapse: collapse; margin: 10px 0; }
.financial-table th, .financial-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
.financial-table th { background-color: #f8f9fa; font-weight: bold; }
</style>
</head>
<body>
<h1>CIM Review Report</h1>
${body}
</body>
</html>
`;
  }

  /**
   * Turn a camelCase key into a display label, e.g. `dealOverview` ->
   * `Deal Overview`. Runs of capitals are kept together as acronyms:
   * `revenueCAGR` -> `Revenue CAGR`.
   */
  private formatFieldName(fieldName: string): string {
    return fieldName
      // Boundary between a lowercase letter or digit and a capital.
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      // Boundary between an acronym and a following capitalised word.
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      .replace(/^./, (first) => first.toUpperCase());
  }

  /**
   * Close the shared browser, if one is open. The reference is dropped
   * before closing so that a failed close cannot leave a stale browser
   * cached; the next render launches a fresh one.
   */
  async close(): Promise<void> {
    if (this.browser) {
      const browser = this.browser;
      this.browser = null;
      await browser.close();
    }
  }

  /**
   * Release the resources held by the service. Currently this only closes
   * the shared browser; no files are removed.
   */
  async cleanup(): Promise<void> {
    await this.close();
  }
}
// Module-level singleton: one PDFGenerationService instance is created when
// this module is first loaded and is exposed as both the named and the
// default export, so every importer shares the same instance.
export const pdfGenerationService = new PDFGenerationService();
export default pdfGenerationService;