// Listing metadata: 631 lines, 17 KiB, TypeScript.
// Puppeteer is loaded defensively so this module can still be imported where
// the package is unavailable (the unit-test environment). In that case a
// minimal no-op stand-in with the same call shape is used instead.
let puppeteer: any;
try {
  puppeteer = require('puppeteer');
} catch (error) {
  // The stand-in never writes a file or returns PDF bytes, so outside of tests
  // a silent fallback would make PDF generation appear to succeed while
  // producing nothing. Make that situation visible.
  if (process.env.NODE_ENV !== 'test') {
    console.warn(
      'puppeteer could not be loaded; falling back to a no-op mock. PDFs will NOT be generated.',
      error
    );
  }

  // Mock puppeteer for test environment
  puppeteer = {
    launch: async () => ({
      newPage: async () => ({
        setContent: async () => {},
        pdf: async () => {},
        close: async () => {},
        evaluate: async () => ({ title: 'Test', url: 'test://' }),
        goto: async () => {},
      }),
      close: async () => {},
    }),
  };
}

import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';

import { logger } from '../utils/logger';

/**
 * Caller-supplied PDF settings.
 *
 * The service spreads these over its own defaults and passes the result to the
 * browser page's `pdf()` call, so each field overrides the matching default.
 * The merge is shallow: supplying `margin` replaces the whole default margin.
 */
export interface PDFGenerationOptions {
  /** Paper size of the generated document. */
  format?: 'A4' | 'Letter';
  /** Page margins, given as length strings such as `'1in'`. */
  margin?: {
    top: string;
    right: string;
    bottom: string;
    left: string;
  };
  /** Markup used for the printed page header (presumably Puppeteer's `headerTemplate`; confirm against the installed version). */
  headerTemplate?: string;
  /** Markup used for the printed page footer (presumably Puppeteer's `footerTemplate`; confirm against the installed version). */
  footerTemplate?: string;
  /** Whether the printed header and footer are shown. */
  displayHeaderFooter?: boolean;
  /** Whether background colours and images are printed. */
  printBackground?: boolean;
}

/**
 * Generates PDFs from Markdown, HTML files, URLs and CIM review data using a
 * single, lazily launched headless browser that is reused between calls.
 *
 * Error contract: rendering errors inside the `boolean` / `Buffer | null`
 * generators are logged and reported through the return value. Failing to
 * launch the browser or to open a page rejects the returned promise.
 */
class PDFGenerationService {
  /**
   * Lines beginning with one of these block-level tags are emitted as-is by the
   * Markdown renderer instead of being wrapped in a paragraph.
   */
  private static readonly BLOCK_HTML_LINE =
    /^\s*<\/?(?:table|thead|tbody|tfoot|tr|th|td|caption|colgroup|col|div|ul|ol|li|h[1-6]|p|hr|pre|blockquote)\b/i;

  /** Launched browser, reused until `close()` is called. */
  private browser: any = null;

  /** In-flight launch, shared so concurrent first calls start only one browser. */
  private launching: Promise<any> | null = null;

  /** Baseline `pdf()` settings; caller options are spread over these. */
  private readonly defaultOptions: PDFGenerationOptions = {
    format: 'A4',
    margin: {
      top: '1in',
      right: '1in',
      bottom: '1in',
      left: '1in',
    },
    displayHeaderFooter: true,
    printBackground: true,
  };

  /**
   * Return the shared browser, launching it on first use.
   *
   * Concurrent callers that arrive while the launch is still pending await the
   * same launch instead of each starting a browser of their own.
   *
   * @returns The launched browser instance.
   */
  private async getBrowser(): Promise<any> {
    if (this.browser) {
      return this.browser;
    }
    if (this.launching) {
      return this.launching;
    }

    // Wrapped in an async arrow so that a synchronous throw from `launch`
    // also surfaces as a rejection of the shared promise.
    const launch: Promise<any> = (async () =>
      puppeteer.launch({
        headless: 'new',
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--disable-gpu',
        ],
      }))();
    this.launching = launch;

    try {
      this.browser = await launch;
      return this.browser;
    } finally {
      // Clear the marker on success and failure so a failed launch can be retried.
      this.launching = null;
    }
  }

  /**
   * Open a page, run `work` against it and always close the page afterwards.
   *
   * @param failureMessage Message logged when `work` throws.
   * @param fallback Value returned when `work` throws.
   * @param work Operation to perform with the open page.
   * @returns The result of `work`, or `fallback` if it threw.
   */
  private async withPage<T>(
    failureMessage: string,
    fallback: T,
    work: (page: any) => Promise<T>
  ): Promise<T> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      return await work(page);
    } catch (error) {
      logger.error(failureMessage, error);
      return fallback;
    } finally {
      await page.close();
    }
  }

  /** Create the directory that will contain `outputPath` if it does not exist yet. */
  private ensureOutputDirectory(outputPath: string): void {
    const outputDir = path.dirname(outputPath);
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }
  }

  /** Print the page's current content to `outputPath`, merging defaults with `options`. */
  private async writePDF(page: any, outputPath: string, options: PDFGenerationOptions): Promise<void> {
    this.ensureOutputDirectory(outputPath);
    await page.pdf({
      ...this.defaultOptions,
      ...options,
      path: outputPath,
    });
  }

  /**
   * Build a `file:` URL for a local path.
   *
   * `pathToFileURL` resolves relative paths and percent-encodes characters such
   * as spaces, `#` and `?` that would otherwise corrupt a hand-built URL.
   */
  private toFileURL(filePath: string): string {
    return pathToFileURL(filePath).href;
  }

  /** Escape the characters that are significant in HTML text and attribute values. */
  private escapeHTML(text: string): string {
    const entities: Record<string, string> = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#39;',
    };
    return text.replace(/[&<>"']/g, ch => entities[ch] ?? ch);
  }

  /** Turn an arbitrary analysis value into display text (not yet HTML-escaped). */
  private formatValue(value: unknown): string {
    if (Array.isArray(value)) {
      return value.map(item => this.formatValue(item)).join(', ');
    }
    if (value !== null && typeof value === 'object') {
      // Avoid the useless "[object Object]" default stringification.
      return JSON.stringify(value);
    }
    return String(value);
  }

  /** Apply the inline Markdown rules (`**bold**`, then `*italic*`) to one line. */
  private formatInline(text: string): string {
    return text
      .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
      .replace(/\*(.*?)\*/g, '<em>$1</em>');
  }

  /**
   * Convert the supported Markdown subset to an HTML fragment, line by line.
   *
   * Supported: `#`/`##`/`###` headings, `- ` list items (consecutive items,
   * optionally separated by blank lines, share one `<ul>`), bold/italic, and
   * raw block-level HTML lines (e.g. tables), which are passed through without
   * a paragraph wrapper. Every other non-empty line becomes its own `<p>`.
   */
  private renderMarkdownBody(markdown: string): string {
    const output: string[] = [];
    let pendingItems: string[] = [];

    const flushList = (): void => {
      if (pendingItems.length > 0) {
        output.push(`<ul>${pendingItems.join('')}</ul>`);
        pendingItems = [];
      }
    };

    for (const line of markdown.split(/\r?\n/)) {
      const listItem = /^- (.*)$/.exec(line);
      if (listItem) {
        pendingItems.push(`<li>${this.formatInline(listItem[1])}</li>`);
        continue;
      }

      // Blank lines neither produce output nor terminate a running list.
      if (line.trim() === '') {
        continue;
      }
      flushList();

      const heading = /^(#{1,3}) (.*)$/.exec(line);
      if (heading) {
        const level = heading[1].length;
        output.push(`<h${level}>${this.formatInline(heading[2])}</h${level}>`);
      } else if (PDFGenerationService.BLOCK_HTML_LINE.test(line)) {
        output.push(this.formatInline(line));
      } else {
        output.push(`<p>${this.formatInline(line)}</p>`);
      }
    }
    flushList();

    return output.join('\n');
  }

  /**
   * Convert markdown to a complete, styled "CIM Review Summary" HTML document.
   *
   * Raw HTML in the input is intentionally passed through (this is how tables
   * are supported), so the input must come from a trusted source.
   *
   * @param markdown Markdown source.
   * @returns A full HTML document containing the rendered content.
   */
  private markdownToHTML(markdown: string): string {
    const html = this.renderMarkdownBody(markdown);

    return `
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>CIM Review Summary</title>
  <style>
    @page {
      margin: 0.75in;
      size: A4;
    }

    body {
      font-family: 'Times New Roman', serif;
      font-size: 10pt;
      line-height: 1.4;
      color: #2c3e50;
      margin: 0;
      padding: 0;
    }

    h1 {
      font-size: 18pt;
      font-weight: bold;
      color: #1a365d;
      text-align: center;
      margin-bottom: 8pt;
      border-bottom: 2pt solid #2c5282;
      padding-bottom: 8pt;
    }

    h2 {
      font-size: 14pt;
      font-weight: bold;
      color: #2d3748;
      margin-top: 20pt;
      margin-bottom: 8pt;
      border-bottom: 1pt solid #cbd5e0;
      padding-bottom: 4pt;
      page-break-after: avoid;
    }

    h3 {
      font-size: 12pt;
      font-weight: bold;
      color: #4a5568;
      margin-top: 16pt;
      margin-bottom: 6pt;
      page-break-after: avoid;
    }

    p {
      margin-bottom: 8pt;
      text-align: justify;
    }

    ul {
      margin-bottom: 8pt;
      margin-left: 20pt;
    }

    li {
      margin-bottom: 3pt;
      text-align: justify;
    }

    strong {
      font-weight: bold;
      color: #2d3748;
    }

    .header {
      text-align: center;
      margin-bottom: 20pt;
      padding-bottom: 12pt;
      border-bottom: 1pt solid #e2e8f0;
    }

    .header h1 {
      margin-bottom: 4pt;
    }

    .header p {
      font-size: 9pt;
      color: #718096;
      margin: 0;
    }

    .footer {
      text-align: center;
      margin-top: 20pt;
      padding-top: 12pt;
      border-top: 1pt solid #e2e8f0;
      font-size: 8pt;
      color: #718096;
    }

    .section {
      margin-bottom: 16pt;
      page-break-inside: avoid;
    }

    .financial-table {
      width: 100%;
      border-collapse: collapse;
      margin: 8pt 0;
      font-size: 9pt;
    }

    .financial-table th,
    .financial-table td {
      border: 1pt solid #cbd5e0;
      padding: 4pt;
      text-align: left;
    }

    .financial-table th {
      background-color: #f7fafc;
      font-weight: bold;
      color: #2d3748;
    }

    .page-break {
      page-break-before: always;
    }

    .avoid-break {
      page-break-inside: avoid;
    }
  </style>
</head>
<body>
  <div class="header">
    <h1>CIM Review Summary</h1>
    <p>Generated on ${new Date().toLocaleDateString()}</p>
  </div>
  <div class="content">
${html}
  </div>
  <div class="footer">
    <p>BPCP CIM Document Processor | Confidential</p>
  </div>
</body>
</html>
`;
  }

  /**
   * Generate PDF from markdown content.
   *
   * @param markdown Markdown source to render.
   * @param outputPath Destination file; missing parent directories are created.
   * @param options Overrides for the default PDF settings.
   * @returns `true` on success, `false` if rendering or writing failed (the error is logged).
   */
  async generatePDFFromMarkdown(
    markdown: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    return this.withPage(`PDF generation failed: ${outputPath}`, false, async page => {
      await page.setContent(this.markdownToHTML(markdown), {
        waitUntil: 'networkidle0',
      });
      await this.writePDF(page, outputPath, options);

      logger.info(`PDF generated successfully: ${outputPath}`);
      return true;
    });
  }

  /**
   * Render an HTML document to an in-memory PDF.
   *
   * @param buildHTML Produces the HTML; invoked inside the guarded section so a
   *   failure while building it is logged and reported as `null`.
   * @param options Overrides for the default PDF settings.
   * @returns The PDF bytes, or `null` if rendering failed.
   */
  private renderHTMLToBuffer(
    buildHTML: () => string,
    options: PDFGenerationOptions
  ): Promise<Buffer | null> {
    return this.withPage<Buffer | null>('PDF buffer generation failed', null, async page => {
      await page.setContent(buildHTML(), {
        waitUntil: 'networkidle0',
      });

      const buffer = await page.pdf({
        ...this.defaultOptions,
        ...options,
      });

      logger.info('PDF buffer generated successfully');
      return buffer;
    });
  }

  /**
   * Generate PDF from markdown and return as buffer.
   *
   * @param markdown Markdown source to render.
   * @param options Overrides for the default PDF settings.
   * @returns The PDF bytes, or `null` if generation failed (the error is logged).
   */
  async generatePDFBuffer(markdown: string, options: PDFGenerationOptions = {}): Promise<Buffer | null> {
    return this.renderHTMLToBuffer(() => this.markdownToHTML(markdown), options);
  }

  /**
   * Generate PDF from HTML file.
   *
   * @param htmlPath Path of the local HTML file to print.
   * @param outputPath Destination file; missing parent directories are created.
   * @param options Overrides for the default PDF settings.
   * @returns `true` on success, `false` if loading or printing failed (the error is logged).
   */
  async generatePDFFromHTML(
    htmlPath: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    return this.withPage(`PDF generation from HTML failed: ${outputPath}`, false, async page => {
      await page.goto(this.toFileURL(htmlPath), {
        waitUntil: 'networkidle0',
      });
      await this.writePDF(page, outputPath, options);

      logger.info(`PDF generated from HTML: ${outputPath}`);
      return true;
    });
  }

  /**
   * Generate PDF from URL.
   *
   * @param url Address to load; navigation is given 30 seconds.
   * @param outputPath Destination file; missing parent directories are created.
   * @param options Overrides for the default PDF settings.
   * @returns `true` on success, `false` if loading or printing failed (the error is logged).
   */
  async generatePDFFromURL(
    url: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    return this.withPage(`PDF generation from URL failed: ${outputPath}`, false, async page => {
      await page.goto(url, {
        waitUntil: 'networkidle0',
        timeout: 30000,
      });
      await this.writePDF(page, outputPath, options);

      logger.info(`PDF generated from URL: ${outputPath}`);
      return true;
    });
  }

  /**
   * Validate PDF file.
   *
   * A file passes when it starts with the `%PDF` signature and is at least
   * 100 bytes long. This is a sanity check, not a structural validation.
   *
   * @param filePath Path of the file to inspect.
   * @returns `true` if both checks pass; `false` otherwise, including when the file cannot be read.
   */
  async validatePDF(filePath: string): Promise<boolean> {
    try {
      const buffer = fs.readFileSync(filePath);

      // Check if file starts with PDF magic number
      if (buffer.toString('ascii', 0, 4) !== '%PDF') {
        return false;
      }

      // Reject files too small to be a real document
      return fs.statSync(filePath).size >= 100;
    } catch (error) {
      logger.error(`PDF validation failed: ${filePath}`, error);
      return false;
    }
  }

  /**
   * Get PDF metadata.
   *
   * NOTE: the returned values are placeholders. The file is opened in the
   * browser, but title, url and pageCount are constants, not read from the PDF.
   *
   * @param filePath Path of the PDF file.
   * @returns The placeholder metadata object, or `null` if the file could not be opened.
   */
  async getPDFMetadata(filePath: string): Promise<any> {
    return this.withPage<any>(`Failed to get PDF metadata: ${filePath}`, null, async page => {
      await page.goto(this.toFileURL(filePath), {
        waitUntil: 'networkidle0',
      });

      return page.evaluate(() => {
        return {
          title: 'PDF Document',
          url: 'file://',
          pageCount: 1, // This would need to be calculated differently
        };
      });
    });
  }

  /**
   * Generate CIM Review PDF from analysis data.
   *
   * The report HTML is rendered directly; it is a complete document and must
   * not be passed through the Markdown converter.
   *
   * @param analysisData Analysis result whose known sections are rendered.
   * @returns The PDF bytes.
   * @throws If the HTML cannot be built or the PDF cannot be generated.
   */
  async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
    try {
      const html = this.generateCIMReviewHTML(analysisData);

      const pdfBuffer = await this.renderHTMLToBuffer(() => html, {
        format: 'A4',
        margin: {
          top: '0.5in',
          right: '0.5in',
          bottom: '0.5in',
          left: '0.5in',
        },
        displayHeaderFooter: true,
        printBackground: true,
      });

      if (!pdfBuffer) {
        throw new Error('Failed to generate PDF buffer');
      }

      return pdfBuffer;
    } catch (error) {
      logger.error('Failed to generate CIM Review PDF', error);
      throw error;
    }
  }

  /** Render one labelled value as a `.field` row; label and value are HTML-escaped. */
  private renderField(fieldName: string, value: unknown): string {
    return `
<div class="field">
  <span class="field-label">${this.escapeHTML(this.formatFieldName(fieldName))}:</span>
  <span class="field-value">${this.escapeHTML(this.formatValue(value))}</span>
</div>
`;
  }

  /**
   * Render the `financials` entry as a table with one row per known period
   * (`fy3`, `fy2`, `fy1`, `ltm`) that has data. Missing or empty cells show `-`.
   */
  private renderFinancialTable(financials: any): string {
    const cell = (raw: unknown): string =>
      raw === undefined || raw === null || raw === ''
        ? '-'
        : this.escapeHTML(this.formatValue(raw));

    const rows = ['fy3', 'fy2', 'fy1', 'ltm']
      .filter(period => financials[period])
      .map(period => {
        const data = financials[period];
        return (
          `<tr><td>${period.toUpperCase()}</td>` +
          `<td>${cell(data?.revenue)}</td>` +
          `<td>${cell(data?.revenueGrowth)}</td>` +
          `<td>${cell(data?.ebitda)}</td>` +
          `<td>${cell(data?.ebitdaMargin)}</td></tr>`
        );
      });

    return [
      '<h3>Financial Data</h3>',
      '<table class="financial-table">',
      '<tr><th>Period</th><th>Revenue</th><th>Growth</th><th>EBITDA</th><th>Margin</th></tr>',
      ...rows,
      '</table>',
    ].join('');
  }

  /**
   * Generate HTML from CIM Review analysis data.
   *
   * Sections without data are skipped, as are falsy field values. All text
   * taken from `analysisData` is HTML-escaped, because it may originate from
   * processed documents rather than from trusted code.
   *
   * @param analysisData Object holding the optional report sections.
   * @returns A complete HTML document.
   */
  private generateCIMReviewHTML(analysisData: any): string {
    const sections = [
      { title: 'Deal Overview', data: analysisData.dealOverview },
      { title: 'Business Description', data: analysisData.businessDescription },
      { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis },
      { title: 'Financial Summary', data: analysisData.financialSummary },
      { title: 'Management Team Overview', data: analysisData.managementTeamOverview },
      { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis },
      { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps },
    ];

    const parts: string[] = [
      `
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>CIM Review Report</title>
  <style>
    body { font-family: Arial, sans-serif; line-height: 1.6; margin: 0; padding: 20px; }
    h1 { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
    h2 { color: #34495e; margin-top: 30px; margin-bottom: 15px; }
    h3 { color: #7f8c8d; margin-top: 20px; margin-bottom: 10px; }
    .section { margin-bottom: 25px; }
    .field { margin-bottom: 10px; }
    .field-label { font-weight: bold; color: #2c3e50; }
    .field-value { margin-left: 10px; }
    .financial-table { width: 100%; border-collapse: collapse; margin: 10px 0; }
    .financial-table th, .financial-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
    .financial-table th { background-color: #f8f9fa; font-weight: bold; }
  </style>
</head>
<body>
  <h1>CIM Review Report</h1>
`,
    ];

    for (const section of sections) {
      if (!section.data) {
        continue;
      }

      parts.push(`<div class="section"><h2>${this.escapeHTML(section.title)}</h2>`);

      for (const [key, value] of Object.entries(section.data)) {
        if (!value) {
          continue;
        }

        // Must be tested before the generic object branch, which would
        // otherwise capture `financials` and the table would never render.
        if (key === 'financials' && typeof value === 'object') {
          parts.push(this.renderFinancialTable(value));
        } else if (typeof value === 'object' && !Array.isArray(value)) {
          // Handle nested objects
          parts.push(`<h3>${this.escapeHTML(this.formatFieldName(key))}</h3>`);
          for (const [subKey, subValue] of Object.entries(value as Record<string, unknown>)) {
            if (subValue) {
              parts.push(this.renderField(subKey, subValue));
            }
          }
        } else {
          // Handle simple fields
          parts.push(this.renderField(key, value));
        }
      }

      parts.push('</div>');
    }

    parts.push(`
</body>
</html>
`);

    return parts.join('');
  }

  /**
   * Format field names for display.
   *
   * Splits camelCase at word boundaries, title-cases acronyms and capitalises
   * the first letter, e.g. `dealOverview` -> `Deal Overview`,
   * `EBITDAMargin` -> `Ebitda Margin`.
   */
  private formatFieldName(fieldName: string): string {
    return fieldName
      // lower/digit followed by upper: "dealOverview" -> "deal Overview"
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      // acronym followed by a capitalised word: "EBITDAMargin" -> "EBITDA Margin"
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      // acronyms are shown in title case: "EBITDA" -> "Ebitda"
      .replace(/[A-Z]{2,}/g, acronym => acronym.charAt(0) + acronym.slice(1).toLowerCase())
      .replace(/^./, first => first.toUpperCase());
  }

  /**
   * Close browser instance.
   *
   * The cached handle is dropped before closing, so a failing `close()` cannot
   * leave a dead browser behind for later calls to reuse.
   */
  async close(): Promise<void> {
    const browser = this.browser;
    if (!browser) {
      return;
    }

    this.browser = null;
    await browser.close();
  }

  /**
   * Release the resources held by the service. Currently this only closes the
   * shared browser; no files are removed.
   */
  async cleanup(): Promise<void> {
    await this.close();
  }
}

/**
 * Shared service instance. It owns one lazily launched browser, so importers
 * should use this singleton and call `cleanup()` (or `close()`) on shutdown.
 */
export const pdfGenerationService = new PDFGenerationService();

export default pdfGenerationService;