// Load puppeteer, falling back to an in-memory stub when the module cannot be
// required (e.g. in the test environment).
let puppeteer: any;
try {
  puppeteer = require('puppeteer');
} catch (error) {
  // Stub exposing every browser/page method this file calls, so the service's
  // code paths can run without a real Chrome. Every method resolves at once.
  puppeteer = {
    launch: async () => ({
      newPage: async () => ({
        // Both are invoked while a freshly created pooled page is configured;
        // without them the stubbed path fails with a TypeError.
        setViewport: async () => {},
        setCacheEnabled: async () => {},
        setContent: async () => {},
        // NOTE: resolves to undefined, not a Buffer, exactly as before.
        pdf: async () => {},
        close: async () => {},
        evaluate: async () => ({ title: 'Test', url: 'test://' }),
        goto: async () => {},
      }),
      close: async () => {},
    }),
  };
}
// Load PDFKit (used for the fallback PDF path), substituting a stub when the
// module cannot be required (e.g. in the test environment).
let PDFDocument: any;
try {
  PDFDocument = require('pdfkit');
} catch (error) {
  // Chainable stub covering the PDFKit calls made in this file. It keeps a
  // minimal listener registry because the fallback generator subscribes to
  // 'data' / 'end' / 'error' and only resolves once 'end' has fired.
  PDFDocument = class MockPDFDocument {
    private readonly listeners = new Map<string, Array<(...args: any[]) => void>>();
    constructor(_options?: unknown) {}
    on(event: string, listener: (...args: any[]) => void) {
      const registered = this.listeners.get(event) ?? [];
      registered.push(listener);
      this.listeners.set(event, registered);
      return this;
    }
    pipe() { return this; }
    end() {
      // The stub produces no bytes; it only signals completion.
      for (const listener of this.listeners.get('end') ?? []) {
        listener();
      }
      return this;
    }
    font() { return this; }
    fontSize() { return this; }
    text() { return this; }
    moveDown() { return this; }
    addPage() { return this; }
  };
}
import { createHash } from 'crypto';
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import { logger } from '../utils/logger';
import { memoryMonitorService } from './memoryMonitorService';
/**
 * Caller-supplied settings for a PDF render. Every field is optional; the
 * service spreads the supplied values over its own defaults.
 */
export interface PDFGenerationOptions {
  /** Paper size. */
  format?: 'A4' | 'Letter';
  /** Page margins; when present, all four sides must be given. */
  margin?: Record<'top' | 'right' | 'bottom' | 'left', string>;
  /** Header markup, passed through with the rest of the PDF options. */
  headerTemplate?: string;
  /** Footer markup, passed through with the rest of the PDF options. */
  footerTemplate?: string;
  displayHeaderFooter?: boolean;
  printBackground?: boolean;
  // NOTE(review): no code in this file reads `quality` directly — confirm it is still needed.
  quality?: 'low' | 'medium' | 'high';
  /** Timeout in milliseconds used when loading content into the page. */
  timeout?: number;
}
/** Bookkeeping record for a single pooled browser page. */
interface PagePool {
  /** The page handle itself (untyped: puppeteer is loaded via `require`). */
  page: any;
  /** `Date.now()` timestamp recorded when the page was last handed out. */
  lastUsed: number;
  /** True while a caller holds the page. */
  inUse: boolean;
}
class PDFGenerationService {
// Shared browser instance; null until the first launch and again after close().
private browser: any = null;
// Pages kept for reuse, each tagged with its in-use flag and last-use time.
private pagePool: PagePool[] = [];
// Pool length is compared against this before a new pooled page is created.
private readonly maxPoolSize = 5;
// Pooled pages not in use whose last-use time is older than this (ms) are closed during pool cleanup.
private readonly pageTimeout = 30000; // 30 seconds
// Generated PDFs keyed by cache key; each entry holds { buffer, timestamp }.
private readonly cache = new Map();
// Maximum age (ms) at which a cached PDF is still served.
private readonly cacheTimeout = 300000; // 5 minutes
// Baseline PDF options; per-call options are spread over these.
private readonly defaultOptions: PDFGenerationOptions = {
format: 'A4',
margin: {
top: '1in',
right: '1in',
bottom: '1in',
left: '1in',
},
displayHeaderFooter: true,
printBackground: true,
quality: 'high',
timeout: 30000,
};
/**
 * Return the shared browser, launching it on first use.
 *
 * The launched instance is stored on `this.browser` and reused by later calls.
 * When `FUNCTIONS_EMULATOR` or `FIREBASE_FUNCTIONS` is set, an explicit Chrome
 * executable path is supplied (from `PUPPETEER_EXECUTABLE_PATH`, else a
 * hard-coded default).
 *
 * NOTE(review): nothing prevents two callers that both see `this.browser`
 * unset from each launching a browser; only the last one is retained.
 *
 * @returns The browser object produced by `puppeteer.launch`.
 */
private async getBrowser(): Promise<any> {
  if (this.browser) {
    return this.browser;
  }

  const launchOptions: any = {
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--no-first-run',
      '--no-zygote',
      '--disable-gpu',
      '--disable-background-timer-throttling',
      '--disable-backgrounding-occluded-windows',
      '--disable-renderer-backgrounding',
    ],
  };

  // For Firebase Functions environment, use the bundled Chrome
  if (process.env.FUNCTIONS_EMULATOR || process.env.FIREBASE_FUNCTIONS) {
    launchOptions.executablePath =
      process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable';
  }

  this.browser = await puppeteer.launch(launchOptions);
  return this.browser;
}
/**
 * Hand out a page for exclusive use: an idle pooled page if one exists, a new
 * page if the pool has room, otherwise wait until a pooled page is released.
 *
 * Callers must give the page back with `releasePage`.
 *
 * @returns A page marked as in use in the pool.
 * @throws Whatever browser launch or page creation/setup throws; in that case
 *         the reserved pool slot is removed and any created page is closed.
 */
private async getPage(): Promise<any> {
  // Drop idle pages that have outlived the pool timeout.
  this.cleanupExpiredPages();

  const idle = this.pagePool.find(p => !p.inUse);
  if (idle) {
    idle.inUse = true;
    idle.lastUsed = Date.now();
    return idle.page;
  }

  if (this.pagePool.length < this.maxPoolSize) {
    // Reserve the slot *before* the first await. Otherwise every concurrent
    // caller passes the size check above and the pool grows past its limit.
    const slot: PagePool = { page: null, inUse: true, lastUsed: Date.now() };
    this.pagePool.push(slot);

    let page: any = null;
    try {
      const browser = await this.getBrowser();
      page = await browser.newPage();
      // Optimize page settings
      await page.setViewport({ width: 1200, height: 800 });
      await page.setCacheEnabled(false);
    } catch (error) {
      // Give the slot back and do not leak a half-configured page.
      this.pagePool = this.pagePool.filter(p => p !== slot);
      if (page) {
        await page
          .close()
          .catch((closeError: unknown) => logger.error('Error closing page:', closeError));
      }
      throw error;
    }

    slot.page = page;
    slot.lastUsed = Date.now();
    return page;
  }

  // Pool is full and busy: poll every 100 ms until a page is released.
  return new Promise<any>(resolve => {
    const poll = (): void => {
      const released = this.pagePool.find(p => !p.inUse);
      if (!released) {
        setTimeout(poll, 100);
        return;
      }
      released.inUse = true;
      released.lastUsed = Date.now();
      resolve(released.page);
    };
    poll();
  });
}
/**
 * Mark a pooled page as idle again so another caller can take it.
 * Pages that are not in the pool are ignored.
 *
 * @param page The page previously handed out by the pool.
 */
private releasePage(page: any): void {
  for (const entry of this.pagePool) {
    if (entry.page === page) {
      entry.inUse = false;
      return;
    }
  }
}
/**
 * Remove idle pages whose last use is older than `pageTimeout` from the pool
 * and close them. Pages currently in use are always kept.
 *
 * Closing is fire-and-forget: a failure is logged, never thrown.
 */
private cleanupExpiredPages(): void {
  const now = Date.now();
  this.pagePool = this.pagePool.filter(poolItem => {
    const expired = now - poolItem.lastUsed > this.pageTimeout;
    if (!expired || poolItem.inUse) {
      return true;
    }
    // `page` is untyped, so the callback parameter needs an explicit type.
    poolItem.page
      .close()
      .catch((err: unknown) => logger.error('Error closing expired page:', err));
    return false;
  });
}
/**
 * Derive the cache key for a (content, options) pair.
 *
 * The key is a SHA-256 digest over the full content and the JSON-serialised
 * options, so any difference in either yields a different key. (A truncated
 * base64 prefix would only reflect the first few bytes of the content and
 * make unrelated documents share a cache entry.)
 *
 * @param content Source text the PDF is rendered from.
 * @param options Options the PDF is rendered with.
 * @returns Hex-encoded digest used as the cache map key.
 */
private generateCacheKey(content: string, options: PDFGenerationOptions): string {
  return createHash('sha256')
    .update(content)
    // Separator keeps "content + options" unambiguous.
    .update('\u0000')
    .update(JSON.stringify(options))
    .digest('hex');
}
/**
 * Look up a cached PDF.
 *
 * @param cacheKey Key produced for the content/options pair.
 * @returns The cached buffer when an entry exists and is younger than
 *          `cacheTimeout`; otherwise null (a stale entry is removed).
 */
private getCachedPDF(cacheKey: string): Buffer | null {
  const entry = this.cache.get(cacheKey);
  if (!entry) {
    return null;
  }

  const age = Date.now() - entry.timestamp;
  if (age < this.cacheTimeout) {
    logger.info('PDF served from cache');
    return entry.buffer;
  }

  // Entry exists but is too old: evict it.
  this.cache.delete(cacheKey);
  return null;
}
/**
 * Store a generated PDF under the given key with the current timestamp.
 * When the cache already holds more than 100 entries, the first key in
 * insertion order is dropped before the new entry is written.
 *
 * @param cacheKey Key to store under.
 * @param buffer   PDF bytes to cache.
 */
private cachePDF(cacheKey: string, buffer: Buffer): void {
  const overLimit = this.cache.size > 100;
  if (overLimit) {
    // Map iterates in insertion order, so the first key is the oldest insert.
    const [oldestKey] = this.cache.keys();
    if (oldestKey) {
      this.cache.delete(oldestKey);
    }
  }

  const timestamp = Date.now();
  this.cache.set(cacheKey, { buffer, timestamp });
}
/**
* Convert a markdown string to an HTML document.
*
* The conversion is a chain of regular-expression replacements applied in
* order (headers, bold, italic, list items, paragraphs), followed by
* clean-up passes for list and table markup; the result is embedded in a
* page template titled "CIM Review Summary".
*
* NOTE(review): the replacement strings and several regex literals below
* look as if their HTML tags were lost (for example the header rules emit
* only `$1`, and some literals are unterminated). Presumably an extraction
* artefact -- verify against the original file before editing this method.
*
* @param markdown Markdown source text.
* @returns The generated HTML string.
*/
private markdownToHTML(markdown: string): string {
// Enhanced markdown to HTML conversion with table support
let html = markdown
// Headers
.replace(/^### (.*$)/gim, '$1
')
.replace(/^## (.*$)/gim, '$1
')
.replace(/^# (.*$)/gim, '$1
')
// Bold
.replace(/\*\*(.*?)\*\*/g, '$1')
// Italic
.replace(/\*(.*?)\*/g, '$1')
// Lists
.replace(/^- (.*$)/gim, '$1')
// Paragraphs (but preserve tables)
.replace(/\n\n/g, '
')
.replace(/^(.+)$/gm, '
$1
');
// Wrap lists properly
html = html.replace(/(.*?)<\/li>/g, '');
html = html.replace(/<\/ul>\s*/g, '');
// Preserve HTML tables by removing paragraph tags around them
html = html.replace(/');
html = html.replace(/<\/table>/g, '');
html = html.replace(/
CIM Review Summary
${html}
`;
}
/**
 * Render markdown to a PDF file using a pooled page.
 *
 * @param markdown   Markdown source.
 * @param outputPath Destination file; missing parent directories are created.
 * @param options    Overrides spread over the service defaults. `timeout`
 *                   (ms) applies to loading the HTML; an explicit `0` is
 *                   passed through rather than replaced by the default.
 * @returns true on success, false when rendering or writing failed (the error
 *          is logged). Failure to obtain a page is not caught here.
 */
async generatePDFFromMarkdown(
  markdown: string,
  outputPath: string,
  options: PDFGenerationOptions = {}
): Promise<boolean> {
  const page = await this.getPage();
  try {
    const html = this.markdownToHTML(markdown);

    await page.setContent(html, {
      waitUntil: 'networkidle0',
      // `??` instead of `||` so a caller-supplied 0 is respected.
      timeout: options.timeout ?? this.defaultOptions.timeout,
    });

    // Recursive mkdir is a no-op for existing directories, so no prior
    // existence check is needed.
    await fs.promises.mkdir(path.dirname(outputPath), { recursive: true });

    await page.pdf({
      ...this.defaultOptions,
      ...options,
      path: outputPath,
    });

    logger.info(`PDF generated successfully: ${outputPath}`);
    return true;
  } catch (error) {
    logger.error(`PDF generation failed: ${outputPath}`, error);
    return false;
  } finally {
    // Always hand the page back to the pool.
    this.releasePage(page);
  }
}
/**
 * Render markdown to a PDF and return the bytes, using the in-memory cache.
 *
 * A cache hit for the same markdown/options pair is returned directly.
 * Otherwise the render runs inside `memoryMonitorService.monitorOperation`
 * on a pooled page and the result is cached.
 *
 * @param markdown Markdown source.
 * @param options  Overrides spread over the service defaults. `timeout` (ms)
 *                 applies to loading the HTML; an explicit `0` is passed
 *                 through rather than replaced by the default.
 * @returns The PDF buffer, or null when rendering failed (error is logged).
 */
async generatePDFBuffer(
  markdown: string,
  options: PDFGenerationOptions = {}
): Promise<Buffer | null> {
  const cacheKey = this.generateCacheKey(markdown, options);
  const cached = this.getCachedPDF(cacheKey);
  if (cached) {
    return cached;
  }

  return memoryMonitorService.monitorOperation('PDF Generation', async () => {
    const page = await this.getPage();
    try {
      const html = this.markdownToHTML(markdown);

      await page.setContent(html, {
        waitUntil: 'networkidle0',
        // `??` instead of `||` so a caller-supplied 0 is respected.
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      // No `path` option: the PDF is returned rather than written to disk.
      const buffer = await page.pdf({
        ...this.defaultOptions,
        ...options,
      });

      this.cachePDF(cacheKey, buffer);
      logger.info('PDF buffer generated successfully');
      return buffer;
    } catch (error) {
      logger.error('PDF buffer generation failed', error);
      return null;
    } finally {
      this.releasePage(page);
    }
  });
}
/**
 * Render a local HTML file to a PDF file on a dedicated (non-pooled) page.
 *
 * @param htmlPath   Path of the HTML file to load.
 * @param outputPath Destination file; missing parent directories are created.
 * @param options    Overrides spread over the service defaults. `timeout`
 *                   (ms) applies to the navigation.
 * @returns true on success, false when navigation, rendering or writing
 *          failed (the error is logged).
 */
async generatePDFFromHTML(
  htmlPath: string,
  outputPath: string,
  options: PDFGenerationOptions = {}
): Promise<boolean> {
  const browser = await this.getBrowser();
  const page = await browser.newPage();
  try {
    // pathToFileURL resolves relative paths and percent-encodes characters
    // (spaces, '#', '?', ...) that break a hand-built `file://` string.
    await page.goto(pathToFileURL(htmlPath).href, {
      waitUntil: 'networkidle0',
      timeout: options.timeout ?? this.defaultOptions.timeout,
    });

    await fs.promises.mkdir(path.dirname(outputPath), { recursive: true });

    await page.pdf({
      ...this.defaultOptions,
      ...options,
      path: outputPath,
    });

    logger.info(`PDF generated from HTML: ${outputPath}`);
    return true;
  } catch (error) {
    logger.error(`PDF generation from HTML failed: ${outputPath}`, error);
    return false;
  } finally {
    // A failing close must not replace the boolean result with a rejection.
    await page
      .close()
      .catch((closeError: unknown) => logger.error('Error closing page:', closeError));
  }
}
/**
 * Render the page at a URL to a PDF file on a dedicated (non-pooled) page.
 *
 * @param url        Address to navigate to.
 * @param outputPath Destination file; missing parent directories are created.
 * @param options    Overrides spread over the service defaults. `timeout`
 *                   (ms) applies to the navigation; the service default
 *                   (30000) is used when it is omitted.
 * @returns true on success, false when navigation, rendering or writing
 *          failed (the error is logged).
 */
async generatePDFFromURL(
  url: string,
  outputPath: string,
  options: PDFGenerationOptions = {}
): Promise<boolean> {
  const browser = await this.getBrowser();
  const page = await browser.newPage();
  try {
    await page.goto(url, {
      waitUntil: 'networkidle0',
      // Honour the caller's timeout instead of a fixed value.
      timeout: options.timeout ?? this.defaultOptions.timeout,
    });

    await fs.promises.mkdir(path.dirname(outputPath), { recursive: true });

    await page.pdf({
      ...this.defaultOptions,
      ...options,
      path: outputPath,
    });

    logger.info(`PDF generated from URL: ${outputPath}`);
    return true;
  } catch (error) {
    logger.error(`PDF generation from URL failed: ${outputPath}`, error);
    return false;
  } finally {
    // A failing close must not replace the boolean result with a rejection.
    await page
      .close()
      .catch((closeError: unknown) => logger.error('Error closing page:', closeError));
  }
}
/**
 * Cheap sanity check that a file looks like a PDF.
 *
 * The file passes when it is at least 100 bytes long and its first four
 * bytes are `%PDF`. Only the header is read, not the whole file.
 *
 * @param filePath File to inspect.
 * @returns true when both checks pass; false otherwise, including when the
 *          file cannot be opened or read (that error is logged).
 */
async validatePDF(filePath: string): Promise<boolean> {
  try {
    const handle = await fs.promises.open(filePath, 'r');
    try {
      const { size } = await handle.stat();
      if (size < 100) {
        return false;
      }

      // Check if file starts with PDF magic number
      const header = Buffer.alloc(4);
      const { bytesRead } = await handle.read(header, 0, header.length, 0);
      return bytesRead === header.length && header.toString('ascii') === '%PDF';
    } finally {
      await handle.close();
    }
  } catch (error) {
    logger.error(`PDF validation failed: ${filePath}`, error);
    return false;
  }
}
/**
 * Open a local file in a dedicated page and return metadata for it.
 *
 * NOTE: the returned values are fixed placeholders produced inside the page
 * (`title`, `url`, and a `pageCount` of 1); nothing is read from the
 * document itself yet.
 *
 * @param filePath Path of the file to open.
 * @returns The metadata object, or null when the page could not be loaded or
 *          evaluated (the error is logged).
 */
async getPDFMetadata(filePath: string): Promise<any> {
  const browser = await this.getBrowser();
  const page = await browser.newPage();
  try {
    // pathToFileURL resolves relative paths and percent-encodes characters
    // (spaces, '#', '?', ...) that break a hand-built `file://` string.
    await page.goto(pathToFileURL(filePath).href, {
      waitUntil: 'networkidle0',
    });

    const metadata = await page.evaluate(() => {
      return {
        title: 'PDF Document',
        url: 'file://',
        pageCount: 1, // This would need to be calculated differently
      };
    });
    return metadata;
  } catch (error) {
    logger.error(`Failed to get PDF metadata: ${filePath}`, error);
    return null;
  } finally {
    // A failing close must not replace the result with a rejection.
    await page
      .close()
      .catch((closeError: unknown) => logger.error('Error closing page:', closeError));
  }
}
/**
 * Build the CIM Review PDF for the given analysis data.
 *
 * The browser-based renderer is tried first on a pooled page. If anything in
 * that path throws (HTML generation, page acquisition, rendering), the error
 * is logged and the PDFKit-based fallback is used instead.
 *
 * @param analysisData Analysis result whose sections are rendered.
 * @returns The PDF bytes.
 * @throws Whatever the fallback generator throws when it also fails.
 */
async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
  try {
    const html = this.generateCIMReviewHTML(analysisData);
    const page = await this.getPage();
    try {
      await page.setContent(html, { waitUntil: 'networkidle0' });
      // `return await` so a rendering failure reaches the outer catch and
      // the page is released first.
      return await page.pdf({
        format: 'A4',
        margin: {
          top: '0.5in',
          right: '0.5in',
          bottom: '0.5in',
          left: '0.5in',
        },
        displayHeaderFooter: true,
        printBackground: true,
      });
    } finally {
      this.releasePage(page);
    }
  } catch (error) {
    logger.error('Failed to generate CIM Review PDF with Puppeteer, trying fallback method', error);
    // Fallback: Generate a simple text-based PDF without Chrome
    return this.generateSimplePDF(analysisData);
  }
}
/**
 * Generate a plain, text-only PDF with PDFKit (fallback path).
 *
 * Writes a title block, then one block per known section that is present in
 * `analysisData`, listing each truthy non-object field as "Name: value",
 * then a footer line. Output chunks are collected and concatenated once the
 * document emits `end`.
 *
 * @param analysisData Analysis result; its section properties are read.
 * @returns The PDF bytes.
 * @throws The underlying error (after logging) when document creation,
 *         writing, or the document's `error` event fails.
 */
private async generateSimplePDF(analysisData: any): Promise<Buffer> {
  try {
    // Awaited so that rejections are actually seen by the catch below.
    return await new Promise<Buffer>((resolve, reject) => {
      const doc = new PDFDocument({
        size: 'A4',
        margins: {
          top: 50,
          bottom: 50,
          left: 50,
          right: 50
        }
      });

      const chunks: Buffer[] = [];
      doc.on('data', (chunk: Buffer) => chunks.push(chunk));
      doc.on('end', () => resolve(Buffer.concat(chunks)));
      doc.on('error', (error: any) => reject(error));

      // Add header
      doc.fontSize(24)
        .font('Helvetica-Bold')
        .text('BLUEPOINT Capital Partners', { align: 'center' });
      doc.moveDown(0.5);
      doc.fontSize(18)
        .font('Helvetica-Bold')
        .text('CIM Review Report', { align: 'center' });
      doc.moveDown(0.5);
      // One timestamp for both parts so date and time cannot disagree.
      const generatedAt = new Date();
      doc.fontSize(10)
        .font('Helvetica')
        .text(`Generated: ${generatedAt.toLocaleDateString()} at ${generatedAt.toLocaleTimeString()}`, { align: 'center' });
      doc.moveDown(2);

      // Add content sections
      const sections = [
        { title: 'Deal Overview', data: analysisData.dealOverview },
        { title: 'Business Description', data: analysisData.businessDescription },
        { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis },
        { title: 'Financial Summary', data: analysisData.financialSummary },
        { title: 'Management Team Overview', data: analysisData.managementTeamOverview },
        { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis },
        { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps },
      ];

      for (const section of sections) {
        if (!section.data) {
          continue;
        }

        // Add section title
        doc.fontSize(14)
          .font('Helvetica-Bold')
          .text(section.title);
        doc.moveDown(0.5);

        // Add section content: only truthy, non-object values are printed.
        for (const [key, value] of Object.entries(section.data)) {
          if (value && typeof value !== 'object') {
            doc.fontSize(10)
              .font('Helvetica-Bold')
              .text(`${this.formatFieldName(key)}:`, { continued: true });
            doc.fontSize(10)
              .font('Helvetica')
              .text(` ${value}`);
            doc.moveDown(0.3);
          }
        }
        doc.moveDown(1);
      }

      // Add footer
      doc.moveDown(2);
      doc.fontSize(8)
        .font('Helvetica')
        .text('BLUEPOINT Capital Partners | CIM Document Processor | Confidential', { align: 'center' });

      doc.end();
    });
  } catch (error) {
    logger.error('PDFKit PDF generation failed', error);
    throw error;
  }
}
/**
* Generate the HTML string for the CIM Review report from analysis data.
*
* For each known section that is present in `analysisData`, the section's
* entries are appended in order:
* - a `financials` object is rendered as a table with one row per period
*   (fy3, fy2, fy1, ltm) that exists, using '-' for missing cells;
* - any other non-array object is rendered as a titled group of its
*   truthy, non-object members;
* - any other truthy value is rendered as a name/value pair.
* Values are interpolated as-is; no HTML escaping is applied here.
*
* NOTE(review): the template literals below contain almost no markup, which
* suggests their HTML tags were lost -- verify against the original file.
*
* @param analysisData Analysis result; its section properties are read.
* @returns The assembled HTML string.
*/
private generateCIMReviewHTML(analysisData: any): string {
const sections = [
{ title: 'Deal Overview', data: analysisData.dealOverview, icon: '📊' },
{ title: 'Business Description', data: analysisData.businessDescription, icon: '🏢' },
{ title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis, icon: '📈' },
{ title: 'Financial Summary', data: analysisData.financialSummary, icon: '💰' },
{ title: 'Management Team Overview', data: analysisData.managementTeamOverview, icon: '👥' },
{ title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis, icon: '🎯' },
{ title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps, icon: '❓' },
];
let html = `
CIM Review Report
`;
// Sections whose data is missing or falsy are skipped entirely.
sections.forEach(section => {
if (section.data) {
html += `
${section.icon}${section.title}
`;
Object.entries(section.data).forEach(([key, value]) => {
if (key === 'financials' && typeof value === 'object') {
// Handle financial table specifically
html += `
💰 Financial Data
`;
html += `
`;
html += `| Period | Revenue | Growth | EBITDA | Margin |
`;
html += ``;
const periods = ['fy3', 'fy2', 'fy1', 'ltm'];
periods.forEach(period => {
if (value && typeof value === 'object' && value[period as keyof typeof value]) {
const data = value[period as keyof typeof value] as any;
html += `
| ${period.toUpperCase()} |
${data?.revenue || '-'} |
${data?.revenueGrowth || '-'} |
${data?.ebitda || '-'} |
${data?.ebitdaMargin || '-'} |
`;
}
});
html += `
`;
} else if (value && typeof value === 'object' && !Array.isArray(value)) {
// Handle nested objects (but skip financials since we handled it above)
html += `
📋 ${this.formatFieldName(key)}
`;
Object.entries(value).forEach(([subKey, subValue]) => {
if (subValue && typeof subValue !== 'object') {
html += `
${this.formatFieldName(subKey)}
${subValue}
`;
}
});
} else if (value) {
// Handle simple fields
html += `
${this.formatFieldName(key)}
${value}
`;
}
});
html += `
`;
}
});
html += `
`;
return html;
}
/**
 * Read the logo image next to this module and return it base64-encoded, for
 * embedding in generated HTML.
 *
 * @returns The base64 text, or an empty string when the file cannot be read
 *          (the failure is logged and PDF generation can carry on).
 */
private getLogoBase64(): string {
  try {
    const bytes = fs.readFileSync(path.join(__dirname, '../assets/bluepoint-logo.png'));
    const encoded = bytes.toString('base64');
    return encoded;
  } catch (error) {
    logger.error('Failed to load logo:', error);
    // An empty result hides the logo without aborting the report.
    return '';
  }
}
/**
 * Turn a camelCase field name into a display label.
 *
 * A space is inserted at each word boundary and the first character is
 * upper-cased, e.g. `dealOverview` -> `Deal Overview`. Runs of capitals are
 * treated as one word, so acronyms stay intact (`revenueCAGR` ->
 * `Revenue CAGR`, `EBITDAMargin` -> `EBITDA Margin`) instead of being split
 * into single letters, and names that already start with a capital do not
 * gain a leading space.
 *
 * @param fieldName Property name to format.
 * @returns The human-readable label; an empty string stays empty.
 */
private formatFieldName(fieldName: string): string {
  const spaced = fieldName
    // lower-case letter or digit followed by a capital: "dealOverview"
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    // end of an acronym followed by a capitalised word: "EBITDAMargin"
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
}
/**
 * Release everything the service holds: close all pooled pages, clear the
 * PDF cache and close the browser.
 *
 * Failures while closing individual pages are logged and do not stop the
 * shutdown. The pool and the browser reference are reset before the
 * corresponding close calls, so a failed close never leaves a stale handle
 * behind for later calls to reuse.
 *
 * @throws Whatever closing the browser throws.
 */
async close(): Promise<void> {
  // Detach the pool first so nothing can pick up a page that is being closed.
  const pooled = this.pagePool;
  this.pagePool = [];
  for (const poolItem of pooled) {
    try {
      await poolItem.page.close();
    } catch (error) {
      logger.error('Error closing page:', error);
    }
  }

  // Clear cache
  this.cache.clear();

  // Close browser
  const browser = this.browser;
  this.browser = null;
  if (browser) {
    await browser.close();
  }
}
/**
 * Shut the service down. This is an alias for `close()`; it does not remove
 * any files.
 */
async cleanup(): Promise<void> {
  await this.close();
}
/**
 * Snapshot of the service's current resource usage.
 *
 * @returns Number of pooled pages, number of cached PDFs, and how many
 *          pooled pages are currently in use.
 */
getStats(): {
  pagePoolSize: number;
  cacheSize: number;
  activePages: number;
} {
  let activePages = 0;
  for (const entry of this.pagePool) {
    if (entry.inUse) {
      activePages += 1;
    }
  }

  const pagePoolSize = this.pagePool.length;
  const cacheSize = this.cache.size;
  return { pagePoolSize, cacheSize, activePages };
}
}
// Single service instance created when this module is loaded; it is exported
// both by name and as the default export.
export const pdfGenerationService = new PDFGenerationService();
export default pdfGenerationService;