import { createHash } from 'crypto';
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import { logger } from '../utils/logger';
import { memoryMonitorService } from './memoryMonitorService';

// Puppeteer is loaded lazily so the module still imports when the package is
// not installed (e.g. in the test environment); a minimal stand-in that covers
// every page/browser method this file calls is used instead.
let puppeteer: any;
try {
  puppeteer = require('puppeteer');
} catch {
  puppeteer = {
    launch: async () => ({
      newPage: async () => ({
        setViewport: async () => {},
        setCacheEnabled: async () => {},
        setContent: async () => {},
        pdf: async () => {},
        close: async () => {},
        evaluate: async () => ({ title: 'Test', url: 'test://' }),
        goto: async () => {},
      }),
      close: async () => {},
    }),
  };
}

// PDFKit backs the fallback renderer; it gets the same lazy-load treatment.
let PDFDocument: any;
try {
  PDFDocument = require('pdfkit');
} catch {
  // The stand-in records listeners and fires 'end' from end(), because
  // generateSimplePDF() resolves its promise from the 'end' event.
  PDFDocument = class MockPDFDocument {
    private readonly listeners = new Map<string, Array<(...args: unknown[]) => void>>();

    constructor(_options?: unknown) {}

    on(event: string, listener: (...args: unknown[]) => void) {
      const registered = this.listeners.get(event) ?? [];
      registered.push(listener);
      this.listeners.set(event, registered);
      return this;
    }

    pipe() { return this; }

    end() {
      for (const listener of this.listeners.get('end') ?? []) {
        listener();
      }
      return this;
    }

    font() { return this; }
    fontSize() { return this; }
    text() { return this; }
    moveDown() { return this; }
    addPage() { return this; }
  };
}

export interface PDFGenerationOptions {
  format?: 'A4' | 'Letter';
  margin?: {
    top: string;
    right: string;
    bottom: string;
    left: string;
  };
  headerTemplate?: string;
  footerTemplate?: string;
  displayHeaderFooter?: boolean;
  printBackground?: boolean;
  quality?: 'low' | 'medium' | 'high';
  timeout?: number;
}

/** One pooled browser page together with its bookkeeping. */
interface PagePool {
  page: any;
  inUse: boolean;
  lastUsed: number;
}

/** A cached PDF and the time (ms since epoch) it was stored. */
interface CachedPDF {
  buffer: Buffer;
  timestamp: number;
}

/** One titled section of the CIM review report. */
interface ReportSection {
  title: string;
  icon: string;
  data: unknown;
}

/** True for any non-null object (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Renders markdown / CIM analysis data to PDF.
 *
 * Rendering goes through a shared browser with a small pool of reusable pages;
 * generated buffers are cached for a short time; generateCIMReviewPDF() falls
 * back to a text-only PDFKit document when browser rendering fails.
 */
class PDFGenerationService {
  private browser: any = null;
  // In-flight launch shared by concurrent getBrowser() callers.
  private browserLaunch: Promise<any> | null = null;
  private pagePool: PagePool[] = [];
  // Pages currently being created; counted against maxPoolSize.
  private pendingPages = 0;
  private readonly maxPoolSize = 5;
  private readonly pageTimeout = 30000; // 30 seconds
  private readonly cache = new Map<string, CachedPDF>();
  private readonly cacheTimeout = 300000; // 5 minutes
  private readonly maxCacheEntries = 100;

  private readonly defaultOptions: PDFGenerationOptions = {
    format: 'A4',
    margin: {
      top: '1in',
      right: '1in',
      bottom: '1in',
      left: '1in',
    },
    displayHeaderFooter: true,
    printBackground: true,
    quality: 'high',
    timeout: 30000,
  };

  /**
   * Return the shared browser, launching it on first use.
   *
   * Concurrent callers await the same launch, so only one browser process is
   * ever started.
   */
  private async getBrowser(): Promise<any> {
    if (this.browser) {
      return this.browser;
    }

    if (!this.browserLaunch) {
      const launchOptions: any = {
        headless: 'new',
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--disable-gpu',
          '--disable-background-timer-throttling',
          '--disable-backgrounding-occluded-windows',
          '--disable-renderer-backgrounding',
        ],
      };

      // For Firebase Functions environment, use the bundled Chrome
      if (process.env.FUNCTIONS_EMULATOR || process.env.FIREBASE_FUNCTIONS) {
        launchOptions.executablePath =
          process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable';
      }

      this.browserLaunch = (async () => {
        try {
          this.browser = await puppeteer.launch(launchOptions);
          return this.browser;
        } finally {
          // Cleared on success and failure so a failed launch can be retried.
          this.browserLaunch = null;
        }
      })();
    }

    return this.browserLaunch;
  }

  /**
   * Get a page from the pool, creating one while the pool has room and
   * otherwise polling every 100 ms until a page is released.
   */
  private async getPage(): Promise<any> {
    this.cleanupExpiredPages();

    const idle = this.pagePool.find(entry => !entry.inUse);
    if (idle) {
      idle.inUse = true;
      idle.lastUsed = Date.now();
      return idle.page;
    }

    // Pages still being created count too; otherwise concurrent callers could
    // all pass this check before any of them pushes into the pool.
    if (this.pagePool.length + this.pendingPages < this.maxPoolSize) {
      this.pendingPages += 1;
      try {
        const browser = await this.getBrowser();
        const page = await browser.newPage();
        try {
          await page.setViewport({ width: 1200, height: 800 });
          await page.setCacheEnabled(false);
        } catch (error) {
          // Do not leak a half-configured page.
          await page.close().catch((closeError: unknown) =>
            logger.error('Error closing page:', closeError)
          );
          throw error;
        }

        this.pagePool.push({ page, inUse: true, lastUsed: Date.now() });
        return page;
      } finally {
        this.pendingPages -= 1;
      }
    }

    return new Promise(resolve => {
      const poll = (): void => {
        const released = this.pagePool.find(entry => !entry.inUse);
        if (released) {
          released.inUse = true;
          released.lastUsed = Date.now();
          resolve(released.page);
        } else {
          setTimeout(poll, 100);
        }
      };
      poll();
    });
  }

  /**
   * Release a page back to the pool
   */
  private releasePage(page: any): void {
    const entry = this.pagePool.find(candidate => candidate.page === page);
    if (entry) {
      entry.inUse = false;
    }
  }

  /**
   * Close and drop idle pages that have not been used for `pageTimeout` ms.
   * Pages that are in use are always kept.
   */
  private cleanupExpiredPages(): void {
    const now = Date.now();
    this.pagePool = this.pagePool.filter(entry => {
      const expired = now - entry.lastUsed > this.pageTimeout;
      if (!expired || entry.inUse) {
        return true;
      }
      entry.page
        .close()
        .catch((err: unknown) => logger.error('Error closing expired page:', err));
      return false;
    });
  }

  /**
   * Generate the cache key for a content/options pair.
   *
   * The key is a SHA-256 digest over the complete content and the serialized
   * options, so documents that merely share a prefix get distinct keys.
   */
  private generateCacheKey(content: string, options: PDFGenerationOptions): string {
    return createHash('sha256')
      .update(content)
      .update('\u0000') // separator between content and options
      .update(JSON.stringify(options))
      .digest('hex');
  }

  /**
   * Return the cached PDF for `cacheKey`, or null when there is none or it is
   * older than `cacheTimeout` (stale entries are removed).
   */
  private getCachedPDF(cacheKey: string): Buffer | null {
    const cached = this.cache.get(cacheKey);
    if (!cached) {
      return null;
    }
    if (Date.now() - cached.timestamp < this.cacheTimeout) {
      logger.info('PDF served from cache');
      return cached.buffer;
    }
    this.cache.delete(cacheKey);
    return null;
  }

  /**
   * Store a PDF buffer, evicting the oldest-inserted entry once the cache
   * holds `maxCacheEntries` items.
   */
  private cachePDF(cacheKey: string, buffer: Buffer): void {
    if (this.cache.size >= this.maxCacheEntries && !this.cache.has(cacheKey)) {
      const oldestKey = this.cache.keys().next().value;
      if (oldestKey !== undefined) {
        this.cache.delete(oldestKey);
      }
    }
    this.cache.set(cacheKey, { buffer, timestamp: Date.now() });
  }

  /** Merge caller options over the defaults, optionally adding an output path. */
  private buildPdfOptions(
    options: PDFGenerationOptions,
    outputPath?: string
  ): PDFGenerationOptions & { path?: string } {
    return {
      ...this.defaultOptions,
      ...options,
      ...(outputPath === undefined ? {} : { path: outputPath }),
    };
  }

  /** Create the directory that will contain `outputPath` if it is missing. */
  private ensureOutputDirectory(outputPath: string): void {
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  }

  /** Escape a value for safe interpolation into HTML text or attributes. */
  private escapeHtml(value: unknown): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Convert markdown to a standalone HTML document.
   *
   * Supports `#`/`##`/`###` headings, `**bold**`, `*italic*`, `- ` list items
   * (consecutive items share one list) and paragraphs. Lines that already
   * start with an HTML tag (e.g. tables) are passed through untouched, which
   * also means the input is NOT escaped — only feed trusted markdown.
   *
   * NOTE(review): the tags and stylesheet of this template were not
   * recoverable from the reviewed source; they are reconstructed from the
   * surviving text and regexes. Confirm against version control.
   */
  private markdownToHTML(markdown: string): string {
    const renderInline = (text: string): string =>
      text
        .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
        .replace(/\*(.*?)\*/g, '<em>$1</em>');

    const blocks: string[] = [];
    let listItems: string[] = [];
    const flushList = (): void => {
      if (listItems.length > 0) {
        blocks.push(`<ul>${listItems.join('')}</ul>`);
        listItems = [];
      }
    };

    for (const rawLine of markdown.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line === '') {
        // Blank lines separate paragraphs but do not split a list.
        continue;
      }

      const listItem = /^- (.*)$/.exec(line);
      if (listItem) {
        listItems.push(`<li>${renderInline(listItem[1] ?? '')}</li>`);
        continue;
      }
      flushList();

      const heading = /^(#{1,3}) (.*)$/.exec(line);
      if (heading) {
        const [, hashes = '#', text = ''] = heading;
        blocks.push(`<h${hashes.length}>${renderInline(text)}</h${hashes.length}>`);
      } else if (line.startsWith('<')) {
        // Raw HTML such as tables must not be wrapped in <p>.
        blocks.push(line);
      } else {
        blocks.push(`<p>${renderInline(line)}</p>`);
      }
    }
    flushList();

    const now = new Date();
    return `<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>CIM Review Summary</title>
  <style>
    body { font-family: Arial, Helvetica, sans-serif; line-height: 1.6; color: #333; margin: 0; padding: 20px; }
    h1, h2, h3 { color: #2c3e50; }
    table { border-collapse: collapse; width: 100%; margin: 12px 0; }
    th, td { border: 1px solid #ddd; padding: 6px 8px; text-align: left; }
    .header { text-align: center; border-bottom: 2px solid #2c3e50; margin-bottom: 24px; padding-bottom: 12px; }
  </style>
</head>
<body>
  <div class="header">
    <h1>CIM Review Summary</h1>
    <p>Generated on ${now.toLocaleDateString()} at ${now.toLocaleTimeString()}</p>
  </div>
  <div class="content">
${blocks.join('\n')}
  </div>
</body>
</html>`;
  }

  /**
   * Generate PDF from markdown content
   *
   * @returns true when the file was written; false (after logging) when
   *   rendering failed.
   */
  async generatePDFFromMarkdown(
    markdown: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const page = await this.getPage();

    try {
      const html = this.markdownToHTML(markdown);

      await page.setContent(html, {
        waitUntil: 'networkidle0',
        // `??` keeps an explicit 0 ("no timeout") instead of replacing it.
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);
      await page.pdf(this.buildPdfOptions(options, outputPath));

      logger.info(`PDF generated successfully: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation failed: ${outputPath}`, error);
      return false;
    } finally {
      this.releasePage(page);
    }
  }

  /**
   * Generate PDF from markdown and return as buffer
   *
   * Results are cached per content/options pair for `cacheTimeout` ms.
   *
   * @returns the PDF buffer, or null (after logging) when rendering failed.
   */
  async generatePDFBuffer(
    markdown: string,
    options: PDFGenerationOptions = {}
  ): Promise<Buffer | null> {
    const cacheKey = this.generateCacheKey(markdown, options);
    const cached = this.getCachedPDF(cacheKey);
    if (cached) {
      return cached;
    }

    return memoryMonitorService.monitorOperation('PDF Generation', async () => {
      const page = await this.getPage();

      try {
        const html = this.markdownToHTML(markdown);

        await page.setContent(html, {
          waitUntil: 'networkidle0',
          timeout: options.timeout ?? this.defaultOptions.timeout,
        });

        const buffer = await page.pdf(this.buildPdfOptions(options));
        this.cachePDF(cacheKey, buffer);

        logger.info('PDF buffer generated successfully');
        return buffer;
      } catch (error) {
        logger.error('PDF buffer generation failed', error);
        return null;
      } finally {
        this.releasePage(page);
      }
    });
  }

  /**
   * Generate PDF from HTML file
   *
   * @returns true when the file was written; false (after logging) otherwise.
   */
  async generatePDFFromHTML(
    htmlPath: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      // pathToFileURL resolves relative paths and percent-encodes spaces,
      // '#', '?' etc., which naive `file://` concatenation does not.
      await page.goto(pathToFileURL(htmlPath).href, {
        waitUntil: 'networkidle0',
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);
      await page.pdf(this.buildPdfOptions(options, outputPath));

      logger.info(`PDF generated from HTML: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from HTML failed: ${outputPath}`, error);
      return false;
    } finally {
      await page.close();
    }
  }

  /**
   * Generate PDF from URL
   *
   * @returns true when the file was written; false (after logging) otherwise.
   */
  async generatePDFFromURL(
    url: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(url, {
        waitUntil: 'networkidle0',
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);
      await page.pdf(this.buildPdfOptions(options, outputPath));

      logger.info(`PDF generated from URL: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from URL failed: ${outputPath}`, error);
      return false;
    } finally {
      await page.close();
    }
  }

  /**
   * Validate PDF file
   *
   * A file passes when it is at least 100 bytes long and starts with the
   * `%PDF` magic number. Any I/O error is logged and reported as false.
   */
  async validatePDF(filePath: string): Promise<boolean> {
    try {
      if (fs.statSync(filePath).size < 100) {
        return false;
      }

      // Only the 4-byte header is needed; avoid loading the whole file.
      const fd = fs.openSync(filePath, 'r');
      try {
        const header = Buffer.alloc(4);
        const bytesRead = fs.readSync(fd, header, 0, header.length, 0);
        return bytesRead === header.length && header.toString('ascii') === '%PDF';
      } finally {
        fs.closeSync(fd);
      }
    } catch (error) {
      logger.error(`PDF validation failed: ${filePath}`, error);
      return false;
    }
  }

  /**
   * Get PDF metadata
   *
   * NOTE(review): the returned values are fixed placeholders (title
   * 'PDF Document', url 'file://', pageCount 1); nothing is actually read from
   * the document beyond checking that it can be opened in a page.
   *
   * @returns the placeholder metadata, or null (after logging) on failure.
   */
  async getPDFMetadata(filePath: string): Promise<any> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(pathToFileURL(filePath).href, {
        waitUntil: 'networkidle0',
      });

      const metadata = await page.evaluate(() => {
        return {
          title: 'PDF Document',
          url: 'file://',
          pageCount: 1, // This would need to be calculated differently
        };
      });

      return metadata;
    } catch (error) {
      logger.error(`Failed to get PDF metadata: ${filePath}`, error);
      return null;
    } finally {
      await page.close();
    }
  }

  /**
   * Generate CIM Review PDF from analysis data
   *
   * Renders the HTML report in a pooled browser page; if anything in that
   * path fails, the error is logged and the PDFKit fallback is used instead.
   */
  async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
    try {
      const html = this.generateCIMReviewHTML(analysisData);
      const page = await this.getPage();

      try {
        await page.setContent(html, { waitUntil: 'networkidle0' });

        // `return await` so the page is released only after rendering ends.
        return await page.pdf({
          format: 'A4',
          margin: {
            top: '0.5in',
            right: '0.5in',
            bottom: '0.5in',
            left: '0.5in',
          },
          displayHeaderFooter: true,
          printBackground: true,
        });
      } finally {
        this.releasePage(page);
      }
    } catch (error) {
      logger.error(
        'Failed to generate CIM Review PDF with Puppeteer, trying fallback method',
        error
      );
      // Fallback: Generate a simple text-based PDF without Chrome
      return this.generateSimplePDF(analysisData);
    }
  }

  /** The report sections, in display order, shared by both renderers. */
  private getReportSections(analysisData: any): ReportSection[] {
    return [
      { title: 'Deal Overview', data: analysisData.dealOverview, icon: '📊' },
      { title: 'Business Description', data: analysisData.businessDescription, icon: '🏢' },
      { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis, icon: '📈' },
      { title: 'Financial Summary', data: analysisData.financialSummary, icon: '💰' },
      { title: 'Management Team Overview', data: analysisData.managementTeamOverview, icon: '👥' },
      { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis, icon: '🎯' },
      { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps, icon: '❓' },
    ];
  }

  /**
   * Generate a simple PDF using PDFKit (fallback method)
   *
   * Only scalar fields of each section are printed; nested objects are
   * skipped.
   *
   * @throws rethrows (after logging) any error raised while building the
   *   document.
   */
  private async generateSimplePDF(analysisData: any): Promise<Buffer> {
    try {
      // Awaited here so that rejections actually reach the catch below.
      return await new Promise<Buffer>((resolve, reject) => {
        const doc = new PDFDocument({
          size: 'A4',
          margins: { top: 50, bottom: 50, left: 50, right: 50 },
        });

        const chunks: Buffer[] = [];
        doc.on('data', (chunk: Buffer) => chunks.push(chunk));
        doc.on('end', () => resolve(Buffer.concat(chunks)));
        doc.on('error', (error: unknown) => reject(error));

        // Header
        doc.fontSize(24)
          .font('Helvetica-Bold')
          .text('BLUEPOINT Capital Partners', { align: 'center' });
        doc.moveDown(0.5);
        doc.fontSize(18)
          .font('Helvetica-Bold')
          .text('CIM Review Report', { align: 'center' });
        doc.moveDown(0.5);
        doc.fontSize(10)
          .font('Helvetica')
          .text(
            `Generated: ${new Date().toLocaleDateString()} at ${new Date().toLocaleTimeString()}`,
            { align: 'center' }
          );
        doc.moveDown(2);

        // Content sections
        for (const section of this.getReportSections(analysisData)) {
          if (!isRecord(section.data)) {
            continue;
          }

          doc.fontSize(14).font('Helvetica-Bold').text(section.title);
          doc.moveDown(0.5);

          for (const [key, value] of Object.entries(section.data)) {
            if (value && typeof value !== 'object') {
              doc.fontSize(10)
                .font('Helvetica-Bold')
                .text(`${this.formatFieldName(key)}:`, { continued: true });
              doc.fontSize(10).font('Helvetica').text(` ${value}`);
              doc.moveDown(0.3);
            }
          }

          doc.moveDown(1);
        }

        // Footer
        doc.moveDown(2);
        doc.fontSize(8)
          .font('Helvetica')
          .text('BLUEPOINT Capital Partners | CIM Document Processor | Confidential', {
            align: 'center',
          });

        doc.end();
      });
    } catch (error) {
      logger.error('PDFKit PDF generation failed', error);
      throw error;
    }
  }

  /** Render one label/value row; both parts are HTML-escaped. */
  private renderField(fieldName: string, value: unknown): string {
    return `
      <div class="field">
        <span class="field-label">${this.escapeHtml(this.formatFieldName(fieldName))}</span>
        <span class="field-value">${this.escapeHtml(value)}</span>
      </div>`;
  }

  /** Render the FY-3 … LTM financial table; periods without data are omitted. */
  private renderFinancialTable(financials: unknown): string {
    const rows = ['fy3', 'fy2', 'fy1', 'ltm']
      .map(period => {
        const data = isRecord(financials) ? financials[period] : undefined;
        if (!data) {
          return '';
        }
        const cell = (field: string): string =>
          this.escapeHtml((isRecord(data) ? data[field] : undefined) || '-');
        return `
          <tr>
            <td>${period.toUpperCase()}</td>
            <td>${cell('revenue')}</td>
            <td>${cell('revenueGrowth')}</td>
            <td>${cell('ebitda')}</td>
            <td>${cell('ebitdaMargin')}</td>
          </tr>`;
      })
      .join('');

    return `
      <h3>💰 Financial Data</h3>
      <table class="financial-table">
        <thead>
          <tr><th>Period</th><th>Revenue</th><th>Growth</th><th>EBITDA</th><th>Margin</th></tr>
        </thead>
        <tbody>${rows}
        </tbody>
      </table>`;
  }

  /**
   * Generate HTML from CIM Review analysis data
   *
   * Every value taken from `analysisData` is HTML-escaped before it is
   * interpolated.
   *
   * NOTE(review): the tags and stylesheet of this template were not
   * recoverable from the reviewed source; they are reconstructed around the
   * surviving visible text. Confirm against version control.
   */
  private generateCIMReviewHTML(analysisData: any): string {
    const now = new Date();
    // Read the logo once per render instead of once per use.
    const logo = this.getLogoBase64();

    const banner = logo
      ? `
    <div class="brand">
      <img class="logo" src="data:image/png;base64,${logo}" alt="BLUEPOINT Capital Partners">
      <div>
        <h2>BLUEPOINT Capital Partners</h2>
        <p>Professional Investment Analysis</p>
      </div>
    </div>
    <h1>CIM Review Report</h1>
    <p>Comprehensive Investment Memorandum Analysis</p>`
      : `
    <h1>CIM Review Report</h1>
    <p>BLUEPOINT Capital Partners - Professional Investment Analysis</p>`;

    let html = `<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>CIM Review Report</title>
  <style>
    body { font-family: Arial, Helvetica, sans-serif; line-height: 1.5; color: #333; margin: 0; padding: 20px; }
    .header { text-align: center; border-bottom: 2px solid #2c3e50; margin-bottom: 24px; padding-bottom: 12px; }
    .brand { display: flex; align-items: center; justify-content: center; gap: 16px; }
    .logo { height: 48px; }
    .section { margin-bottom: 24px; page-break-inside: avoid; }
    .section h2 { color: #2c3e50; border-bottom: 1px solid #ddd; padding-bottom: 4px; }
    .section-icon { margin-right: 8px; }
    .field { margin: 4px 0; }
    .field-label { font-weight: bold; margin-right: 6px; }
    .financial-table { border-collapse: collapse; width: 100%; margin: 12px 0; }
    .financial-table th, .financial-table td { border: 1px solid #ddd; padding: 6px 8px; text-align: left; }
  </style>
</head>
<body>
  <div class="header">${banner}
    <p class="meta">
      <span>Generated on ${now.toLocaleDateString()}</span>
      <span>at ${now.toLocaleTimeString()}</span>
    </p>
  </div>`;

    for (const section of this.getReportSections(analysisData)) {
      if (!isRecord(section.data)) {
        continue;
      }

      html += `
  <div class="section">
    <h2><span class="section-icon">${section.icon}</span>${this.escapeHtml(section.title)}</h2>`;

      for (const [key, value] of Object.entries(section.data)) {
        if (key === 'financials' && typeof value === 'object') {
          // Financials get a dedicated period-by-metric table.
          html += this.renderFinancialTable(value);
        } else if (value && typeof value === 'object' && !Array.isArray(value)) {
          // Nested objects become a sub-heading followed by their scalar fields.
          html += `
      <h3>📋 ${this.escapeHtml(this.formatFieldName(key))}</h3>`;
          for (const [subKey, subValue] of Object.entries(value)) {
            if (subValue && typeof subValue !== 'object') {
              html += this.renderField(subKey, subValue);
            }
          }
        } else if (value) {
          html += this.renderField(key, value);
        }
      }

      html += `
  </div>`;
    }

    html += `
</body>
</html>`;

    return html;
  }

  /**
   * Get logo as base64 string for embedding in HTML
   *
   * @returns the encoded logo, or an empty string (after logging) when the
   *   file cannot be read, so PDF generation can continue without a logo.
   */
  private getLogoBase64(): string {
    try {
      const logoPath = path.join(__dirname, '../assets/bluepoint-logo.png');
      return fs.readFileSync(logoPath).toString('base64');
    } catch (error) {
      logger.error('Failed to load logo:', error);
      return '';
    }
  }

  /**
   * Format field names for display
   *
   * Splits camelCase into words, folds acronyms to a single capitalized word
   * and upper-cases the first letter, e.g. `dealOverview` → `Deal Overview`,
   * `EBITDAMargin` → `Ebitda Margin`.
   */
  private formatFieldName(fieldName: string): string {
    return fieldName
      // lower/digit → upper boundary: "dealOverview" → "deal Overview"
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      // acronym followed by a word: "EBITDAMargin" → "EBITDA Margin"
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      // fold acronyms: "EBITDA" → "Ebitda"
      .replace(/[A-Z]{2,}/g, acronym => acronym.charAt(0) + acronym.slice(1).toLowerCase())
      .replace(/^./, first => first.toUpperCase());
  }

  /**
   * Close browser instance
   *
   * Closes every pooled page (logging individual failures), clears the cache
   * and closes the browser.
   */
  async close(): Promise<void> {
    for (const poolItem of this.pagePool) {
      try {
        await poolItem.page.close();
      } catch (error) {
        logger.error('Error closing page:', error);
      }
    }
    this.pagePool = [];

    this.cache.clear();

    if (this.browser) {
      // Drop the reference first so a failed close never leaves a dead
      // browser behind for the next getBrowser() call.
      const browser = this.browser;
      this.browser = null;
      await browser.close();
    }
  }

  /**
   * Clean up temporary files
   *
   * Currently an alias for close().
   */
  async cleanup(): Promise<void> {
    await this.close();
  }

  /**
   * Get service statistics
   */
  getStats(): {
    pagePoolSize: number;
    cacheSize: number;
    activePages: number;
  } {
    return {
      pagePoolSize: this.pagePool.length,
      cacheSize: this.cache.size,
      activePages: this.pagePool.filter(entry => entry.inUse).length,
    };
  }
}

export const pdfGenerationService = new PDFGenerationService();
export default pdfGenerationService;