Some checks failed
CI/CD Pipeline / Backend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Frontend - Lint & Test (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Build Backend (push) Has been cancelled
CI/CD Pipeline / Build Frontend (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Performance Tests (push) Has been cancelled
CI/CD Pipeline / Dependency Updates (push) Has been cancelled
- Updated Anthropic API to latest version (2024-01-01) - Set Claude 3.7 Sonnet Latest as primary model - Removed deprecated Opus 3.5 references - Fixed LLM response validation and JSON parsing - Improved error handling and logging - Updated model configurations and pricing - Enhanced document processing reliability - Fixed TypeScript type issues - Updated environment configuration
1399 lines
39 KiB
TypeScript
1399 lines
39 KiB
TypeScript
// Load Puppeteer when it is installed. If the require fails (for example in a
// test environment without the package), fall back to an in-memory stub that
// exposes every page/browser method this file calls, each as an async no-op.
let puppeteer: any;
try {
  puppeteer = require('puppeteer');
} catch {
  puppeteer = {
    launch: async () => ({
      newPage: async () => ({
        // getPage() calls these two on every freshly created page; without
        // them the stub made every pooled-page code path throw a TypeError.
        setViewport: async () => {},
        setCacheEnabled: async () => {},
        setContent: async () => {},
        pdf: async () => {},
        close: async () => {},
        evaluate: async () => ({ title: 'Test', url: 'test://' }),
        goto: async () => {},
      }),
      close: async () => {},
    }),
  };
}
|
|
|
|
// Load PDFKit (used for the fallback, browser-free PDF path). If the require
// fails, substitute a chainable stub so the module can still be loaded.
let PDFDocument: any;
try {
  PDFDocument = require('pdfkit');
} catch {
  PDFDocument = class MockPDFDocument {
    // Listeners registered through on(), keyed by event name.
    private readonly listeners = new Map<string, Array<(...args: any[]) => void>>();

    constructor() {}

    // generateSimplePDF() subscribes to 'data', 'end' and 'error'; the stub
    // previously had no on(), so that method always threw under the stub.
    on(event: string, listener: (...args: any[]) => void) {
      const registered = this.listeners.get(event) ?? [];
      registered.push(listener);
      this.listeners.set(event, registered);
      return this;
    }

    pipe() { return this; }

    // Notify 'end' listeners so callers waiting for completion are released.
    // No 'data' is ever emitted, so such callers receive an empty document.
    end() {
      for (const listener of this.listeners.get('end') ?? []) {
        listener();
      }
      return this;
    }

    font() { return this; }
    fontSize() { return this; }
    text() { return this; }
    moveDown() { return this; }
    addPage() { return this; }
  };
}
|
|
|
|
import { createHash } from 'crypto';
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import { logger } from '../utils/logger';
import { memoryMonitorService } from './memoryMonitorService';
|
|
|
|
/**
 * Caller-supplied settings for the PDF generation methods in this file.
 * Every field is optional. The generation methods spread these values over
 * the service defaults and pass the merged object to `page.pdf()`.
 */
export interface PDFGenerationOptions {
  /** Paper size. */
  format?: 'A4' | 'Letter';
  /** Page margins as CSS-style length strings (the defaults use `'1in'`). */
  margin?: {
    top: string;
    right: string;
    bottom: string;
    left: string;
  };
  /** Whether a header/footer should be printed. */
  displayHeaderFooter?: boolean;
  /** Header markup, forwarded unchanged. */
  headerTemplate?: string;
  /** Footer markup, forwarded unchanged. */
  footerTemplate?: string;
  /** Whether background graphics should be printed. */
  printBackground?: boolean;
  /** Not read directly by the code in this file; forwarded with the other options. */
  quality?: 'low' | 'medium' | 'high';
  /** Milliseconds; also used as the `setContent` timeout when rendering markdown. */
  timeout?: number;
}
|
|
|
|
/** One entry of the service's page pool: a browser page plus its bookkeeping. */
interface PagePool {
  /** Whether the page is currently handed out to a caller. */
  inUse: boolean;
  /** `Date.now()` timestamp used to decide when an idle page has expired. */
  lastUsed: number;
  /** The browser page object (untyped because Puppeteer is loaded dynamically). */
  page: any;
}
|
|
|
|
/**
 * Renders PDFs from markdown, HTML files, URLs and CIM analysis data.
 *
 * Rendering goes through a lazily launched browser (Puppeteer or the stub
 * defined at the top of this file) with a small pool of reusable pages.
 * `generatePDFBuffer` results are cached in memory for a short time, and
 * `generateCIMReviewPDF` falls back to PDFKit when browser rendering fails.
 */
class PDFGenerationService {
  /** Shared browser instance; created on first use by getBrowser(). */
  private browser: any = null;
  /** In-flight launch, shared so concurrent first callers start only one browser. */
  private browserLaunch: Promise<any> | null = null;
  private pagePool: PagePool[] = [];
  private readonly maxPoolSize = 5;
  private readonly pageTimeout = 30000; // 30 seconds
  private readonly cache = new Map<string, { buffer: Buffer; timestamp: number }>();
  private readonly cacheTimeout = 300000; // 5 minutes
  private readonly maxCacheEntries = 100;

  private readonly defaultOptions: PDFGenerationOptions = {
    format: 'A4',
    margin: {
      top: '1in',
      right: '1in',
      bottom: '1in',
      left: '1in',
    },
    displayHeaderFooter: true,
    printBackground: true,
    quality: 'high',
    timeout: 30000,
  };

  /**
   * Return the shared browser, launching it on first use.
   *
   * The launch promise is remembered while it is pending so that callers
   * arriving during the launch await the same browser instead of each
   * starting (and leaking) their own.
   */
  private async getBrowser(): Promise<any> {
    if (this.browser) {
      return this.browser;
    }
    if (!this.browserLaunch) {
      this.browserLaunch = this.launchBrowser().finally(() => {
        this.browserLaunch = null;
      });
    }
    return this.browserLaunch;
  }

  /** Launch a browser with the service's fixed flags and store it in `this.browser`. */
  private async launchBrowser(): Promise<any> {
    const launchOptions: any = {
      headless: 'new',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
      ],
    };

    // For Firebase Functions environment, use the bundled Chrome
    if (process.env.FUNCTIONS_EMULATOR || process.env.FIREBASE_FUNCTIONS) {
      launchOptions.executablePath =
        process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable';
    }

    const browser = await puppeteer.launch(launchOptions);

    // If the browser goes away, forget it and its pages so the next request
    // relaunches instead of failing against a dead instance. The stub browser
    // has no event API, hence the guard.
    if (typeof browser.on === 'function') {
      browser.on('disconnected', () => {
        if (this.browser === browser) {
          this.browser = null;
          this.pagePool = [];
        }
      });
    }

    this.browser = browser;
    return browser;
  }

  /**
   * Hand out a page: reuse an idle pooled page, create a new one while the
   * pool has capacity, or otherwise poll every 100 ms until either becomes
   * possible. The caller must give the page back with releasePage().
   */
  private async getPage(): Promise<any> {
    this.cleanupExpiredPages();

    const idlePage = this.claimIdlePage();
    if (idlePage) {
      return idlePage;
    }

    if (this.pagePool.length < this.maxPoolSize) {
      return this.createPooledPage();
    }

    return new Promise<any>((resolve) => {
      const poll = (): void => {
        const page = this.claimIdlePage();
        if (page) {
          resolve(page);
        } else if (this.pagePool.length < this.maxPoolSize) {
          // The pool shrank while waiting (e.g. it was cleared), so create a page.
          resolve(this.createPooledPage());
        } else {
          setTimeout(poll, 100);
        }
      };
      poll();
    });
  }

  /** Mark the first idle pooled page as in use and return it, or null when none is idle. */
  private claimIdlePage(): any {
    const idle = this.pagePool.find(item => !item.inUse);
    if (!idle) {
      return null;
    }
    idle.inUse = true;
    idle.lastUsed = Date.now();
    return idle.page;
  }

  /**
   * Create a page and register it in the pool as in use.
   *
   * The pool slot is reserved before the first await so that concurrent
   * callers cannot all pass the capacity check and exceed `maxPoolSize`.
   * On failure the slot is removed and any half-configured page is closed.
   */
  private async createPooledPage(): Promise<any> {
    const slot: PagePool = { page: null, inUse: true, lastUsed: Date.now() };
    this.pagePool.push(slot);

    let page: any = null;
    try {
      const browser = await this.getBrowser();
      page = await browser.newPage();

      // Optimize page settings
      await page.setViewport({ width: 1200, height: 800 });
      await page.setCacheEnabled(false);

      slot.page = page;
      return page;
    } catch (error) {
      this.pagePool = this.pagePool.filter(item => item !== slot);
      if (page) {
        try {
          await page.close();
        } catch (closeError) {
          logger.error('Error closing page after failed setup:', closeError);
        }
      }
      throw error;
    }
  }

  /**
   * Return a page to the pool. Pages that are not (or no longer) pooled are ignored.
   */
  private releasePage(page: any): void {
    const pooled = this.pagePool.find(item => item.page === page);
    if (pooled) {
      pooled.inUse = false;
      // Start the idle timer at release; otherwise a page used for a long job
      // would count as expired the moment it is handed back.
      pooled.lastUsed = Date.now();
    }
  }

  /**
   * Close and drop pooled pages that have been idle for longer than
   * `pageTimeout`. Pages currently in use are always kept.
   */
  private cleanupExpiredPages(): void {
    const now = Date.now();
    const kept: PagePool[] = [];

    for (const item of this.pagePool) {
      const expired = now - item.lastUsed > this.pageTimeout;
      if (expired && !item.inUse) {
        // Fire-and-forget: a failed close is only logged.
        item.page.close().catch((err: unknown) =>
          logger.error('Error closing expired page:', err)
        );
      } else {
        kept.push(item);
      }
    }

    this.pagePool = kept;
  }

  /**
   * Build the cache key for a markdown document and its options.
   *
   * The key is a SHA-256 digest of the full content and the serialized
   * options. The previous key kept only the first 32 base64 characters, i.e.
   * the first 24 bytes of the content, so documents sharing a prefix collided
   * and one document's cached PDF was returned for another.
   */
  private generateCacheKey(content: string, options: PDFGenerationOptions): string {
    return createHash('sha256')
      .update(content)
      .update('\u0000') // separator between content and options
      .update(JSON.stringify(options))
      .digest('hex');
  }

  /**
   * Look up a cached PDF. Entries older than `cacheTimeout` are deleted and
   * treated as missing.
   */
  private getCachedPDF(cacheKey: string): Buffer | null {
    const entry = this.cache.get(cacheKey);
    if (!entry) {
      return null;
    }
    if (Date.now() - entry.timestamp < this.cacheTimeout) {
      logger.info('PDF served from cache');
      return entry.buffer;
    }
    this.cache.delete(cacheKey);
    return null;
  }

  /**
   * Store a PDF in the cache, evicting the oldest-inserted entry first when
   * adding a new key would exceed `maxCacheEntries`.
   */
  private cachePDF(cacheKey: string, buffer: Buffer): void {
    if (!this.cache.has(cacheKey) && this.cache.size >= this.maxCacheEntries) {
      // Map iterates in insertion order, so the first key is the oldest.
      const oldestKey = this.cache.keys().next().value;
      if (oldestKey !== undefined) {
        this.cache.delete(oldestKey);
      }
    }
    this.cache.set(cacheKey, { buffer, timestamp: Date.now() });
  }

  /**
   * Convert a small markdown subset to a complete, styled HTML document.
   *
   * Supported syntax: `#`/`##`/`###` headings, `**bold**`, `*italic*` and
   * `- ` list items. Any other non-blank line becomes a paragraph. Lines that
   * already start with a block-level HTML tag (headings, lists, tables, ...)
   * are left unwrapped, which keeps embedded HTML tables intact and lets
   * consecutive list items merge into a single `<ul>`.
   */
  private markdownToHTML(markdown: string): string {
    // A non-blank line that does not start with a block-level tag.
    const paragraphLine =
      /^(?![ \t]*<\/?(?:h[1-6]|ul|ol|li|table|thead|tbody|tfoot|tr|th|td|div|p|blockquote|pre|hr)\b)(.*\S.*)$/gim;

    const html = markdown
      // Headers
      .replace(/^### (.*$)/gim, '<h3>$1</h3>')
      .replace(/^## (.*$)/gim, '<h2>$1</h2>')
      .replace(/^# (.*$)/gim, '<h1>$1</h1>')
      // Bold
      .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
      // Italic
      .replace(/\*(.*?)\*/g, '<em>$1</em>')
      // Lists: wrap every item, then fuse neighbouring lists into one
      .replace(/^- (.*$)/gim, '<li>$1</li>')
      .replace(/<li>(.*?)<\/li>/g, '<ul><li>$1</li></ul>')
      .replace(/<\/ul>\s*<ul>/g, '')
      // Paragraphs (block-level lines are skipped)
      .replace(paragraphLine, '<p>$1</p>');

    return `
      <!DOCTYPE html>
      <html>
        <head>
          <meta charset="UTF-8">
          <title>CIM Review Summary</title>
          <style>
            @page {
              margin: 0.75in;
              size: A4;
            }

            * {
              box-sizing: border-box;
            }

            body {
              font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
              font-size: 11pt;
              line-height: 1.6;
              color: #2d3748;
              margin: 0;
              padding: 0;
              background: #ffffff;
            }

            h1 {
              font-size: 24pt;
              font-weight: 700;
              color: #1a202c;
              text-align: center;
              margin-bottom: 12pt;
              background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
              -webkit-background-clip: text;
              -webkit-text-fill-color: transparent;
              background-clip: text;
              position: relative;
            }

            h1::after {
              content: '';
              position: absolute;
              bottom: -8pt;
              left: 50%;
              transform: translateX(-50%);
              width: 60pt;
              height: 3pt;
              background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
              border-radius: 2pt;
            }

            h2 {
              font-size: 16pt;
              font-weight: 600;
              color: #2d3748;
              margin-top: 24pt;
              margin-bottom: 12pt;
              padding-bottom: 8pt;
              border-bottom: 2pt solid #e2e8f0;
              page-break-after: avoid;
              position: relative;
            }

            h2::before {
              content: '';
              position: absolute;
              left: 0;
              bottom: -2pt;
              width: 40pt;
              height: 2pt;
              background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            }

            h3 {
              font-size: 14pt;
              font-weight: 600;
              color: #4a5568;
              margin-top: 20pt;
              margin-bottom: 8pt;
              page-break-after: avoid;
            }

            p {
              margin-bottom: 10pt;
              text-align: justify;
              color: #4a5568;
            }

            ul {
              margin-bottom: 12pt;
              margin-left: 24pt;
            }

            li {
              margin-bottom: 6pt;
              text-align: justify;
              color: #4a5568;
              position: relative;
            }

            li::before {
              content: '•';
              color: #667eea;
              font-weight: bold;
              position: absolute;
              left: -16pt;
            }

            strong {
              font-weight: 600;
              color: #2d3748;
            }

            em {
              color: #718096;
              font-style: italic;
            }

            .header {
              text-align: center;
              margin-bottom: 24pt;
              padding: 20pt 0;
              background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
              border-radius: 8pt;
              border: 1pt solid #e2e8f0;
            }

            .header h1 {
              margin-bottom: 8pt;
              -webkit-text-fill-color: #1a202c;
            }

            .header h1::after {
              display: none;
            }

            .header p {
              font-size: 10pt;
              color: #718096;
              margin: 0;
              font-weight: 500;
            }

            .footer {
              text-align: center;
              margin-top: 24pt;
              padding: 16pt 0;
              border-top: 2pt solid #e2e8f0;
              font-size: 9pt;
              color: #718096;
              background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
              border-radius: 8pt;
            }

            .section {
              margin-bottom: 20pt;
              page-break-inside: avoid;
              padding: 16pt;
              background: #ffffff;
              border-radius: 6pt;
              border: 1pt solid #f1f5f9;
            }

            .financial-table {
              width: 100%;
              border-collapse: collapse;
              margin: 12pt 0;
              font-size: 10pt;
              border-radius: 6pt;
              overflow: hidden;
              box-shadow: 0 2pt 8pt rgba(0, 0, 0, 0.1);
            }

            .financial-table th,
            .financial-table td {
              border: 1pt solid #e2e8f0;
              padding: 8pt;
              text-align: left;
            }

            .financial-table th {
              background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
              color: #ffffff;
              font-weight: 600;
              text-transform: uppercase;
              font-size: 9pt;
              letter-spacing: 0.5pt;
            }

            .financial-table td {
              background: #ffffff;
              color: #4a5568;
            }

            .financial-table tr:nth-child(even) td {
              background: #f7fafc;
            }

            .financial-table tr:hover td {
              background: #edf2f7;
            }

            .page-break {
              page-break-before: always;
            }

            .avoid-break {
              page-break-inside: avoid;
            }

            .highlight-box {
              background: linear-gradient(135deg, #fef5e7 0%, #fed7aa 100%);
              border-left: 4pt solid #f59e0b;
              padding: 12pt;
              margin: 12pt 0;
              border-radius: 6pt;
            }

            .info-box {
              background: linear-gradient(135deg, #ebf8ff 0%, #bee3f8 100%);
              border-left: 4pt solid #3182ce;
              padding: 12pt;
              margin: 12pt 0;
              border-radius: 6pt;
            }

            .success-box {
              background: linear-gradient(135deg, #f0fff4 0%, #c6f6d5 100%);
              border-left: 4pt solid #38a169;
              padding: 12pt;
              margin: 12pt 0;
              border-radius: 6pt;
            }
          </style>
        </head>
        <body>
          <div class="header">
            <h1>CIM Review Summary</h1>
            <p>Generated on ${new Date().toLocaleDateString()} at ${new Date().toLocaleTimeString()}</p>
          </div>
          <div class="content">
            ${html}
          </div>
          <div class="footer">
            <p>BPCP CIM Document Processor | Confidential | Professional Analysis</p>
          </div>
        </body>
      </html>
    `;
  }

  /** Create the directory that will contain `outputPath` (no-op when it already exists). */
  private ensureOutputDirectory(outputPath: string): void {
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  }

  /**
   * Render markdown to a PDF file at `outputPath`.
   *
   * @returns `true` on success; `false` (after logging) when rendering or
   * writing fails. Failure to obtain a page is not caught here and rejects.
   */
  async generatePDFFromMarkdown(
    markdown: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const page = await this.getPage();

    try {
      const html = this.markdownToHTML(markdown);

      await page.setContent(html, {
        waitUntil: 'networkidle0',
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);

      await page.pdf({
        ...this.defaultOptions,
        ...options,
        path: outputPath,
      });

      logger.info(`PDF generated successfully: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation failed: ${outputPath}`, error);
      return false;
    } finally {
      this.releasePage(page);
    }
  }

  /**
   * Render markdown to an in-memory PDF.
   *
   * A cached result for the same markdown and options is returned directly.
   * Otherwise the rendering runs inside `memoryMonitorService.monitorOperation`
   * and the result is cached.
   *
   * @returns the PDF bytes, or `null` (after logging) when rendering fails.
   */
  async generatePDFBuffer(markdown: string, options: PDFGenerationOptions = {}): Promise<Buffer | null> {
    const cacheKey = this.generateCacheKey(markdown, options);
    const cached = this.getCachedPDF(cacheKey);
    if (cached) {
      return cached;
    }

    return memoryMonitorService.monitorOperation(
      'PDF Generation',
      async () => {
        const page = await this.getPage();

        try {
          const html = this.markdownToHTML(markdown);

          await page.setContent(html, {
            waitUntil: 'networkidle0',
            timeout: options.timeout ?? this.defaultOptions.timeout,
          });

          const buffer = await page.pdf({
            ...this.defaultOptions,
            ...options,
          });

          this.cachePDF(cacheKey, buffer);

          logger.info('PDF buffer generated successfully');
          return buffer;
        } catch (error) {
          logger.error('PDF buffer generation failed', error);
          return null;
        } finally {
          this.releasePage(page);
        }
      }
    );
  }

  /**
   * Render a local HTML file to a PDF file at `outputPath`, using a dedicated
   * (non-pooled) page that is closed afterwards.
   *
   * @returns `true` on success; `false` (after logging) on failure.
   */
  async generatePDFFromHTML(
    htmlPath: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      // pathToFileURL resolves relative paths and percent-encodes characters
      // such as spaces, '#' and '?', which a hand-built `file://` string does not.
      await page.goto(pathToFileURL(htmlPath).href, {
        waitUntil: 'networkidle0',
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);

      await page.pdf({
        ...this.defaultOptions,
        ...options,
        path: outputPath,
      });

      logger.info(`PDF generated from HTML: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from HTML failed: ${outputPath}`, error);
      return false;
    } finally {
      await page.close();
    }
  }

  /**
   * Load `url` in a dedicated (non-pooled) page and print it to a PDF file at
   * `outputPath`. Navigation uses `options.timeout`, defaulting to 30 seconds.
   *
   * @returns `true` on success; `false` (after logging) on failure.
   */
  async generatePDFFromURL(
    url: string,
    outputPath: string,
    options: PDFGenerationOptions = {}
  ): Promise<boolean> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(url, {
        waitUntil: 'networkidle0',
        timeout: options.timeout ?? this.defaultOptions.timeout,
      });

      this.ensureOutputDirectory(outputPath);

      await page.pdf({
        ...this.defaultOptions,
        ...options,
        path: outputPath,
      });

      logger.info(`PDF generated from URL: ${outputPath}`);
      return true;
    } catch (error) {
      logger.error(`PDF generation from URL failed: ${outputPath}`, error);
      return false;
    } finally {
      await page.close();
    }
  }

  /**
   * Shallow sanity check of a PDF file: it must start with the `%PDF` magic
   * bytes and be at least 100 bytes long. The document structure is not parsed.
   *
   * @returns `false` when either check fails or the file cannot be read.
   */
  async validatePDF(filePath: string): Promise<boolean> {
    try {
      const buffer = fs.readFileSync(filePath);
      const hasPdfHeader = buffer.toString('ascii', 0, 4) === '%PDF';
      // The bytes are already in memory, so no separate stat call is needed.
      return hasPdfHeader && buffer.length >= 100;
    } catch (error) {
      logger.error(`PDF validation failed: ${filePath}`, error);
      return false;
    }
  }

  /**
   * Open the file in a browser page and return metadata for it.
   *
   * NOTE: the returned values are fixed placeholders (title 'PDF Document',
   * url 'file://', pageCount 1); nothing is read from the document itself.
   *
   * @returns the placeholder metadata, or `null` (after logging) when the file
   * cannot be opened.
   */
  async getPDFMetadata(filePath: string): Promise<any> {
    const browser = await this.getBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(pathToFileURL(filePath).href, {
        waitUntil: 'networkidle0',
      });

      const metadata = await page.evaluate(() => {
        return {
          title: 'PDF Document',
          url: 'file://',
          pageCount: 1, // This would need to be calculated differently
        };
      });

      return metadata;
    } catch (error) {
      logger.error(`Failed to get PDF metadata: ${filePath}`, error);
      return null;
    } finally {
      await page.close();
    }
  }

  /**
   * Produce the CIM Review report as PDF bytes.
   *
   * The report is rendered as HTML through a pooled browser page. If any step
   * of that path throws, the error is logged and a plain PDFKit document is
   * generated instead.
   */
  async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
    try {
      const html = this.generateCIMReviewHTML(analysisData);
      const page = await this.getPage();

      try {
        await page.setContent(html, { waitUntil: 'networkidle0' });
        return await page.pdf({
          format: 'A4',
          margin: {
            top: '0.5in',
            right: '0.5in',
            bottom: '0.5in',
            left: '0.5in',
          },
          displayHeaderFooter: true,
          printBackground: true,
        });
      } finally {
        this.releasePage(page);
      }
    } catch (error) {
      logger.error('Failed to generate CIM Review PDF with Puppeteer, trying fallback method', error);

      // Fallback: Generate a simple text-based PDF without Chrome
      return this.generateSimplePDF(analysisData);
    }
  }

  /** The report sections in display order, each paired with its slice of the analysis data. */
  private getCIMSections(analysisData: any): Array<{ title: string; data: any; icon: string }> {
    return [
      { title: 'Deal Overview', data: analysisData.dealOverview, icon: '📊' },
      { title: 'Business Description', data: analysisData.businessDescription, icon: '🏢' },
      { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis, icon: '📈' },
      { title: 'Financial Summary', data: analysisData.financialSummary, icon: '💰' },
      { title: 'Management Team Overview', data: analysisData.managementTeamOverview, icon: '👥' },
      { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis, icon: '🎯' },
      { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps, icon: '❓' },
    ];
  }

  /**
   * Fallback renderer: build a text-only report with PDFKit and return its bytes.
   * Only top-level primitive fields of each section are printed; nested
   * objects and arrays are skipped.
   *
   * @throws whatever PDFKit reports; the error is logged before being rethrown.
   */
  private async generateSimplePDF(analysisData: any): Promise<Buffer> {
    try {
      // Awaited inside the try so that a rejection actually reaches the catch
      // below; returning the bare promise left the logging unreachable.
      return await new Promise<Buffer>((resolve, reject) => {
        const doc = new PDFDocument({
          size: 'A4',
          margins: {
            top: 50,
            bottom: 50,
            left: 50,
            right: 50,
          },
        });

        const chunks: Buffer[] = [];
        doc.on('data', (chunk: Buffer) => chunks.push(chunk));
        doc.on('end', () => resolve(Buffer.concat(chunks)));
        doc.on('error', (error: any) => reject(error));

        // Add header
        doc.fontSize(24)
          .font('Helvetica-Bold')
          .text('BLUEPOINT Capital Partners', { align: 'center' });

        doc.moveDown(0.5);
        doc.fontSize(18)
          .font('Helvetica-Bold')
          .text('CIM Review Report', { align: 'center' });

        doc.moveDown(0.5);
        doc.fontSize(10)
          .font('Helvetica')
          .text(`Generated: ${new Date().toLocaleDateString()} at ${new Date().toLocaleTimeString()}`, { align: 'center' });

        doc.moveDown(2);

        // Add content sections
        for (const section of this.getCIMSections(analysisData)) {
          if (!section.data) {
            continue;
          }

          doc.fontSize(14)
            .font('Helvetica-Bold')
            .text(section.title);

          doc.moveDown(0.5);

          for (const [key, value] of Object.entries(section.data)) {
            if (value && typeof value !== 'object') {
              doc.fontSize(10)
                .font('Helvetica-Bold')
                .text(`${this.formatFieldName(key)}:`, { continued: true });

              doc.fontSize(10)
                .font('Helvetica')
                .text(` ${value}`);

              doc.moveDown(0.3);
            }
          }

          doc.moveDown(1);
        }

        // Add footer
        doc.moveDown(2);
        doc.fontSize(8)
          .font('Helvetica')
          .text('BLUEPOINT Capital Partners | CIM Document Processor | Confidential', { align: 'center' });

        doc.end();
      });
    } catch (error) {
      logger.error('PDFKit PDF generation failed', error);
      throw error;
    }
  }

  /** Escape text for safe inclusion in HTML element content or attribute values. */
  private escapeHtml(value: unknown): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /** Render one label/value row of a report section; both parts are HTML-escaped. */
  private renderCIMField(name: string, value: unknown): string {
    return `
      <div class="field">
        <span class="field-label">${this.escapeHtml(this.formatFieldName(name))}</span>
        <span class="field-value">${this.escapeHtml(value)}</span>
      </div>
    `;
  }

  /**
   * Render the `financials` object as a table with one row per period
   * (`fy3`, `fy2`, `fy1`, `ltm`) that is present. Missing or empty cells are
   * shown as '-'.
   */
  private renderCIMFinancialTable(financials: any): string {
    const cell = (raw: unknown): string =>
      raw === undefined || raw === null || raw === '' ? '-' : this.escapeHtml(raw);

    let html = `<h3>💰 Financial Data</h3>`;
    html += `<table class="financial-table">`;
    html += `<thead><tr><th>Period</th><th>Revenue</th><th>Growth</th><th>EBITDA</th><th>Margin</th></tr></thead>`;
    html += `<tbody>`;

    for (const period of ['fy3', 'fy2', 'fy1', 'ltm']) {
      const data = financials[period];
      if (data) {
        html += `
          <tr>
            <td><strong>${period.toUpperCase()}</strong></td>
            <td>${cell(data.revenue)}</td>
            <td>${cell(data.revenueGrowth)}</td>
            <td>${cell(data.ebitda)}</td>
            <td>${cell(data.ebitdaMargin)}</td>
          </tr>
        `;
      }
    }

    return html + `</tbody></table>`;
  }

  /**
   * Render one report section. A `financials` entry becomes a table, other
   * nested objects become a sub-heading with their primitive fields, and
   * remaining truthy values become simple fields.
   */
  private renderCIMSection(section: { title: string; data: any; icon: string }): string {
    let html = `<div class="section"><h2><span class="section-icon">${section.icon}</span>${this.escapeHtml(section.title)}</h2>`;

    for (const [key, value] of Object.entries(section.data)) {
      if (key === 'financials' && value && typeof value === 'object') {
        html += this.renderCIMFinancialTable(value);
      } else if (value && typeof value === 'object' && !Array.isArray(value)) {
        html += `<h3>📋 ${this.escapeHtml(this.formatFieldName(key))}</h3>`;
        for (const [subKey, subValue] of Object.entries(value)) {
          if (subValue && typeof subValue !== 'object') {
            html += this.renderCIMField(subKey, subValue);
          }
        }
      } else if (value) {
        html += this.renderCIMField(key, value);
      }
    }

    return html + `</div>`;
  }

  /**
   * Build the complete HTML document for the CIM Review report.
   *
   * All values taken from `analysisData` are HTML-escaped before insertion,
   * so text containing `<`, `>` or `&` cannot break or inject markup. The
   * logo is read once per call; when it is unavailable a text-only header is used.
   */
  private generateCIMReviewHTML(analysisData: any): string {
    const sections = this.getCIMSections(analysisData);
    const logoBase64 = this.getLogoBase64();

    const branding = logoBase64
      ? `
            <div class="logo-container">
              <img src="data:image/png;base64,${logoBase64}" alt="Bluepoint Capital Partners" class="logo" />
              <div class="company-info">
                <h2 class="company-name">BLUEPOINT Capital Partners</h2>
                <p class="company-tagline">Professional Investment Analysis</p>
              </div>
            </div>
            <div style="margin-left: 24px;">
              <h1 class="title">CIM Review Report</h1>
              <p class="subtitle">Comprehensive Investment Memorandum Analysis</p>
            </div>
          `
      : `
            <div>
              <h1 class="title">CIM Review Report</h1>
              <p class="subtitle">BLUEPOINT Capital Partners - Professional Investment Analysis</p>
            </div>
          `;

    let html = `
      <!DOCTYPE html>
      <html>
      <head>
        <meta charset="UTF-8">
        <title>CIM Review Report</title>
        <style>
          :root {
            --page-margin: 0.75in;
            --radius: 10px;
            --shadow: 0 12px 30px -10px rgba(0,0,0,0.08);
            --color-bg: #ffffff;
            --color-muted: #f5f7fa;
            --color-text: #1f2937;
            --color-heading: #111827;
            --color-border: #dfe3ea;
            --color-primary: #5f6cff;
            --color-primary-dark: #4a52d1;
            --color-success-bg: #e6f4ea;
            --color-success-border: #38a169;
            --color-highlight-bg: #fff8ed;
            --color-highlight-border: #f29f3f;
            --color-summary-bg: #eef7fe;
            --color-summary-border: #3182ce;
            --font-stack: -apple-system, system-ui, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
          }

          @page {
            margin: var(--page-margin);
            size: A4;
          }

          * { box-sizing: border-box; }

          body {
            margin: 0;
            padding: 0;
            font-family: var(--font-stack);
            background: var(--color-bg);
            color: var(--color-text);
            line-height: 1.45;
            font-size: 11pt;
          }

          .container {
            max-width: 940px;
            margin: 0 auto;
          }

          .header {
            display: flex;
            flex-wrap: wrap;
            justify-content: space-between;
            align-items: flex-start;
            padding: 24px 20px;
            background: #f9fbfc;
            border-radius: var(--radius);
            border: 1px solid var(--color-border);
            margin-bottom: 28px;
            gap: 12px;
          }

          .header-left {
            flex: 1 1 300px;
            display: flex;
            align-items: center;
            gap: 16px;
          }

          .logo {
            width: 60px;
            height: 60px;
            object-fit: contain;
            flex-shrink: 0;
          }

          .logo-container {
            display: flex;
            align-items: center;
            gap: 16px;
          }

          .company-info {
            display: flex;
            flex-direction: column;
            gap: 4px;
          }

          .company-name {
            font-size: 14pt;
            font-weight: 600;
            color: var(--color-heading);
            margin: 0;
          }

          .company-tagline {
            font-size: 9pt;
            color: #6b7280;
            margin: 0;
          }

          .title {
            margin: 0;
            font-size: 24pt;
            font-weight: 700;
            color: var(--color-heading);
            position: relative;
            display: inline-block;
            padding-bottom: 4px;
          }

          .title:after {
            content: '';
            position: absolute;
            left: 0;
            bottom: 0;
            height: 4px;
            width: 60px;
            background: linear-gradient(90deg, var(--color-primary), var(--color-primary-dark));
            border-radius: 2px;
          }

          .subtitle {
            margin: 4px 0 0 0;
            font-size: 10pt;
            color: #6b7280;
          }

          .meta {
            text-align: right;
            font-size: 9pt;
            color: #6b7280;
            min-width: 180px;
            line-height: 1.3;
          }

          .section {
            margin-bottom: 28px;
            padding: 22px 24px;
            background: #ffffff;
            border-radius: var(--radius);
            border: 1px solid var(--color-border);
            box-shadow: var(--shadow);
            page-break-inside: avoid;
          }

          .section + .section {
            margin-top: 4px;
          }

          h2 {
            margin: 0 0 14px 0;
            font-size: 18pt;
            font-weight: 600;
            color: var(--color-heading);
            display: flex;
            align-items: center;
            gap: 8px;
          }

          h3 {
            margin: 16px 0 8px 0;
            font-size: 13pt;
            font-weight: 600;
            color: #374151;
          }

          .field {
            display: flex;
            flex-wrap: wrap;
            gap: 12px;
            margin-bottom: 14px;
          }

          .field-label {
            flex: 0 0 180px;
            font-size: 9pt;
            font-weight: 600;
            text-transform: uppercase;
            letter-spacing: 0.8px;
            color: #4b5563;
            margin: 0;
          }

          .field-value {
            flex: 1 1 220px;
            font-size: 11pt;
            color: var(--color-text);
            margin: 0;
          }

          .financial-table {
            width: 100%;
            border-collapse: collapse;
            margin: 16px 0;
            font-size: 10pt;
          }

          .financial-table th,
          .financial-table td {
            padding: 10px 12px;
            text-align: left;
            vertical-align: top;
          }

          .financial-table thead th {
            background: var(--color-primary);
            color: #fff;
            font-weight: 600;
            text-transform: uppercase;
            letter-spacing: 0.5px;
            font-size: 9pt;
            border-bottom: 2px solid rgba(255,255,255,0.2);
          }

          .financial-table tbody tr {
            border-bottom: 1px solid #eceef1;
          }

          .financial-table tbody tr:nth-child(odd) td {
            background: #fbfcfe;
          }

          .financial-table td {
            background: #fff;
            color: var(--color-text);
            font-size: 10pt;
          }

          .financial-table tbody tr:hover td {
            background: #f1f5fa;
          }

          .summary-box,
          .highlight-box,
          .success-box {
            border-radius: 8px;
            padding: 16px 18px;
            margin: 18px 0;
            position: relative;
            font-size: 11pt;
          }

          .summary-box {
            background: var(--color-summary-bg);
            border: 1px solid var(--color-summary-border);
          }

          .highlight-box {
            background: var(--color-highlight-bg);
            border: 1px solid var(--color-highlight-border);
          }

          .success-box {
            background: var(--color-success-bg);
            border: 1px solid var(--color-success-border);
          }

          .footer {
            display: flex;
            flex-wrap: wrap;
            justify-content: space-between;
            align-items: center;
            padding: 18px 20px;
            font-size: 9pt;
            color: #6b7280;
            border-top: 1px solid var(--color-border);
            margin-top: 30px;
            background: #f9fbfc;
            border-radius: var(--radius);
            gap: 8px;
          }

          .footer .left,
          .footer .right {
            flex: 1 1 200px;
          }

          .footer .center {
            flex: 0 0 auto;
            text-align: center;
          }

          .small {
            font-size: 8.5pt;
          }

          .divider {
            height: 1px;
            background: var(--color-border);
            margin: 16px 0;
            border: none;
          }

          /* Utility */
          .inline-block { display: inline-block; }
          .muted { color: #6b7280; }

          /* Page numbering for PDF (supported in many engines including Puppeteer) */
          .page-footer {
            position: absolute;
            bottom: 0;
            width: 100%;
            font-size: 8pt;
            text-align: center;
            padding: 8px 0;
            color: #9ca3af;
          }
        </style>
      </head>
      <body>
        <div class="container">
          <div class="header">
            <div class="header-left">
              ${branding}
            </div>
            <div class="meta">
              <div>Generated on ${new Date().toLocaleDateString()}</div>
              <div style="margin-top:4px;">at ${new Date().toLocaleTimeString()}</div>
            </div>
          </div>
    `;

    for (const section of sections) {
      if (section.data) {
        html += this.renderCIMSection(section);
      }
    }

    html += `
          <!-- Footer -->
          <div class="footer">
            <div class="left">
              <strong>BLUEPOINT Capital Partners</strong> | CIM Document Processor | Confidential
            </div>
            <div class="center small">
              Generated on ${new Date().toLocaleDateString()} at ${new Date().toLocaleTimeString()}
            </div>
            <div class="right" style="text-align:right;">
              Page <span class="page-number"></span>
            </div>
          </div>
        </div>

        <!-- Optional script to inject page numbers if using Puppeteer -->
        <script>
          // Puppeteer can replace this with its own page numbering; if not, simple fallback:
          document.querySelectorAll('.page-number').forEach(el => {
            // placeholder; leave blank or inject via PDF generation tooling
            el.textContent = '';
          });
        </script>
      </body>
      </html>
    `;

    return html;
  }

  /**
   * Read the logo image next to this module and return it base64-encoded.
   *
   * @returns the encoded image, or an empty string (after logging) when the
   * file cannot be read, so report generation can continue without a logo.
   */
  private getLogoBase64(): string {
    try {
      const logoPath = path.join(__dirname, '../assets/bluepoint-logo.png');
      return fs.readFileSync(logoPath).toString('base64');
    } catch (error) {
      logger.error('Failed to load logo:', error);
      return '';
    }
  }

  /**
   * Turn a camelCase key into a display label, e.g. `dealOverview` ->
   * `Deal Overview`.
   *
   * Spaces are inserted only at word boundaries, so runs of capitals stay
   * together (`revenueLTM` -> `Revenue LTM`) instead of being split into
   * single letters.
   */
  private formatFieldName(fieldName: string): string {
    const spaced = fieldName
      // lower-case letter or digit followed by a capital: "dealOverview" -> "deal Overview"
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      // end of an acronym followed by a capitalised word: "EBITDAMargin" -> "EBITDA Margin"
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      .trim();

    return spaced.charAt(0).toUpperCase() + spaced.slice(1);
  }

  /**
   * Close every pooled page, clear the PDF cache and close the browser.
   * Page-close errors are logged and do not stop the shutdown.
   */
  async close(): Promise<void> {
    for (const poolItem of this.pagePool) {
      try {
        // A slot whose page is still being created has no page yet.
        await poolItem.page?.close();
      } catch (error) {
        logger.error('Error closing page:', error);
      }
    }
    this.pagePool = [];

    this.cache.clear();

    // Forget the browser before closing it so a failing close() cannot leave
    // a dead instance behind for later calls.
    const browser = this.browser;
    this.browser = null;
    if (browser) {
      await browser.close();
    }
  }

  /** Release all resources held by the service; currently the same as close(). */
  async cleanup(): Promise<void> {
    await this.close();
  }

  /**
   * Snapshot of the service's internal state: number of pooled pages, number
   * of cached PDFs and number of pooled pages currently in use.
   */
  getStats(): {
    pagePoolSize: number;
    cacheSize: number;
    activePages: number;
  } {
    return {
      pagePoolSize: this.pagePool.length,
      cacheSize: this.cache.size,
      activePages: this.pagePool.filter(item => item.inUse).length,
    };
  }
}
|
|
|
|
/** Module-wide service instance, available as both a named and the default export. */
const pdfGenerationService = new PDFGenerationService();

export { pdfGenerationService };
export default pdfGenerationService;
|