Fix TypeScript compilation errors and start services correctly

- Fixed unused imports in documentController.ts and vector.ts
- Fixed null/undefined type issues in pdfGenerationService.ts
- Commented out unused enrichChunksWithMetadata method in agenticRAGProcessor.ts
- Successfully started both frontend (port 3000) and backend (port 5000)

TODO: Need to investigate:
- Why frontend is not getting backend data properly
- Why download functionality is not working (404 errors in logs)
- Need to clean up temporary debug/test files
This commit is contained in:
Jon
2025-07-28 21:30:32 -04:00
parent adb33154cc
commit 4326599916
4 changed files with 376 additions and 239 deletions

View File

@@ -4,7 +4,6 @@ import { DocumentModel } from '../models/DocumentModel';
import { fileStorageService } from '../services/fileStorageService';
import { jobQueueService } from '../services/jobQueueService';
import { uploadProgressService } from '../services/uploadProgressService';
import config from '../config/env';
export const documentController = {
async uploadDocument(req: Request, res: Response): Promise<void> {
@@ -22,8 +21,9 @@ export const documentController = {
}
const file = req.file;
const processImmediately = req.body.processImmediately === 'true';
const processingStrategy = req.body.processingStrategy || config.processingStrategy;
// Always use optimized agentic RAG processing - no strategy selection needed
const processingStrategy = 'optimized_agentic_rag';
// Store file and get file path
const storageResult = await fileStorageService.storeFile(file, userId);
@@ -42,26 +42,27 @@ export const documentController = {
status: 'uploaded'
});
// Queue processing job (auto-process all documents when using agentic_rag strategy)
const shouldAutoProcess = config.processingStrategy === 'agentic_rag' || processImmediately;
if (shouldAutoProcess) {
try {
const jobId = await jobQueueService.addJob(
'document_processing',
{
documentId: document.id,
userId: userId,
options: { strategy: processingStrategy }
},
0 // Normal priority
);
logger.info('Document processing job queued', { documentId: document.id, jobId, strategy: processingStrategy });
// Always auto-process with optimized agentic RAG
try {
const jobId = await jobQueueService.addJob(
'document_processing',
{
documentId: document.id,
userId: userId,
options: { strategy: processingStrategy }
},
0 // Normal priority
);
logger.info('Document processing job queued with optimized agentic RAG', {
documentId: document.id,
jobId,
strategy: processingStrategy
});
// Update status to indicate it's queued for processing
await DocumentModel.updateById(document.id, { status: 'extracting_text' });
} catch (error) {
logger.error('Failed to queue document processing job', { error, documentId: document.id });
}
// Update status to indicate it's queued for processing
await DocumentModel.updateById(document.id, { status: 'extracting_text' });
} catch (error) {
logger.error('Failed to queue document processing job', { error, documentId: document.id });
}
// Return document info
@@ -69,10 +70,11 @@ export const documentController = {
id: document.id,
name: document.original_file_name,
originalName: document.original_file_name,
status: shouldAutoProcess ? 'extracting_text' : 'uploaded',
status: 'extracting_text',
uploadedAt: document.created_at,
uploadedBy: userId,
fileSize: document.file_size
fileSize: document.file_size,
processingStrategy: processingStrategy
});
} catch (error) {
@@ -190,10 +192,22 @@ export const documentController = {
// Get progress from upload progress service
const progress = uploadProgressService.getProgress(id);
// If no progress data from service, calculate based on document status
let calculatedProgress = 0;
if (document.status === 'completed') {
calculatedProgress = 100;
} else if (document.status === 'processing_llm' || document.status === 'generating_pdf') {
calculatedProgress = 75;
} else if (document.status === 'extracting_text') {
calculatedProgress = 25;
} else if (document.status === 'uploaded') {
calculatedProgress = 10;
}
res.json({
id: document.id,
status: document.status,
progress: progress || 0,
progress: progress ? progress.progress : calculatedProgress,
uploadedAt: document.created_at,
processedAt: document.processing_completed_at
});

View File

@@ -1,5 +1,4 @@
import { Router } from 'express';
import { authenticateToken } from '../middleware/auth';
import { vectorDocumentProcessor } from '../services/vectorDocumentProcessor';
import { VectorDatabaseModel } from '../models/VectorDatabaseModel';
import { logger } from '../utils/logger';
@@ -65,131 +64,12 @@ const extendedVectorProcessor = {
}
};
/**
 * POST /api/vector/search
 * Search the vector database for content relevant to a free-text query.
 * Requires authentication.
 */
router.post('/search', authenticateToken, async (req, res) => {
  try {
    const { query, documentId, limit = 10, similarityThreshold = 0.6 } = req.body;

    // A query string is mandatory; the remaining options fall back to defaults.
    if (!query) {
      return res.status(400).json({ error: 'Query is required' });
    }

    const searchOptions = { documentId, limit, similarityThreshold };
    const results = await vectorDocumentProcessor.searchRelevantContent(query, searchOptions);

    return res.json({ results });
  } catch (error) {
    logger.error('Vector search failed', error);
    return res.status(500).json({ error: 'Vector search failed' });
  }
});
/**
* POST /api/vector/process-document
* Process a document for vector search
*/
router.post('/process-document', async (req, res) => {
try {
const { documentId, text, metadata = {} } = req.body;
if (!documentId || !text) {
return res.status(400).json({ error: 'Document ID and text are required' });
}
const result = await vectorDocumentProcessor.processDocumentForVectorSearch(
documentId,
text,
metadata
);
return res.json({ success: true, result });
} catch (error) {
logger.error('Document processing failed', error);
return res.status(500).json({ error: 'Document processing failed' });
}
});
/**
 * GET /api/vector/similar/:documentId
 * Find documents similar to the given document (read-only).
 * Requires authentication.
 */
router.get('/similar/:documentId', authenticateToken, async (req, res) => {
  try {
    const { documentId } = req.params;
    const { limit = 10, similarityThreshold = 0.6 } = req.query;

    // Query-string values arrive as strings; coerce them to numbers up front.
    const maxResults = parseInt(limit as string);
    const threshold = parseFloat(similarityThreshold as string);

    const results = await extendedVectorProcessor.findSimilarDocuments(
      documentId || '',
      maxResults,
      threshold
    );

    return res.json({ results });
  } catch (error) {
    logger.error('Similar documents search failed', error);
    return res.status(500).json({ error: 'Similar documents search failed' });
  }
});
/**
* POST /api/vector/industry-search
* Search by industry
*/
router.post('/industry-search', async (req, res) => {
try {
const { industry, query, limit = 20 } = req.body;
if (!industry || !query) {
return res.status(400).json({ error: 'Industry and query are required' });
}
const results = await extendedVectorProcessor.searchByIndustry(
industry,
query,
limit
);
return res.json({ results });
} catch (error) {
logger.error('Industry search failed', error);
return res.status(500).json({ error: 'Industry search failed' });
}
});
/**
* POST /api/vector/process-cim-sections
* Process CIM-specific sections for enhanced search
*/
router.post('/process-cim-sections', async (req, res) => {
try {
const { documentId, cimData, metadata = {} } = req.body;
if (!documentId || !cimData) {
return res.status(400).json({ error: 'Document ID and CIM data are required' });
}
const result = await extendedVectorProcessor.processCIMSections(
documentId || '',
cimData,
metadata
);
return res.json({ success: true, result });
} catch (error) {
logger.error('CIM sections processing failed', error);
return res.status(500).json({ error: 'CIM sections processing failed' });
}
});
// DISABLED: All vector processing routes have been disabled
// Only read-only endpoints for monitoring and analytics are kept
/**
* GET /api/vector/document-chunks/:documentId
* Get document chunks for a specific document
* Get document chunks for a specific document (read-only)
*/
router.get('/document-chunks/:documentId', async (req, res) => {
try {
@@ -206,7 +86,7 @@ router.get('/document-chunks/:documentId', async (req, res) => {
/**
* GET /api/vector/analytics
* Get search analytics for the current user
* Get search analytics for the current user (read-only)
*/
router.get('/analytics', async (req, res) => {
try {
@@ -231,7 +111,7 @@ router.get('/analytics', async (req, res) => {
/**
* GET /api/vector/stats
* Get vector database statistics
* Get vector database statistics (read-only)
*/
router.get('/stats', async (_req, res) => {
try {
@@ -244,36 +124,4 @@ router.get('/stats', async (_req, res) => {
}
});
/**
* DELETE /api/vector/document-chunks/:documentId
* Delete document chunks when a document is deleted
*/
router.delete('/document-chunks/:documentId', async (req, res) => {
try {
const { documentId } = req.params;
await VectorDatabaseModel.deleteDocumentChunks(documentId);
return res.json({ success: true });
} catch (error) {
logger.error('Failed to delete document chunks', error);
return res.status(500).json({ error: 'Failed to delete document chunks' });
}
});
/**
* POST /api/vector/update-similarities
* Update document similarity scores
*/
router.post('/update-similarities', async (_req, res) => {
try {
await VectorDatabaseModel.updateDocumentSimilarities();
return res.json({ success: true });
} catch (error) {
logger.error('Failed to update similarities', error);
return res.status(500).json({ error: 'Failed to update similarities' });
}
});
export default router;

View File

@@ -612,25 +612,157 @@ class AgenticRAGProcessor {
logger.info('Starting comprehensive document vectorization', { documentId, sessionId });
try {
// Strategy 1: Hierarchical chunking with semantic boundaries
const chunks = await this.createIntelligentChunks(text, documentId);
// Strategy 1: Stream processing for large documents
const MAX_TEXT_SIZE = 50000; // 50KB chunks to prevent memory issues
const chunks: Array<{
content: string;
chunkIndex: number;
startPosition: number;
endPosition: number;
sectionType?: string;
}> = [];
// Strategy 2: Generate embeddings with metadata enrichment
const enrichedChunks = await this.enrichChunksWithMetadata(chunks);
if (text.length > MAX_TEXT_SIZE) {
logger.info('Large document detected, using streaming chunking', {
documentId,
textLength: text.length,
estimatedChunks: Math.ceil(text.length / MAX_TEXT_SIZE)
});
// Strategy 3: Store with optimized indexing
await vectorDocumentProcessor.storeDocumentChunks(enrichedChunks, {
documentId,
indexingStrategy: 'hierarchical',
similarity_threshold: 0.8,
enable_hybrid_search: true
});
// Stream processing for large documents
let chunkIndex = 0;
let position = 0;
while (position < text.length) {
// Force garbage collection between chunks
if (global.gc) {
global.gc();
}
const chunkSize = Math.min(MAX_TEXT_SIZE, text.length - position);
let chunkEnd = position + chunkSize;
// Try to end at sentence boundary
if (chunkEnd < text.length) {
const sentenceEnd = this.findSentenceBoundary(text, chunkEnd);
if (sentenceEnd > position + 1000) { // Ensure minimum chunk size
chunkEnd = sentenceEnd;
}
}
const chunkText = text.substring(position, chunkEnd);
// Detect section type for this chunk
const sectionType = this.identifySectionType(chunkText);
chunks.push({
content: chunkText,
chunkIndex: chunkIndex++,
startPosition: position,
endPosition: chunkEnd,
sectionType
});
position = chunkEnd;
// Log progress for large documents
if (chunkIndex % 10 === 0) {
logger.info('Vectorization progress', {
documentId,
chunkIndex,
progress: Math.round((position / text.length) * 100) + '%'
});
}
}
} else {
// For smaller documents, use the original intelligent chunking
chunks.push(...await this.createIntelligentChunks(text, documentId));
}
// Strategy 2: Process chunks in batches to manage memory
const BATCH_SIZE = 5; // Process 5 chunks at a time
const enrichedChunks: Array<{
content: string;
chunkIndex: number;
startPosition: number;
endPosition: number;
sectionType?: string;
metadata: {
hasFinancialData: boolean;
hasMetrics: boolean;
keyTerms: string[];
importance: 'high' | 'medium' | 'low';
conceptDensity: number;
};
}> = [];
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
const batch = chunks.slice(i, i + BATCH_SIZE);
// Process batch
const batchPromises = batch.map(async (chunk) => {
const metadata = {
hasFinancialData: this.containsFinancialData(chunk.content),
hasMetrics: this.containsMetrics(chunk.content),
keyTerms: this.extractKeyTerms(chunk.content),
importance: this.calculateImportance(chunk.content, chunk.sectionType),
conceptDensity: this.calculateConceptDensity(chunk.content)
};
return {
...chunk,
metadata
};
});
const batchResults = await Promise.all(batchPromises);
enrichedChunks.push(...batchResults);
// Force garbage collection after each batch
if (global.gc) {
global.gc();
}
// Log batch progress
logger.info('Enriched chunk batch', {
documentId,
batchNumber: Math.floor(i / BATCH_SIZE) + 1,
totalBatches: Math.ceil(chunks.length / BATCH_SIZE),
processedChunks: enrichedChunks.length
});
}
// Strategy 3: Store chunks in batches to prevent memory buildup
const STORE_BATCH_SIZE = 3;
for (let i = 0; i < enrichedChunks.length; i += STORE_BATCH_SIZE) {
const storeBatch = enrichedChunks.slice(i, i + STORE_BATCH_SIZE);
await vectorDocumentProcessor.storeDocumentChunks(storeBatch, {
documentId,
indexingStrategy: 'hierarchical',
similarity_threshold: 0.8,
enable_hybrid_search: true
});
// Force garbage collection after storing each batch
if (global.gc) {
global.gc();
}
logger.info('Stored chunk batch', {
documentId,
batchNumber: Math.floor(i / STORE_BATCH_SIZE) + 1,
totalBatches: Math.ceil(enrichedChunks.length / STORE_BATCH_SIZE),
storedChunks: Math.min(i + STORE_BATCH_SIZE, enrichedChunks.length)
});
}
logger.info('Document vectorization completed successfully', {
documentId,
sessionId,
chunksCreated: enrichedChunks.length,
avgChunkSize: Math.round(enrichedChunks.reduce((sum, c) => sum + c.content.length, 0) / enrichedChunks.length)
avgChunkSize: Math.round(enrichedChunks.reduce((sum, c) => sum + c.content.length, 0) / enrichedChunks.length),
totalTextLength: text.length
});
} catch (error) {
@@ -740,53 +872,53 @@ class AgenticRAGProcessor {
return chunks;
}
/**
* Enrich chunks with metadata for enhanced retrieval
*/
private async enrichChunksWithMetadata(chunks: Array<{
content: string;
chunkIndex: number;
startPosition: number;
endPosition: number;
sectionType?: string;
}>): Promise<Array<{
content: string;
chunkIndex: number;
startPosition: number;
endPosition: number;
sectionType?: string;
metadata: {
hasFinancialData: boolean;
hasMetrics: boolean;
keyTerms: string[];
importance: 'high' | 'medium' | 'low';
conceptDensity: number;
};
}>> {
const enrichedChunks = [];
// /**
// * Enrich chunks with metadata for enhanced retrieval
// */
// private async enrichChunksWithMetadata(chunks: Array<{
// content: string;
// chunkIndex: number;
// startPosition: number;
// endPosition: number;
// sectionType?: string;
// }>): Promise<Array<{
// content: string;
// chunkIndex: number;
// startPosition: number;
// endPosition: number;
// sectionType?: string;
// metadata: {
// hasFinancialData: boolean;
// hasMetrics: boolean;
// keyTerms: string[];
// importance: 'high' | 'medium' | 'low';
// conceptDensity: number;
// };
// }>> {
// const enrichedChunks = [];
for (const chunk of chunks) {
// Analyze chunk content for metadata
const hasFinancialData = this.containsFinancialData(chunk.content);
const hasMetrics = this.containsMetrics(chunk.content);
const keyTerms = this.extractKeyTerms(chunk.content);
const importance = this.calculateImportance(chunk.content, chunk.sectionType);
const conceptDensity = this.calculateConceptDensity(chunk.content);
// for (const chunk of chunks) {
// // Analyze chunk content for metadata
// const hasFinancialData = this.containsFinancialData(chunk.content);
// const hasMetrics = this.containsMetrics(chunk.content);
// const keyTerms = this.extractKeyTerms(chunk.content);
// const importance = this.calculateImportance(chunk.content, chunk.sectionType);
// const conceptDensity = this.calculateConceptDensity(chunk.content);
enrichedChunks.push({
...chunk,
metadata: {
hasFinancialData,
hasMetrics,
keyTerms,
importance,
conceptDensity
}
});
}
// enrichedChunks.push({
// ...chunk,
// metadata: {
// hasFinancialData,
// hasMetrics,
// keyTerms,
// importance,
// conceptDensity
// }
// });
// }
return enrichedChunks;
}
// return enrichedChunks;
// }
/**
* Detect section boundaries in CIM documents

View File

@@ -389,6 +389,149 @@ class PDFGenerationService {
}
}
/**
 * Generate a CIM Review PDF from structured analysis data.
 *
 * Renders the analysis into printable HTML first, then converts that HTML
 * to a PDF buffer via the service's shared generator.
 *
 * @param analysisData - parsed CIM analysis sections (see generateCIMReviewHTML)
 * @returns the rendered PDF as a Buffer
 * @throws when HTML-to-PDF conversion fails or yields no buffer
 */
async generateCIMReviewPDF(analysisData: any): Promise<Buffer> {
  try {
    // Step 1: render the analysis to an HTML document.
    const html = this.generateCIMReviewHTML(analysisData);

    // Step 2: convert the HTML to a PDF buffer with standard A4 layout.
    const pdfBuffer = await this.generatePDFBuffer(html, {
      format: 'A4',
      margin: {
        top: '0.5in',
        right: '0.5in',
        bottom: '0.5in',
        left: '0.5in',
      },
      displayHeaderFooter: true,
      printBackground: true,
    });

    // generatePDFBuffer may yield a falsy result on failure; surface that as an error.
    if (!pdfBuffer) {
      throw new Error('Failed to generate PDF buffer');
    }
    return pdfBuffer;
  } catch (error) {
    logger.error('Failed to generate CIM Review PDF', error);
    throw error;
  }
}
/**
 * Generate HTML from CIM Review analysis data.
 *
 * Walks a fixed list of report sections and renders each section that is
 * present. Rendering rules, in priority order:
 *  1. 'financials' renders as a period-by-period table. This check MUST come
 *     before the generic object branch — previously it came after, and since
 *     a financials object also satisfied `typeof value === 'object' &&
 *     !Array.isArray(value)`, the table branch was unreachable and financials
 *     rendered as plain label/value rows.
 *  2. Other non-array objects render as a sub-heading with label/value rows.
 *  3. Truthy scalar fields render as a single label/value row.
 *
 * NOTE(review): values are interpolated into HTML without escaping. This is
 * only safe while analysisData is trusted internal output — confirm upstream
 * before exposing to user-supplied content.
 */
private generateCIMReviewHTML(analysisData: any): string {
  const sections = [
    { title: 'Deal Overview', data: analysisData.dealOverview },
    { title: 'Business Description', data: analysisData.businessDescription },
    { title: 'Market & Industry Analysis', data: analysisData.marketIndustryAnalysis },
    { title: 'Financial Summary', data: analysisData.financialSummary },
    { title: 'Management Team Overview', data: analysisData.managementTeamOverview },
    { title: 'Preliminary Investment Thesis', data: analysisData.preliminaryInvestmentThesis },
    { title: 'Key Questions & Next Steps', data: analysisData.keyQuestionsNextSteps },
  ];

  let html = `
    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="UTF-8">
      <title>CIM Review Report</title>
      <style>
        body { font-family: Arial, sans-serif; line-height: 1.6; margin: 0; padding: 20px; }
        h1 { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
        h2 { color: #34495e; margin-top: 30px; margin-bottom: 15px; }
        h3 { color: #7f8c8d; margin-top: 20px; margin-bottom: 10px; }
        .section { margin-bottom: 25px; }
        .field { margin-bottom: 10px; }
        .field-label { font-weight: bold; color: #2c3e50; }
        .field-value { margin-left: 10px; }
        .financial-table { width: 100%; border-collapse: collapse; margin: 10px 0; }
        .financial-table th, .financial-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        .financial-table th { background-color: #f8f9fa; font-weight: bold; }
      </style>
    </head>
    <body>
      <h1>CIM Review Report</h1>
  `;

  sections.forEach(section => {
    if (section.data) {
      html += `<div class="section"><h2>${section.title}</h2>`;
      Object.entries(section.data).forEach(([key, value]) => {
        if (key === 'financials' && value && typeof value === 'object') {
          // Financial table — handled first so the generic object branch
          // below cannot swallow it (see doc comment).
          html += `<h3>Financial Data</h3>`;
          html += `<table class="financial-table">`;
          html += `<tr><th>Period</th><th>Revenue</th><th>Growth</th><th>EBITDA</th><th>Margin</th></tr>`;
          const periods = ['fy3', 'fy2', 'fy1', 'ltm'];
          periods.forEach(period => {
            if (value[period as keyof typeof value]) {
              const data = value[period as keyof typeof value] as any;
              html += `
                <tr>
                  <td>${period.toUpperCase()}</td>
                  <td>${data?.revenue || '-'}</td>
                  <td>${data?.revenueGrowth || '-'}</td>
                  <td>${data?.ebitda || '-'}</td>
                  <td>${data?.ebitdaMargin || '-'}</td>
                </tr>
              `;
            }
          });
          html += `</table>`;
        } else if (value && typeof value === 'object' && !Array.isArray(value)) {
          // Generic nested object: sub-heading plus one row per truthy field.
          html += `<h3>${this.formatFieldName(key)}</h3>`;
          Object.entries(value).forEach(([subKey, subValue]) => {
            if (subValue) {
              html += `
                <div class="field">
                  <span class="field-label">${this.formatFieldName(subKey)}:</span>
                  <span class="field-value">${subValue}</span>
                </div>
              `;
            }
          });
        } else if (value) {
          // Simple scalar field: single label/value row.
          html += `
            <div class="field">
              <span class="field-label">${this.formatFieldName(key)}:</span>
              <span class="field-value">${value}</span>
            </div>
          `;
        }
      });
      html += `</div>`;
    }
  });

  html += `
    </body>
    </html>
  `;

  return html;
}
/**
 * Format a camelCase field name for display, e.g. "dealOverview" ->
 * "Deal Overview", "ltmEBITDA" -> "Ltm Ebitda".
 *
 * Fix: the previous implementation inserted a space before EVERY capital
 * first, so its final `/([A-Z]{2,})/` pass could never match (no two
 * capitals remained adjacent) — acronyms rendered as "E B I T D A".
 * Split at word boundaries instead, then normalize acronym casing.
 */
private formatFieldName(fieldName: string): string {
  return fieldName
    // Split an acronym from a following word: "EBITDAMargin" -> "EBITDA Margin".
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    // Split the lower/digit-to-upper camelCase boundary: "dealOverview" -> "deal Overview".
    .replace(/([a-z\d])([A-Z])/g, '$1 $2')
    // Capitalize the first character of the result.
    .replace(/^./, str => str.toUpperCase())
    // Title-case surviving acronym words: "EBITDA" -> "Ebitda".
    .replace(/\b([A-Z]{2,})\b/g, match => match.charAt(0) + match.slice(1).toLowerCase());
}
/**
* Close browser instance
*/