fix(core): Overhaul and fix the end-to-end document processing pipeline
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"name": "cim-processor-backend",
|
||||
"version": "1.0.0",
|
||||
"description": "Backend API for CIM Document Processor",
|
||||
"main": "index.js",
|
||||
"main": "dist/index.js",
|
||||
"scripts": {
|
||||
"dev": "ts-node-dev --respawn --transpile-only --max-old-space-size=8192 --expose-gc src/index.ts",
|
||||
"build": "tsc && node src/scripts/prepare-dist.js && cp .puppeteerrc.cjs dist/",
|
||||
|
||||
@@ -7,10 +7,11 @@ import { uploadProgressService } from '../services/uploadProgressService';
|
||||
import { uploadMonitoringService } from '../services/uploadMonitoringService';
|
||||
|
||||
export const documentController = {
|
||||
async uploadDocument(req: Request, res: Response): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
const structuredLogger = new StructuredLogger(req.correlationId);
|
||||
|
||||
async getUploadUrl(req: Request, res: Response): Promise<void> {
|
||||
console.log('🎯🎯🎯 GET UPLOAD URL ENDPOINT HIT!');
|
||||
console.log('🎯 Method:', req.method);
|
||||
console.log('🎯 URL:', req.url);
|
||||
console.log('🎯 Headers:', JSON.stringify(req.headers, null, 2));
|
||||
try {
|
||||
const userId = req.user?.uid;
|
||||
if (!userId) {
|
||||
@@ -21,206 +22,369 @@ export const documentController = {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if file was uploaded
|
||||
if (!req.file) {
|
||||
const { fileName, fileSize, contentType } = req.body;
|
||||
|
||||
if (!fileName || !fileSize || !contentType) {
|
||||
res.status(400).json({
|
||||
error: 'No file uploaded',
|
||||
error: 'Missing required fields: fileName, fileSize, contentType',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const file = req.file;
|
||||
|
||||
// Track upload start
|
||||
const uploadEventData: any = {
|
||||
userId,
|
||||
fileInfo: {
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
},
|
||||
status: 'started',
|
||||
stage: 'upload_initiated',
|
||||
};
|
||||
|
||||
if (req.correlationId) {
|
||||
uploadEventData.correlationId = req.correlationId;
|
||||
}
|
||||
|
||||
uploadMonitoringService.trackUploadEvent(uploadEventData);
|
||||
|
||||
structuredLogger.uploadStart({
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
}, userId);
|
||||
|
||||
// Always use optimized agentic RAG processing - no strategy selection needed
|
||||
const processingStrategy = 'optimized_agentic_rag';
|
||||
|
||||
// Store file and get file path
|
||||
const storageResult = await fileStorageService.storeFile(file, userId);
|
||||
|
||||
if (!storageResult.success || !storageResult.fileInfo) {
|
||||
const processingTime = Date.now() - startTime;
|
||||
|
||||
// Track upload failure
|
||||
const failureEventData: any = {
|
||||
userId,
|
||||
fileInfo: {
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
},
|
||||
status: 'failed',
|
||||
stage: 'file_storage',
|
||||
error: {
|
||||
message: storageResult.error || 'Failed to store file',
|
||||
type: 'storage_error',
|
||||
code: 'STORAGE_ERROR',
|
||||
},
|
||||
processingTime,
|
||||
};
|
||||
|
||||
if (req.correlationId) {
|
||||
failureEventData.correlationId = req.correlationId;
|
||||
}
|
||||
|
||||
uploadMonitoringService.trackUploadEvent(failureEventData);
|
||||
|
||||
structuredLogger.uploadError(
|
||||
new Error(storageResult.error || 'Failed to store file'),
|
||||
{
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
},
|
||||
userId,
|
||||
'file_storage'
|
||||
);
|
||||
|
||||
res.status(500).json({
|
||||
error: 'Failed to store file',
|
||||
// Validate file type
|
||||
if (contentType !== 'application/pdf') {
|
||||
res.status(400).json({
|
||||
error: 'Only PDF files are supported',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Create document record
|
||||
// Validate file size (max 50MB)
|
||||
if (fileSize > 50 * 1024 * 1024) {
|
||||
res.status(400).json({
|
||||
error: 'File size exceeds 50MB limit',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate unique file path
|
||||
const timestamp = Date.now();
|
||||
const sanitizedFileName = fileName.replace(/[^a-zA-Z0-9.-]/g, '_');
|
||||
const filePath = `uploads/${userId}/${timestamp}_${sanitizedFileName}`;
|
||||
|
||||
// Create document record first
|
||||
const document = await DocumentModel.create({
|
||||
user_id: userId,
|
||||
original_file_name: file.originalname,
|
||||
file_path: storageResult.fileInfo.path,
|
||||
file_size: file.size,
|
||||
status: 'uploaded'
|
||||
original_file_name: fileName,
|
||||
file_path: filePath,
|
||||
file_size: fileSize,
|
||||
status: 'uploading'
|
||||
});
|
||||
|
||||
// Always auto-process with optimized agentic RAG
|
||||
try {
|
||||
const jobId = await jobQueueService.addJob(
|
||||
'document_processing',
|
||||
{
|
||||
// Generate signed upload URL
|
||||
const { fileStorageService } = await import('../services/fileStorageService');
|
||||
const uploadUrl = await fileStorageService.generateSignedUploadUrl(filePath, contentType);
|
||||
|
||||
console.log('✅ Generated upload URL for document:', document.id);
|
||||
|
||||
res.status(200).json({
|
||||
documentId: document.id,
|
||||
userId: userId,
|
||||
options: { strategy: processingStrategy }
|
||||
},
|
||||
0 // Normal priority
|
||||
);
|
||||
logger.info('Document processing job queued with optimized agentic RAG', {
|
||||
documentId: document.id,
|
||||
jobId,
|
||||
strategy: processingStrategy
|
||||
});
|
||||
|
||||
// Update status to indicate it's queued for processing
|
||||
await DocumentModel.updateById(document.id, { status: 'extracting_text' });
|
||||
} catch (error) {
|
||||
logger.error('Failed to queue document processing job', { error, documentId: document.id });
|
||||
}
|
||||
|
||||
// Track upload success
|
||||
const processingTime = Date.now() - startTime;
|
||||
const successEventData: any = {
|
||||
userId,
|
||||
fileInfo: {
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
},
|
||||
status: 'success',
|
||||
stage: 'upload_completed',
|
||||
processingTime,
|
||||
};
|
||||
|
||||
if (req.correlationId) {
|
||||
successEventData.correlationId = req.correlationId;
|
||||
}
|
||||
|
||||
uploadMonitoringService.trackUploadEvent(successEventData);
|
||||
|
||||
structuredLogger.uploadSuccess({
|
||||
originalName: file.originalname,
|
||||
size: file.size,
|
||||
mimetype: file.mimetype,
|
||||
}, userId, processingTime);
|
||||
|
||||
// Return document info
|
||||
res.status(201).json({
|
||||
id: document.id,
|
||||
name: document.original_file_name,
|
||||
originalName: document.original_file_name,
|
||||
status: 'extracting_text',
|
||||
uploadedAt: document.created_at,
|
||||
uploadedBy: userId,
|
||||
fileSize: document.file_size,
|
||||
processingStrategy: processingStrategy,
|
||||
uploadUrl: uploadUrl,
|
||||
filePath: filePath,
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
const processingTime = Date.now() - startTime;
|
||||
console.log('❌ Get upload URL error:', error);
|
||||
logger.error('Get upload URL failed', {
|
||||
error,
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
|
||||
// Track upload failure
|
||||
const errorEventData: any = {
|
||||
userId: req.user?.uid || 'unknown',
|
||||
fileInfo: {
|
||||
originalName: req.file?.originalname || 'unknown',
|
||||
size: req.file?.size || 0,
|
||||
mimetype: req.file?.mimetype || 'unknown',
|
||||
},
|
||||
status: 'failed',
|
||||
stage: 'upload_error',
|
||||
error: {
|
||||
res.status(500).json({
|
||||
error: 'Failed to generate upload URL',
|
||||
message: error instanceof Error ? error.message : 'Unknown error',
|
||||
type: 'upload_error',
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
}
|
||||
},
|
||||
processingTime,
|
||||
};
|
||||
|
||||
if (req.correlationId) {
|
||||
errorEventData.correlationId = req.correlationId;
|
||||
async confirmUpload(req: Request, res: Response): Promise<void> {
|
||||
try {
|
||||
const userId = req.user?.uid;
|
||||
if (!userId) {
|
||||
res.status(401).json({
|
||||
error: 'User not authenticated',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
uploadMonitoringService.trackUploadEvent(errorEventData);
|
||||
const { id: documentId } = req.params;
|
||||
if (!documentId) {
|
||||
res.status(400).json({
|
||||
error: 'Document ID is required',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
structuredLogger.uploadError(
|
||||
error,
|
||||
{
|
||||
originalName: req.file?.originalname || 'unknown',
|
||||
size: req.file?.size || 0,
|
||||
mimetype: req.file?.mimetype || 'unknown',
|
||||
},
|
||||
req.user?.uid || 'unknown',
|
||||
'upload_error'
|
||||
// Get document record
|
||||
const document = await DocumentModel.findById(documentId);
|
||||
if (!document) {
|
||||
res.status(404).json({
|
||||
error: 'Document not found',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify user owns document
|
||||
if (document.user_id !== userId) {
|
||||
res.status(403).json({
|
||||
error: 'Access denied',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('🔄 Starting Document AI processing for:', documentId);
|
||||
|
||||
// Update status to processing
|
||||
await DocumentModel.updateById(documentId, {
|
||||
status: 'processing_llm'
|
||||
});
|
||||
|
||||
// Acknowledge the request immediately
|
||||
res.status(202).json({
|
||||
message: 'Upload confirmed, processing has started.',
|
||||
documentId: documentId,
|
||||
status: 'processing'
|
||||
});
|
||||
|
||||
// Process in the background
|
||||
(async () => {
|
||||
try {
|
||||
// Download file from Firebase Storage for Document AI processing
|
||||
const { fileStorageService } = await import('../services/fileStorageService');
|
||||
|
||||
let fileBuffer: Buffer | null = null;
|
||||
for (let i = 0; i < 3; i++) {
|
||||
await new Promise(resolve => setTimeout(resolve, 2000)); // 2 second delay
|
||||
fileBuffer = await fileStorageService.getFile(document.file_path);
|
||||
if (fileBuffer) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fileBuffer) {
|
||||
await DocumentModel.updateById(documentId, {
|
||||
status: 'failed',
|
||||
error_message: 'Failed to download uploaded file'
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Process with Unified Document Processor
|
||||
const { unifiedDocumentProcessor } = await import('../services/unifiedDocumentProcessor');
|
||||
|
||||
const result = await unifiedDocumentProcessor.processDocument(
|
||||
documentId,
|
||||
userId,
|
||||
'', // Text is not needed for this strategy
|
||||
{ strategy: 'optimized_agentic_rag' }
|
||||
);
|
||||
|
||||
if (result.success) {
|
||||
// Update document with results
|
||||
await DocumentModel.updateById(documentId, {
|
||||
status: 'completed',
|
||||
generated_summary: result.summary,
|
||||
processing_completed_at: new Date()
|
||||
});
|
||||
|
||||
// 🗑️ DELETE PDF after successful processing
|
||||
try {
|
||||
await fileStorageService.deleteFile(document.file_path);
|
||||
console.log('✅ PDF deleted after successful processing:', document.file_path);
|
||||
} catch (deleteError) {
|
||||
console.log('⚠️ Failed to delete PDF file:', deleteError);
|
||||
logger.warn('Failed to delete PDF after processing', {
|
||||
filePath: document.file_path,
|
||||
documentId,
|
||||
error: deleteError
|
||||
});
|
||||
}
|
||||
|
||||
console.log('✅ Document AI processing completed successfully');
|
||||
} else {
|
||||
await DocumentModel.updateById(documentId, {
|
||||
status: 'failed',
|
||||
error_message: result.error
|
||||
});
|
||||
|
||||
// Also delete PDF on processing failure to avoid storage costs
|
||||
try {
|
||||
await fileStorageService.deleteFile(document.file_path);
|
||||
console.log('🗑️ PDF deleted after processing failure');
|
||||
} catch (deleteError) {
|
||||
console.log('⚠️ Failed to delete PDF file after error:', deleteError);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('❌ Background processing error:', error);
|
||||
logger.error('Background processing failed', {
|
||||
error,
|
||||
documentId
|
||||
});
|
||||
await DocumentModel.updateById(documentId, {
|
||||
status: 'failed',
|
||||
error_message: 'Background processing failed'
|
||||
});
|
||||
}
|
||||
})();
|
||||
|
||||
} catch (error) {
|
||||
console.log('❌ Confirm upload error:', error);
|
||||
logger.error('Confirm upload failed', {
|
||||
error,
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
|
||||
res.status(500).json({
|
||||
error: 'Upload confirmation failed',
|
||||
message: error instanceof Error ? error.message : 'Unknown error',
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
}
|
||||
},
|
||||
|
||||
async uploadDocument(req: Request, res: Response): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
|
||||
// 🔍 COMPREHENSIVE DEBUG: Log everything about the request
|
||||
console.log('🚀 =========================');
|
||||
console.log('🚀 DOCUMENT AI UPLOAD STARTED');
|
||||
console.log('🚀 Method:', req.method);
|
||||
console.log('🚀 URL:', req.url);
|
||||
console.log('🚀 Content-Type:', req.get('Content-Type'));
|
||||
console.log('🚀 Content-Length:', req.get('Content-Length'));
|
||||
console.log('🚀 Authorization header present:', !!req.get('Authorization'));
|
||||
console.log('🚀 User from token:', req.user?.uid || 'NOT_FOUND');
|
||||
|
||||
// Debug body in detail
|
||||
console.log('🚀 Has body:', !!req.body);
|
||||
console.log('🚀 Body type:', typeof req.body);
|
||||
console.log('🚀 Body constructor:', req.body?.constructor?.name);
|
||||
console.log('🚀 Body length:', req.body?.length || 0);
|
||||
console.log('🚀 Is Buffer?:', Buffer.isBuffer(req.body));
|
||||
|
||||
// Debug all headers
|
||||
console.log('🚀 All headers:', JSON.stringify(req.headers, null, 2));
|
||||
|
||||
// Debug request properties
|
||||
console.log('🚀 Request readable:', req.readable);
|
||||
console.log('🚀 Request complete:', req.complete);
|
||||
|
||||
// If body exists, show first few bytes
|
||||
if (req.body && req.body.length > 0) {
|
||||
const preview = req.body.slice(0, 100).toString('hex');
|
||||
console.log('🚀 Body preview (hex):', preview);
|
||||
|
||||
// Try to see if it contains multipart boundary
|
||||
const bodyStr = req.body.toString('utf8', 0, Math.min(500, req.body.length));
|
||||
console.log('🚀 Body preview (string):', bodyStr.substring(0, 200));
|
||||
}
|
||||
|
||||
console.log('🚀 =========================');
|
||||
|
||||
try {
|
||||
const userId = req.user?.uid;
|
||||
if (!userId) {
|
||||
console.log('❌ Authentication failed - no userId');
|
||||
res.status(401).json({
|
||||
error: 'User not authenticated',
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('✅ Authentication successful for user:', userId);
|
||||
|
||||
// Get raw body buffer for Document AI processing
|
||||
const rawBody = req.body;
|
||||
if (!rawBody || rawBody.length === 0) {
|
||||
res.status(400).json({
|
||||
error: 'No file data received',
|
||||
correlationId: req.correlationId,
|
||||
debug: {
|
||||
method: req.method,
|
||||
contentType: req.get('Content-Type'),
|
||||
contentLength: req.get('Content-Length'),
|
||||
hasRawBody: !!rawBody,
|
||||
rawBodySize: rawBody?.length || 0,
|
||||
bodyType: typeof rawBody
|
||||
}
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('✅ Found raw body buffer:', rawBody.length, 'bytes');
|
||||
|
||||
// Create document record first
|
||||
const document = await DocumentModel.create({
|
||||
user_id: userId,
|
||||
original_file_name: 'uploaded-document.pdf',
|
||||
file_path: '',
|
||||
file_size: rawBody.length,
|
||||
status: 'processing_llm'
|
||||
});
|
||||
|
||||
console.log('✅ Document record created:', document.id);
|
||||
|
||||
// Process with Document AI directly
|
||||
const { DocumentAiGenkitProcessor } = await import('../services/documentAiGenkitProcessor');
|
||||
const processor = new DocumentAiGenkitProcessor();
|
||||
|
||||
console.log('✅ Starting Document AI processing...');
|
||||
const result = await processor.processDocument(
|
||||
document.id,
|
||||
userId,
|
||||
rawBody,
|
||||
'uploaded-document.pdf',
|
||||
'application/pdf'
|
||||
);
|
||||
|
||||
if (result.success) {
|
||||
await DocumentModel.updateById(document.id, {
|
||||
status: 'completed',
|
||||
generated_summary: result.content,
|
||||
processing_completed_at: new Date()
|
||||
});
|
||||
|
||||
console.log('✅ Document AI processing completed successfully');
|
||||
|
||||
res.status(201).json({
|
||||
id: document.id,
|
||||
name: 'uploaded-document.pdf',
|
||||
originalName: 'uploaded-document.pdf',
|
||||
status: 'completed',
|
||||
uploadedAt: document.created_at,
|
||||
uploadedBy: userId,
|
||||
fileSize: rawBody.length,
|
||||
summary: result.content,
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
return;
|
||||
} else {
|
||||
console.log('❌ Document AI processing failed:', result.error);
|
||||
await DocumentModel.updateById(document.id, {
|
||||
status: 'failed',
|
||||
error_message: result.error
|
||||
});
|
||||
|
||||
res.status(500).json({
|
||||
error: 'Document processing failed',
|
||||
message: result.error,
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.log('❌ Upload error:', error);
|
||||
|
||||
logger.error('Upload document failed', {
|
||||
error,
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
|
||||
res.status(500).json({
|
||||
error: 'Upload failed',
|
||||
message: error instanceof Error ? error.message : 'Unknown error',
|
||||
correlationId: req.correlationId || undefined
|
||||
});
|
||||
}
|
||||
|
||||
@@ -18,20 +18,17 @@ import { notFoundHandler } from './middleware/notFoundHandler';
|
||||
|
||||
const app = express();
|
||||
|
||||
// Enable trust proxy to ensure Express works correctly behind the proxy
|
||||
// Add this middleware to log all incoming requests
|
||||
app.use((req, res, next) => {
|
||||
console.log(`Incoming request: ${req.method} ${req.path}`);
|
||||
next();
|
||||
});
|
||||
|
||||
// Enable trust proxy to ensure Express works correctly behind a proxy
|
||||
app.set('trust proxy', 1);
|
||||
|
||||
// Security middleware
|
||||
app.use(helmet({
|
||||
contentSecurityPolicy: {
|
||||
directives: {
|
||||
defaultSrc: ["'self'"],
|
||||
styleSrc: ["'self'", "'unsafe-inline'"],
|
||||
scriptSrc: ["'self'"],
|
||||
imgSrc: ["'self'", "data:", "https:"],
|
||||
},
|
||||
},
|
||||
}));
|
||||
app.use(helmet());
|
||||
|
||||
// CORS configuration
|
||||
const allowedOrigins = [
|
||||
@@ -43,13 +40,10 @@ const allowedOrigins = [
|
||||
|
||||
app.use(cors({
|
||||
origin: function (origin, callback) {
|
||||
console.log('🌐 CORS request from origin:', origin);
|
||||
|
||||
if (!origin || allowedOrigins.indexOf(origin) !== -1) {
|
||||
console.log('✅ CORS allowed for origin:', origin);
|
||||
callback(null, true);
|
||||
} else {
|
||||
console.log('❌ CORS blocked origin:', origin);
|
||||
logger.warn(`CORS blocked for origin: ${origin}`);
|
||||
callback(new Error('Not allowed by CORS'));
|
||||
}
|
||||
},
|
||||
@@ -62,7 +56,7 @@ app.use(cors({
|
||||
// Rate limiting
|
||||
const limiter = rateLimit({
|
||||
windowMs: 15 * 60 * 1000, // 15 minutes
|
||||
max: 1000, // limit each IP to 1000 requests per windowMs (increased for testing)
|
||||
max: 1000,
|
||||
message: {
|
||||
error: 'Too many requests from this IP, please try again later.',
|
||||
},
|
||||
@@ -72,27 +66,6 @@ const limiter = rateLimit({
|
||||
|
||||
app.use(limiter);
|
||||
|
||||
// Body parsing middleware - only for non-multipart requests
|
||||
app.use((req, res, next) => {
|
||||
if (req.headers['content-type'] && req.headers['content-type'].includes('multipart/form-data')) {
|
||||
// Skip body parsing for multipart requests - let multer handle it
|
||||
next();
|
||||
} else {
|
||||
// Parse JSON and URL-encoded bodies for other requests
|
||||
express.json({ limit: '10mb' })(req, res, next);
|
||||
}
|
||||
});
|
||||
|
||||
app.use((req, res, next) => {
|
||||
if (req.headers['content-type'] && req.headers['content-type'].includes('multipart/form-data')) {
|
||||
// Skip body parsing for multipart requests - let multer handle it
|
||||
next();
|
||||
} else {
|
||||
// Parse URL-encoded bodies for other requests
|
||||
express.urlencoded({ extended: true, limit: '10mb' })(req, res, next);
|
||||
}
|
||||
});
|
||||
|
||||
// Logging middleware
|
||||
app.use(morgan('combined', {
|
||||
stream: {
|
||||
@@ -100,17 +73,12 @@ app.use(morgan('combined', {
|
||||
},
|
||||
}));
|
||||
|
||||
// Request debugging middleware
|
||||
app.use((req, res, next) => {
|
||||
console.log('📥 Incoming request:', req.method, req.url);
|
||||
console.log('📥 Request headers:', Object.keys(req.headers));
|
||||
console.log('📥 Content-Type:', req.get('Content-Type'));
|
||||
console.log('📥 Authorization:', req.get('Authorization') ? 'Present' : 'Missing');
|
||||
next();
|
||||
});
|
||||
// CRITICAL: Add body parsing BEFORE routes
|
||||
app.use(express.json({ limit: '10mb' }));
|
||||
app.use(express.urlencoded({ extended: true, limit: '10mb' }));
|
||||
|
||||
// Health check endpoint
|
||||
app.get('/health', (_req, res) => { // _req to fix TS6133
|
||||
app.get('/health', (_req, res) => {
|
||||
res.status(200).json({
|
||||
status: 'ok',
|
||||
timestamp: new Date().toISOString(),
|
||||
@@ -119,53 +87,23 @@ app.get('/health', (_req, res) => { // _req to fix TS6133
|
||||
});
|
||||
});
|
||||
|
||||
// Agentic RAG health check endpoints
|
||||
app.get('/health/agentic-rag', async (_req, res) => {
|
||||
try {
|
||||
const { agenticRAGDatabaseService } = await import('./services/agenticRAGDatabaseService');
|
||||
const healthStatus = await agenticRAGDatabaseService.getHealthStatus();
|
||||
res.json(healthStatus);
|
||||
} catch (error) {
|
||||
logger.error('Agentic RAG health check failed', { error });
|
||||
res.status(500).json({
|
||||
error: 'Health check failed',
|
||||
status: 'unhealthy',
|
||||
timestamp: new Date().toISOString()
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/health/agentic-rag/metrics', async (_req, res) => {
|
||||
try {
|
||||
const { agenticRAGDatabaseService } = await import('./services/agenticRAGDatabaseService');
|
||||
const startDate = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); // 30 days ago
|
||||
const metrics = await agenticRAGDatabaseService.generatePerformanceReport(startDate, new Date());
|
||||
res.json(metrics);
|
||||
} catch (error) {
|
||||
logger.error('Agentic RAG metrics retrieval failed', { error });
|
||||
res.status(500).json({ error: 'Metrics retrieval failed' });
|
||||
}
|
||||
});
|
||||
|
||||
// API routes - remove the /api prefix as it's handled by Firebase
|
||||
// API Routes
|
||||
app.use('/documents', documentRoutes);
|
||||
app.use('/vector', vectorRoutes);
|
||||
app.use('/monitoring', monitoringRoutes);
|
||||
|
||||
|
||||
import * as functions from 'firebase-functions';
|
||||
import { onRequest } from 'firebase-functions/v2/https';
|
||||
|
||||
// API root endpoint
|
||||
app.get('/', (_req, res) => { // _req to fix TS6133
|
||||
app.get('/', (_req, res) => {
|
||||
res.json({
|
||||
message: 'CIM Document Processor API',
|
||||
version: '1.0.0',
|
||||
endpoints: {
|
||||
auth: '/auth',
|
||||
documents: '/documents',
|
||||
health: '/health',
|
||||
agenticRagHealth: '/health/agentic-rag',
|
||||
agenticRagMetrics: '/health/agentic-rag/metrics',
|
||||
monitoring: '/monitoring',
|
||||
},
|
||||
});
|
||||
@@ -177,26 +115,11 @@ app.use(notFoundHandler);
|
||||
// Global error handler (must be last)
|
||||
app.use(errorHandler);
|
||||
|
||||
// Initialize job queue service for document processing
|
||||
import { jobQueueService } from './services/jobQueueService';
|
||||
|
||||
// Start the job queue service asynchronously to avoid blocking function startup
|
||||
// Use a longer delay to ensure the function is fully initialized
|
||||
setTimeout(() => {
|
||||
try {
|
||||
jobQueueService.start();
|
||||
logger.info('Job queue service started successfully');
|
||||
} catch (error) {
|
||||
logger.error('Failed to start job queue service', { error });
|
||||
}
|
||||
}, 5000);
|
||||
|
||||
// Listen on a port when not in a Firebase Function environment or when PORT is explicitly set
|
||||
if (!process.env['FUNCTION_TARGET'] || process.env['PORT']) {
|
||||
const port = process.env['PORT'] || 5001;
|
||||
app.listen(port, () => {
|
||||
logger.info(`API server listening on port ${port}`);
|
||||
});
|
||||
}
|
||||
|
||||
export const api = functions.https.onRequest(app);
|
||||
// Configure Firebase Functions v2 for larger uploads
|
||||
export const api = onRequest({
|
||||
timeoutSeconds: 540, // 9 minutes
|
||||
memory: '2GiB',
|
||||
cpu: 1,
|
||||
maxInstances: 10,
|
||||
cors: true
|
||||
}, app);
|
||||
@@ -11,6 +11,18 @@ export const errorHandler = (
|
||||
req: Request,
|
||||
res: Response
|
||||
): void => {
|
||||
console.log('💥💥💥 MAXIMUM DEBUG ERROR HANDLER HIT 💥💥💥');
|
||||
console.log('💥 Error name:', err.name);
|
||||
console.log('💥 Error message:', err.message);
|
||||
console.log('💥 Error code:', (err as any).code);
|
||||
console.log('💥 Error type:', typeof err);
|
||||
console.log('💥 Error constructor:', err.constructor.name);
|
||||
console.log('💥 Error stack:', err.stack);
|
||||
console.log('💥 Request URL:', req.url);
|
||||
console.log('💥 Request method:', req.method);
|
||||
console.log('💥 Full error object:', JSON.stringify(err, Object.getOwnPropertyNames(err), 2));
|
||||
console.log('💥💥💥 END ERROR DEBUG 💥💥💥');
|
||||
|
||||
let error = { ...err };
|
||||
error.message = err.message;
|
||||
|
||||
@@ -53,6 +65,13 @@ export const errorHandler = (
|
||||
error = { message, statusCode: 401 } as AppError;
|
||||
}
|
||||
|
||||
// Multer errors (check if multer is imported anywhere)
|
||||
if (err.name === 'MulterError' || (err as any).code === 'UNEXPECTED_END_OF_FORM') {
|
||||
console.log('🚨 MULTER ERROR CAUGHT:', err.message);
|
||||
const message = `File upload failed: ${err.message}`;
|
||||
error = { message, statusCode: 400 } as AppError;
|
||||
}
|
||||
|
||||
// Default error
|
||||
const statusCode = error.statusCode || 500;
|
||||
const message = error.message || 'Server Error';
|
||||
|
||||
@@ -13,9 +13,15 @@ if (!fs.existsSync(uploadDir)) {
|
||||
|
||||
// File filter function
|
||||
const fileFilter = (req: Request, file: any, cb: multer.FileFilterCallback) => {
|
||||
console.log('🔍 File filter called for:', file.originalname);
|
||||
console.log('🔍 ===== FILE FILTER CALLED =====');
|
||||
console.log('🔍 File originalname:', file.originalname);
|
||||
console.log('🔍 File mimetype:', file.mimetype);
|
||||
console.log('🔍 File size:', file.size);
|
||||
console.log('🔍 File encoding:', file.encoding);
|
||||
console.log('🔍 File fieldname:', file.fieldname);
|
||||
console.log('🔍 Request Content-Type:', req.get('Content-Type'));
|
||||
console.log('🔍 Request Content-Length:', req.get('Content-Length'));
|
||||
console.log('🔍 ===========================');
|
||||
|
||||
// Check file type - allow PDF and text files for testing
|
||||
const allowedTypes = ['application/pdf', 'text/plain', 'text/html'];
|
||||
@@ -68,6 +74,14 @@ const upload = multer({
|
||||
|
||||
// Error handling middleware for multer
|
||||
export const handleUploadError = (error: any, req: Request, res: Response, next: NextFunction): void => {
|
||||
console.log('🚨 =============================');
|
||||
console.log('🚨 UPLOAD ERROR HANDLER CALLED');
|
||||
console.log('🚨 Error type:', error?.constructor?.name);
|
||||
console.log('🚨 Error message:', error?.message);
|
||||
console.log('🚨 Error code:', error?.code);
|
||||
console.log('🚨 Is MulterError:', error instanceof multer.MulterError);
|
||||
console.log('🚨 =============================');
|
||||
|
||||
if (error instanceof multer.MulterError) {
|
||||
logger.error('Multer error during file upload:', {
|
||||
error: error.message,
|
||||
@@ -129,12 +143,14 @@ export const handleUploadError = (error: any, req: Request, res: Response, next:
|
||||
|
||||
// Main upload middleware with timeout handling
|
||||
export const uploadMiddleware = (req: Request, res: Response, next: NextFunction) => {
|
||||
console.log('📤 Upload middleware called');
|
||||
console.log('📤 =============================');
|
||||
console.log('📤 UPLOAD MIDDLEWARE CALLED');
|
||||
console.log('📤 Request method:', req.method);
|
||||
console.log('📤 Request URL:', req.url);
|
||||
console.log('📤 Content-Type:', req.get('Content-Type'));
|
||||
console.log('📤 Content-Length:', req.get('Content-Length'));
|
||||
console.log('📤 User-Agent:', req.get('User-Agent'));
|
||||
console.log('📤 =============================');
|
||||
|
||||
// Set a timeout for the upload
|
||||
const uploadTimeout = setTimeout(() => {
|
||||
@@ -155,12 +171,25 @@ export const uploadMiddleware = (req: Request, res: Response, next: NextFunction
|
||||
clearTimeout(uploadTimeout);
|
||||
if (err) {
|
||||
console.log('❌ Upload middleware error:', err);
|
||||
console.log('❌ Error details:', {
|
||||
name: err.name,
|
||||
message: err.message,
|
||||
code: err.code,
|
||||
stack: err.stack?.split('\n')[0]
|
||||
});
|
||||
} else {
|
||||
console.log('✅ Upload middleware completed successfully');
|
||||
console.log('✅ File after multer processing:', {
|
||||
hasFile: !!req.file,
|
||||
filename: req.file?.originalname,
|
||||
size: req.file?.size,
|
||||
mimetype: req.file?.mimetype
|
||||
});
|
||||
}
|
||||
originalNext(err);
|
||||
};
|
||||
|
||||
console.log('🔄 Calling multer.single("document")...');
|
||||
upload.single('document')(req, res, next);
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { logger } from '../utils/logger';
|
||||
import pool from '../config/database';
|
||||
import { getSupabaseServiceClient } from '../config/supabase';
|
||||
|
||||
export interface DocumentChunk {
|
||||
id: string;
|
||||
@@ -15,577 +15,21 @@ export interface DocumentChunk {
|
||||
updatedAt: Date;
|
||||
}
|
||||
|
||||
export interface VectorSearchResult {
|
||||
documentId: string;
|
||||
similarityScore: number;
|
||||
chunkContent: string;
|
||||
metadata: Record<string, any>;
|
||||
}
|
||||
|
||||
export interface DocumentSimilarity {
|
||||
id: string;
|
||||
sourceDocumentId: string;
|
||||
targetDocumentId: string;
|
||||
similarityScore: number;
|
||||
similarityType: string;
|
||||
metadata: Record<string, any>;
|
||||
createdAt: Date;
|
||||
}
|
||||
|
||||
export interface IndustryEmbedding {
|
||||
id: string;
|
||||
industryName: string;
|
||||
industryDescription?: string;
|
||||
embedding: number[];
|
||||
documentCount: number;
|
||||
averageSimilarity?: number;
|
||||
createdAt: Date;
|
||||
updatedAt: Date;
|
||||
}
|
||||
|
||||
export class VectorDatabaseModel {
|
||||
/**
|
||||
* Store document chunks with embeddings
|
||||
*/
|
||||
static async storeDocumentChunks(chunks: Omit<DocumentChunk, 'id' | 'createdAt' | 'updatedAt'>[]): Promise<void> {
|
||||
const client = await pool.connect();
|
||||
const supabase = getSupabaseServiceClient();
|
||||
const { data, error } = await supabase
|
||||
.from('document_chunks')
|
||||
.insert(chunks.map(chunk => ({
|
||||
...chunk,
|
||||
embedding: `[${chunk.embedding.join(',')}]` // Format for pgvector
|
||||
})));
|
||||
|
||||
try {
|
||||
await client.query('BEGIN');
|
||||
|
||||
for (const chunk of chunks) {
|
||||
// Ensure embedding is properly formatted for pgvector
|
||||
const embeddingArray = Array.isArray(chunk.embedding) ? chunk.embedding : [];
|
||||
|
||||
// Validate embedding dimensions (should be 1536 for text-embedding-3-small)
|
||||
if (embeddingArray.length !== 1536) {
|
||||
logger.warn(`Embedding dimension mismatch: expected 1536, got ${embeddingArray.length}`);
|
||||
// Pad or truncate to 1536 dimensions if necessary
|
||||
const paddedEmbedding = new Array(1536).fill(0);
|
||||
embeddingArray.forEach((val, index) => {
|
||||
if (index < 1536) paddedEmbedding[index] = val;
|
||||
});
|
||||
}
|
||||
|
||||
// Format embedding properly for pgvector - must be a JSON array string
|
||||
const embeddingString = JSON.stringify(embeddingArray);
|
||||
|
||||
await client.query(`
|
||||
INSERT INTO document_chunks (
|
||||
id, document_id, content, metadata, embedding,
|
||||
chunk_index, section, page_number
|
||||
) VALUES ($1, $2, $3, $4, $5::vector, $6, $7, $8)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
content = EXCLUDED.content,
|
||||
metadata = EXCLUDED.metadata,
|
||||
embedding = EXCLUDED.embedding,
|
||||
section = EXCLUDED.section,
|
||||
page_number = EXCLUDED.page_number,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
`, [
|
||||
uuidv4(),
|
||||
chunk.documentId,
|
||||
chunk.content,
|
||||
JSON.stringify(chunk.metadata),
|
||||
embeddingString, // Pass as JSON string for pgvector
|
||||
chunk.chunkIndex,
|
||||
chunk.section,
|
||||
chunk.pageNumber
|
||||
]);
|
||||
}
|
||||
|
||||
await client.query('COMMIT');
|
||||
logger.info(`Stored ${chunks.length} document chunks in vector database`);
|
||||
} catch (error) {
|
||||
await client.query('ROLLBACK');
|
||||
if (error) {
|
||||
logger.error('Failed to store document chunks', error);
|
||||
throw error;
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for similar content using vector similarity
|
||||
*/
|
||||
static async searchSimilarContent(
|
||||
queryEmbedding: number[],
|
||||
options: {
|
||||
documentId?: string;
|
||||
limit?: number;
|
||||
similarityThreshold?: number;
|
||||
filters?: Record<string, any>;
|
||||
} = {}
|
||||
): Promise<VectorSearchResult[]> {
|
||||
const {
|
||||
documentId,
|
||||
limit = 10,
|
||||
similarityThreshold = 0.7,
|
||||
filters = {}
|
||||
} = options;
|
||||
|
||||
// Ensure embedding is properly formatted
|
||||
const embeddingArray = Array.isArray(queryEmbedding) ? queryEmbedding : [];
|
||||
|
||||
// Validate embedding dimensions
|
||||
if (embeddingArray.length !== 1536) {
|
||||
logger.warn(`Query embedding dimension mismatch: expected 1536, got ${embeddingArray.length}`);
|
||||
// Pad or truncate to 1536 dimensions if necessary
|
||||
const paddedEmbedding = new Array(1536).fill(0);
|
||||
embeddingArray.forEach((val, index) => {
|
||||
if (index < 1536) paddedEmbedding[index] = val;
|
||||
});
|
||||
}
|
||||
|
||||
let query = `
|
||||
SELECT
|
||||
dc.document_id,
|
||||
1 - (dc.embedding <=> $1::vector) as similarity_score,
|
||||
dc.content as chunk_content,
|
||||
dc.metadata
|
||||
FROM document_chunks dc
|
||||
WHERE dc.embedding IS NOT NULL
|
||||
`;
|
||||
|
||||
const params: any[] = [embeddingArray];
|
||||
let paramIndex = 2;
|
||||
|
||||
if (documentId) {
|
||||
query += ` AND dc.document_id = $${paramIndex}`;
|
||||
params.push(documentId);
|
||||
paramIndex++;
|
||||
}
|
||||
|
||||
// Add metadata filters
|
||||
Object.entries(filters).forEach(([key, value]) => {
|
||||
query += ` AND dc.metadata->>'${key}' = $${paramIndex}`;
|
||||
params.push(value);
|
||||
paramIndex++;
|
||||
});
|
||||
|
||||
query += `
|
||||
AND 1 - (dc.embedding <=> $1::vector) >= $${paramIndex}
|
||||
ORDER BY dc.embedding <=> $1::vector
|
||||
LIMIT $${paramIndex + 1}
|
||||
`;
|
||||
params.push(similarityThreshold, limit);
|
||||
|
||||
try {
|
||||
const result = await pool.query(query, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
documentId: row.document_id,
|
||||
similarityScore: parseFloat(row.similarity_score),
|
||||
chunkContent: row.chunk_content,
|
||||
metadata: row.metadata
|
||||
}));
|
||||
} catch (error) {
|
||||
logger.error('Vector search failed', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get document chunks by document ID
|
||||
*/
|
||||
static async getDocumentChunks(documentId: string): Promise<DocumentChunk[]> {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
id,
|
||||
document_id,
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
chunk_index,
|
||||
section,
|
||||
page_number,
|
||||
created_at,
|
||||
updated_at
|
||||
FROM document_chunks
|
||||
WHERE document_id = $1
|
||||
ORDER BY chunk_index
|
||||
`, [documentId]);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
id: row.id,
|
||||
documentId: row.document_id,
|
||||
content: row.content,
|
||||
metadata: row.metadata || {},
|
||||
embedding: row.embedding || [],
|
||||
chunkIndex: row.chunk_index,
|
||||
section: row.section,
|
||||
pageNumber: row.page_number,
|
||||
createdAt: row.created_at,
|
||||
updatedAt: row.updated_at
|
||||
}));
|
||||
} catch (error) {
|
||||
logger.error('Failed to get document chunks', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find similar documents
|
||||
*/
|
||||
static async findSimilarDocuments(
|
||||
documentId: string,
|
||||
limit: number = 10,
|
||||
similarityThreshold: number = 0.6
|
||||
): Promise<DocumentSimilarity[]> {
|
||||
try {
|
||||
// Get document chunks
|
||||
const documentChunks = await this.getDocumentChunks(documentId);
|
||||
if (documentChunks.length === 0) return [];
|
||||
|
||||
// Use the first chunk as reference
|
||||
const referenceChunk = documentChunks[0];
|
||||
if (!referenceChunk || !referenceChunk.embedding) return [];
|
||||
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
id,
|
||||
source_document_id,
|
||||
target_document_id,
|
||||
similarity_score,
|
||||
similarity_type,
|
||||
metadata,
|
||||
created_at
|
||||
FROM document_similarities
|
||||
WHERE source_document_id = $1
|
||||
AND similarity_score >= $2
|
||||
ORDER BY similarity_score DESC
|
||||
LIMIT $3
|
||||
`, [documentId, similarityThreshold, limit]);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
id: row.id,
|
||||
sourceDocumentId: row.source_document_id,
|
||||
targetDocumentId: row.target_document_id,
|
||||
similarityScore: parseFloat(row.similarity_score),
|
||||
similarityType: row.similarity_type,
|
||||
metadata: row.metadata || {},
|
||||
createdAt: row.created_at
|
||||
}));
|
||||
} catch (error) {
|
||||
logger.error('Failed to find similar documents', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update document similarities
|
||||
*/
|
||||
static async updateDocumentSimilarities(): Promise<void> {
|
||||
try {
|
||||
await pool.query(`
|
||||
SELECT update_document_similarities();
|
||||
`);
|
||||
logger.info('Document similarities updated');
|
||||
} catch (error) {
|
||||
logger.error('Failed to update document similarities', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Store industry embedding
|
||||
*/
|
||||
static async storeIndustryEmbedding(industry: Omit<IndustryEmbedding, 'id' | 'createdAt' | 'updatedAt'>): Promise<void> {
|
||||
try {
|
||||
// Ensure embedding is properly formatted
|
||||
const embeddingArray = Array.isArray(industry.embedding) ? industry.embedding : [];
|
||||
|
||||
// Validate embedding dimensions
|
||||
if (embeddingArray.length !== 1536) {
|
||||
logger.warn(`Industry embedding dimension mismatch: expected 1536, got ${embeddingArray.length}`);
|
||||
// Pad or truncate to 1536 dimensions if necessary
|
||||
const paddedEmbedding = new Array(1536).fill(0);
|
||||
embeddingArray.forEach((val, index) => {
|
||||
if (index < 1536) paddedEmbedding[index] = val;
|
||||
});
|
||||
}
|
||||
|
||||
await pool.query(`
|
||||
INSERT INTO industry_embeddings (
|
||||
id, industry_name, industry_description, embedding,
|
||||
document_count, average_similarity
|
||||
) VALUES ($1, $2, $3, $4::vector, $5, $6)
|
||||
ON CONFLICT (industry_name) DO UPDATE SET
|
||||
industry_description = EXCLUDED.industry_description,
|
||||
embedding = EXCLUDED.embedding,
|
||||
document_count = EXCLUDED.document_count,
|
||||
average_similarity = EXCLUDED.average_similarity,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
`, [
|
||||
uuidv4(),
|
||||
industry.industryName,
|
||||
industry.industryDescription,
|
||||
embeddingArray,
|
||||
industry.documentCount,
|
||||
industry.averageSimilarity
|
||||
]);
|
||||
|
||||
logger.info(`Stored industry embedding for: ${industry.industryName}`);
|
||||
} catch (error) {
|
||||
logger.error('Failed to store industry embedding', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Search by industry
|
||||
*/
|
||||
static async searchByIndustry(
|
||||
industryName: string,
|
||||
queryEmbedding: number[],
|
||||
limit: number = 20
|
||||
): Promise<VectorSearchResult[]> {
|
||||
try {
|
||||
// Ensure embedding is properly formatted
|
||||
const embeddingArray = Array.isArray(queryEmbedding) ? queryEmbedding : [];
|
||||
|
||||
// Validate embedding dimensions
|
||||
if (embeddingArray.length !== 1536) {
|
||||
logger.warn(`Industry search embedding dimension mismatch: expected 1536, got ${embeddingArray.length}`);
|
||||
// Pad or truncate to 1536 dimensions if necessary
|
||||
const paddedEmbedding = new Array(1536).fill(0);
|
||||
embeddingArray.forEach((val, index) => {
|
||||
if (index < 1536) paddedEmbedding[index] = val;
|
||||
});
|
||||
}
|
||||
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
dc.document_id,
|
||||
1 - (dc.embedding <=> $1::vector) as similarity_score,
|
||||
dc.content as chunk_content,
|
||||
dc.metadata
|
||||
FROM document_chunks dc
|
||||
WHERE dc.embedding IS NOT NULL
|
||||
AND dc.metadata->>'industry' = $2
|
||||
ORDER BY dc.embedding <=> $1::vector
|
||||
LIMIT $3
|
||||
`, [embeddingArray, industryName.toLowerCase(), limit]);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
documentId: row.document_id,
|
||||
similarityScore: parseFloat(row.similarity_score),
|
||||
chunkContent: row.chunk_content,
|
||||
metadata: row.metadata || {}
|
||||
}));
|
||||
} catch (error) {
|
||||
logger.error('Failed to search by industry', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Track search query for analytics
|
||||
*/
|
||||
static async trackSearchQuery(
|
||||
userId: string,
|
||||
queryText: string,
|
||||
queryEmbedding: number[],
|
||||
searchResults: VectorSearchResult[],
|
||||
options: {
|
||||
filters?: Record<string, any>;
|
||||
limitCount?: number;
|
||||
similarityThreshold?: number;
|
||||
processingTimeMs?: number;
|
||||
} = {}
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Ensure embedding is properly formatted
|
||||
const embeddingArray = Array.isArray(queryEmbedding) ? queryEmbedding : [];
|
||||
|
||||
// Validate embedding dimensions
|
||||
if (embeddingArray.length !== 1536) {
|
||||
logger.warn(`Search tracking embedding dimension mismatch: expected 1536, got ${embeddingArray.length}`);
|
||||
// Pad or truncate to 1536 dimensions if necessary
|
||||
const paddedEmbedding = new Array(1536).fill(0);
|
||||
embeddingArray.forEach((val, index) => {
|
||||
if (index < 1536) paddedEmbedding[index] = val;
|
||||
});
|
||||
}
|
||||
|
||||
await pool.query(`
|
||||
INSERT INTO vector_similarity_searches (
|
||||
id, user_id, query_text, query_embedding, search_results,
|
||||
filters, limit_count, similarity_threshold, processing_time_ms
|
||||
) VALUES ($1, $2, $3, $4::vector, $5, $6, $7, $8, $9)
|
||||
`, [
|
||||
uuidv4(),
|
||||
userId,
|
||||
queryText,
|
||||
embeddingArray,
|
||||
JSON.stringify(searchResults),
|
||||
JSON.stringify(options.filters || {}),
|
||||
options.limitCount || 10,
|
||||
options.similarityThreshold || 0.7,
|
||||
options.processingTimeMs || 0
|
||||
]);
|
||||
|
||||
logger.debug('Search query tracked for analytics');
|
||||
} catch (error) {
|
||||
logger.error('Failed to track search query', error);
|
||||
// Don't throw - analytics failure shouldn't break search
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get search analytics
|
||||
*/
|
||||
static async getSearchAnalytics(userId: string, days: number = 30): Promise<any[]> {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
query_text,
|
||||
COUNT(*) as search_count,
|
||||
AVG(processing_time_ms) as avg_processing_time,
|
||||
AVG(similarity_threshold) as avg_similarity_threshold,
|
||||
MAX(created_at) as last_search
|
||||
FROM vector_similarity_searches
|
||||
WHERE user_id = $1
|
||||
AND created_at >= NOW() - INTERVAL '${days} days'
|
||||
GROUP BY query_text
|
||||
ORDER BY search_count DESC
|
||||
LIMIT 20
|
||||
`, [userId]);
|
||||
|
||||
return result.rows;
|
||||
} catch (error) {
|
||||
logger.error('Failed to get search analytics', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete document chunks
|
||||
*/
|
||||
static async deleteDocumentChunks(documentId: string): Promise<void> {
|
||||
try {
|
||||
await pool.query(`
|
||||
DELETE FROM document_chunks
|
||||
WHERE document_id = $1
|
||||
`, [documentId]);
|
||||
|
||||
logger.info(`Deleted chunks for document: ${documentId}`);
|
||||
} catch (error) {
|
||||
logger.error('Failed to delete document chunks', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get vector database statistics
|
||||
*/
|
||||
static async getVectorDatabaseStats(): Promise<{
|
||||
totalChunks: number;
|
||||
totalDocuments: number;
|
||||
totalSearches: number;
|
||||
averageSimilarity: number;
|
||||
}> {
|
||||
try {
|
||||
const [chunksResult, documentsResult, searchesResult, similarityResult] = await Promise.all([
|
||||
pool.query('SELECT COUNT(*) as count FROM document_chunks'),
|
||||
pool.query('SELECT COUNT(DISTINCT document_id) as count FROM document_chunks'),
|
||||
pool.query('SELECT COUNT(*) as count FROM vector_similarity_searches'),
|
||||
pool.query(`
|
||||
SELECT AVG(similarity_score) as avg_similarity
|
||||
FROM document_similarities
|
||||
WHERE similarity_score > 0
|
||||
`)
|
||||
]);
|
||||
|
||||
return {
|
||||
totalChunks: parseInt(chunksResult.rows[0]?.count || '0'),
|
||||
totalDocuments: parseInt(documentsResult.rows[0]?.count || '0'),
|
||||
totalSearches: parseInt(searchesResult.rows[0]?.count || '0'),
|
||||
averageSimilarity: parseFloat(similarityResult.rows[0]?.avg_similarity || '0')
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error('Failed to get vector database stats', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all chunks (for testing/debugging)
|
||||
*/
|
||||
static async getAllChunks(): Promise<DocumentChunk[]> {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
id,
|
||||
document_id,
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
chunk_index,
|
||||
section,
|
||||
page_number,
|
||||
created_at,
|
||||
updated_at
|
||||
FROM document_chunks
|
||||
ORDER BY document_id, chunk_index
|
||||
LIMIT 1000
|
||||
`);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
id: row.id,
|
||||
documentId: row.document_id,
|
||||
content: row.content,
|
||||
metadata: row.metadata || {},
|
||||
embedding: row.embedding || [],
|
||||
chunkIndex: row.chunk_index,
|
||||
section: row.section,
|
||||
pageNumber: row.page_number,
|
||||
createdAt: row.created_at,
|
||||
updatedAt: row.updated_at
|
||||
}));
|
||||
} catch (error) {
|
||||
logger.error('Failed to get all chunks', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total chunk count
|
||||
*/
|
||||
static async getTotalChunkCount(): Promise<number> {
|
||||
try {
|
||||
const result = await pool.query('SELECT COUNT(*) as count FROM document_chunks');
|
||||
return parseInt(result.rows[0]?.count || '0');
|
||||
} catch (error) {
|
||||
logger.error('Failed to get total chunk count', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total document count
|
||||
*/
|
||||
static async getTotalDocumentCount(): Promise<number> {
|
||||
try {
|
||||
const result = await pool.query('SELECT COUNT(DISTINCT document_id) as count FROM document_chunks');
|
||||
return parseInt(result.rows[0]?.count || '0');
|
||||
} catch (error) {
|
||||
logger.error('Failed to get total document count', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get average chunk size
|
||||
*/
|
||||
static async getAverageChunkSize(): Promise<number> {
|
||||
try {
|
||||
const result = await pool.query('SELECT AVG(LENGTH(content)) as avg_size FROM document_chunks');
|
||||
return Math.round(parseFloat(result.rows[0]?.avg_size || '0'));
|
||||
} catch (error) {
|
||||
logger.error('Failed to get average chunk size', error);
|
||||
throw error;
|
||||
}
|
||||
logger.info(`Stored ${chunks.length} document chunks in vector database`);
|
||||
}
|
||||
}
|
||||
@@ -63,6 +63,7 @@ export interface ProcessingJob {
|
||||
}
|
||||
|
||||
export type ProcessingStatus =
|
||||
| 'uploading'
|
||||
| 'uploaded'
|
||||
| 'extracting_text'
|
||||
| 'processing_llm'
|
||||
|
||||
@@ -23,9 +23,13 @@ const router = express.Router();
|
||||
router.use(verifyFirebaseToken);
|
||||
router.use(addCorrelationId);
|
||||
|
||||
// Essential document management routes (keeping these)
|
||||
// NEW Firebase Storage direct upload routes
|
||||
router.post('/upload-url', documentController.getUploadUrl);
|
||||
router.post('/:id/confirm-upload', validateUUID('id'), documentController.confirmUpload);
|
||||
|
||||
// LEGACY multipart upload routes (keeping for backward compatibility)
|
||||
router.post('/upload', handleFileUpload, documentController.uploadDocument);
|
||||
router.post('/', handleFileUpload, documentController.uploadDocument); // Add direct POST to /documents for frontend compatibility
|
||||
router.post('/', handleFileUpload, documentController.uploadDocument);
|
||||
router.get('/', documentController.getDocuments);
|
||||
|
||||
// Analytics endpoints (MUST come before /:id routes to avoid conflicts)
|
||||
|
||||
@@ -483,6 +483,37 @@ class FileStorageService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate signed upload URL for direct client uploads
|
||||
*/
|
||||
async generateSignedUploadUrl(filePath: string, contentType: string, expirationMinutes: number = 60): Promise<string> {
|
||||
try {
|
||||
const bucket = this.storage.bucket(this.bucketName);
|
||||
const file = bucket.file(filePath);
|
||||
|
||||
// Generate signed upload URL with retry logic
|
||||
const [signedUrl] = await this.retryOperation(
|
||||
async () => file.getSignedUrl({
|
||||
version: 'v4',
|
||||
action: 'write',
|
||||
expires: Date.now() + (expirationMinutes * 60 * 1000),
|
||||
contentType: contentType,
|
||||
}),
|
||||
'generate signed upload URL from GCS'
|
||||
);
|
||||
|
||||
logger.info(`Generated signed upload URL for file: ${filePath}`, {
|
||||
contentType,
|
||||
expirationMinutes,
|
||||
});
|
||||
|
||||
return signedUrl;
|
||||
} catch (error) {
|
||||
logger.error(`Error generating signed upload URL for file: ${filePath}`, error);
|
||||
throw new Error(`Failed to generate upload URL: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy file within Google Cloud Storage
|
||||
*/
|
||||
|
||||
23
cors.json
Normal file
23
cors.json
Normal file
@@ -0,0 +1,23 @@
|
||||
[
|
||||
{
|
||||
"origin": [
|
||||
"https://cim-summarizer.web.app",
|
||||
"https://cim-summarizer.firebaseapp.com",
|
||||
"http://localhost:3000",
|
||||
"http://localhost:5173"
|
||||
],
|
||||
"method": [
|
||||
"GET",
|
||||
"POST",
|
||||
"PUT",
|
||||
"DELETE",
|
||||
"OPTIONS"
|
||||
],
|
||||
"responseHeader": [
|
||||
"Content-Type",
|
||||
"Authorization",
|
||||
"X-Requested-With"
|
||||
],
|
||||
"maxAgeSeconds": 3600
|
||||
}
|
||||
]
|
||||
6
firebase.json
Normal file
6
firebase.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"storage": {
|
||||
"rules": "storage.rules",
|
||||
"cors": "storage.cors.json"
|
||||
}
|
||||
}
|
||||
@@ -63,6 +63,10 @@
|
||||
}
|
||||
],
|
||||
"rewrites": [
|
||||
{
|
||||
"source": "/api/**",
|
||||
"function": "api"
|
||||
},
|
||||
{
|
||||
"source": "**",
|
||||
"destination": "/index.html"
|
||||
|
||||
@@ -387,19 +387,6 @@ const Dashboard: React.FC = () => {
|
||||
<span className="text-sm text-white">
|
||||
Welcome, {user?.name || user?.email}
|
||||
</span>
|
||||
{/* Debug buttons - show in production for troubleshooting */}
|
||||
<button
|
||||
onClick={handleDebugAuth}
|
||||
className="bg-yellow-500 hover:bg-yellow-600 text-white px-3 py-1 rounded text-sm"
|
||||
>
|
||||
Debug Auth
|
||||
</button>
|
||||
<button
|
||||
onClick={handleTestAPIAuth}
|
||||
className="bg-blue-500 hover:bg-blue-600 text-white px-3 py-1 rounded text-sm"
|
||||
>
|
||||
Test API
|
||||
</button>
|
||||
<LogoutButton variant="button" className="bg-error-500 hover:bg-error-600 text-white" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -14,10 +14,10 @@ interface UploadedFile {
|
||||
progress: number;
|
||||
error?: string;
|
||||
documentId?: string; // Real document ID from backend
|
||||
// GCS-specific fields
|
||||
gcsError?: boolean;
|
||||
storageType?: 'gcs' | 'local';
|
||||
gcsUrl?: string;
|
||||
// Firebase Storage specific fields
|
||||
storageError?: boolean;
|
||||
storageType?: 'firebase' | 'local';
|
||||
storageUrl?: string;
|
||||
}
|
||||
|
||||
interface DocumentUploadProps {
|
||||
@@ -92,14 +92,12 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
|
||||
try {
|
||||
// Upload the document with optimized agentic RAG processing (no strategy selection needed)
|
||||
const document = await documentService.uploadDocument(
|
||||
const result = await documentService.uploadDocument(
|
||||
file,
|
||||
(progress) => {
|
||||
setUploadedFiles(prev =>
|
||||
prev.map(f =>
|
||||
f.id === uploadedFile.id
|
||||
? { ...f, progress }
|
||||
: f
|
||||
f.id === uploadedFile.id ? { ...f, progress } : f
|
||||
)
|
||||
);
|
||||
},
|
||||
@@ -141,13 +139,13 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
} else {
|
||||
console.error('Upload failed:', error);
|
||||
|
||||
// Handle GCS-specific errors
|
||||
// Handle storage-specific errors
|
||||
let errorMessage = 'Upload failed';
|
||||
let isGCSError = false;
|
||||
let isStorageError = false;
|
||||
|
||||
if (GCSErrorHandler.isGCSError(error)) {
|
||||
errorMessage = GCSErrorHandler.getErrorMessage(error as GCSError);
|
||||
isGCSError = true;
|
||||
isStorageError = true;
|
||||
} else if (error instanceof Error) {
|
||||
errorMessage = error.message;
|
||||
}
|
||||
@@ -159,8 +157,8 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
...f,
|
||||
status: 'error',
|
||||
error: errorMessage,
|
||||
// Add GCS error indicator
|
||||
...(isGCSError && { gcsError: true })
|
||||
// Add storage error indicator
|
||||
...(isStorageError && { storageError: true })
|
||||
}
|
||||
: f
|
||||
)
|
||||
@@ -297,19 +295,19 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
}
|
||||
};
|
||||
|
||||
const getStatusText = (status: UploadedFile['status'], error?: string, gcsError?: boolean) => {
|
||||
const getStatusText = (status: UploadedFile['status'], error?: string, storageError?: boolean) => {
|
||||
switch (status) {
|
||||
case 'uploading':
|
||||
return 'Uploading to Google Cloud Storage...';
|
||||
return 'Uploading to Firebase Storage...';
|
||||
case 'uploaded':
|
||||
return 'Uploaded to GCS ✓';
|
||||
return 'Uploaded to Firebase Storage ✓';
|
||||
case 'processing':
|
||||
return 'Processing with Optimized Agentic RAG...';
|
||||
return 'Processing with Document AI + Optimized Agentic RAG...';
|
||||
case 'completed':
|
||||
return 'Completed ✓';
|
||||
return 'Completed ✓ (PDF automatically deleted)';
|
||||
case 'error':
|
||||
if (error === 'Upload cancelled') return 'Cancelled';
|
||||
if (gcsError) return 'GCS Error';
|
||||
if (storageError) return 'Firebase Storage Error';
|
||||
return 'Error';
|
||||
default:
|
||||
return '';
|
||||
@@ -323,10 +321,10 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div className="flex items-center">
|
||||
<CheckCircle className="h-5 w-5 text-blue-600 mr-2" />
|
||||
<div>
|
||||
<h3 className="text-sm font-medium text-blue-800">Optimized Agentic RAG Processing</h3>
|
||||
<h3 className="text-sm font-medium text-blue-800">Document AI + Optimized Agentic RAG Processing</h3>
|
||||
<p className="text-sm text-blue-700 mt-1">
|
||||
All documents are automatically processed using our advanced optimized agentic RAG system,
|
||||
which includes intelligent chunking, vectorization, and multi-agent analysis for the best results.
|
||||
All documents are automatically processed using Google Document AI for extraction and our advanced optimized agentic RAG system for analysis,
|
||||
including intelligent chunking, vectorization, and multi-agent CIM review. PDFs are automatically deleted after processing.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -351,7 +349,7 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
Drag and drop PDF files here, or click to browse
|
||||
</p>
|
||||
<p className="text-xs text-gray-500">
|
||||
Maximum file size: 50MB • Supported format: PDF • Stored securely in Google Cloud Storage • Automatic Optimized Agentic RAG Processing
|
||||
Maximum file size: 50MB • Supported format: PDF • Stored securely in Firebase Storage • Automatic Document AI + Optimized Agentic RAG Processing • PDFs deleted after processing
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -379,8 +377,8 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div>
|
||||
<h4 className="text-sm font-medium text-success-800">Upload Complete</h4>
|
||||
<p className="text-sm text-success-700 mt-1">
|
||||
Files have been uploaded successfully to Google Cloud Storage! You can now navigate away from this page.
|
||||
Processing will continue in the background using Optimized Agentic RAG and you can check the status in the Documents tab.
|
||||
Files have been uploaded successfully to Firebase Storage! You can now navigate away from this page.
|
||||
Processing will continue in the background using Document AI + Optimized Agentic RAG. PDFs will be automatically deleted after processing to save costs.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -426,10 +424,10 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div className="flex items-center space-x-1">
|
||||
{getStatusIcon(file.status)}
|
||||
<span className="text-xs text-gray-600">
|
||||
{getStatusText(file.status, file.error, file.gcsError)}
|
||||
{getStatusText(file.status, file.error, file.storageError)}
|
||||
</span>
|
||||
{/* GCS indicator */}
|
||||
{file.storageType === 'gcs' && (
|
||||
{/* Firebase Storage indicator */}
|
||||
{file.storageType === 'firebase' && (
|
||||
<Cloud className="h-3 w-3 text-blue-500" />
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -60,7 +60,7 @@ export interface Document {
|
||||
file_path: string;
|
||||
file_size: number;
|
||||
uploaded_at: string;
|
||||
status: 'uploaded' | 'extracting_text' | 'processing_llm' | 'generating_pdf' | 'completed' | 'failed';
|
||||
status: 'uploading' | 'uploaded' | 'extracting_text' | 'processing_llm' | 'generating_pdf' | 'completed' | 'failed';
|
||||
extracted_text?: string;
|
||||
generated_summary?: string;
|
||||
summary_markdown_path?: string;
|
||||
@@ -219,7 +219,7 @@ export class GCSErrorHandler {
|
||||
|
||||
class DocumentService {
|
||||
/**
|
||||
* Upload a document for processing
|
||||
* Upload a document using Firebase Storage direct upload (new method)
|
||||
*/
|
||||
async uploadDocument(
|
||||
file: File,
|
||||
@@ -233,7 +233,137 @@ class DocumentService {
|
||||
throw new Error('Authentication required. Please log in to upload documents.');
|
||||
}
|
||||
|
||||
console.log('📤 Starting document upload...');
|
||||
console.log('📤 Starting Firebase Storage direct upload...');
|
||||
console.log('📤 File:', file.name, 'Size:', file.size, 'Type:', file.type);
|
||||
console.log('📤 Token available:', !!token);
|
||||
|
||||
// Step 1: Get signed upload URL
|
||||
onProgress?.(5); // 5% - Getting upload URL
|
||||
|
||||
console.log('🌐 Making request to upload-url endpoint');
|
||||
console.log('🌐 Base URL:', API_BASE_URL);
|
||||
console.log('🌐 Full URL would be:', `${API_BASE_URL}/documents/upload-url`);
|
||||
console.log('🌐 Request payload:', { fileName: file.name, fileSize: file.size, contentType: file.type });
|
||||
|
||||
const uploadUrlResponse = await apiClient.post('/documents/upload-url', {
|
||||
fileName: file.name,
|
||||
fileSize: file.size,
|
||||
contentType: file.type
|
||||
}, { signal });
|
||||
|
||||
const { documentId, uploadUrl } = uploadUrlResponse.data;
|
||||
console.log('✅ Got signed upload URL for document:', documentId);
|
||||
|
||||
// Step 2: Upload directly to Firebase Storage
|
||||
onProgress?.(10); // 10% - Starting direct upload
|
||||
|
||||
await this.uploadToFirebaseStorage(file, uploadUrl, onProgress, signal);
|
||||
console.log('✅ File uploaded to Firebase Storage');
|
||||
|
||||
// Step 3: Confirm upload and trigger processing
|
||||
onProgress?.(95); // 95% - Confirming upload
|
||||
|
||||
const confirmResponse = await apiClient.post(`/documents/${documentId}/confirm-upload`, {}, { signal });
|
||||
|
||||
onProgress?.(100); // 100% - Complete
|
||||
console.log('✅ Upload confirmed and processing started');
|
||||
|
||||
return {
|
||||
id: documentId,
|
||||
...confirmResponse.data
|
||||
};
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('❌ Firebase Storage upload failed:', error);
|
||||
|
||||
// Handle specific error cases
|
||||
if (error.name === 'AbortError') {
|
||||
throw new Error('Upload was cancelled.');
|
||||
}
|
||||
|
||||
if (error.response?.status === 401) {
|
||||
throw new Error('Authentication required. Please log in again.');
|
||||
}
|
||||
|
||||
if (error.response?.status === 400) {
|
||||
throw new Error(error.response?.data?.error || 'Invalid request');
|
||||
}
|
||||
|
||||
if (error.response?.status >= 500) {
|
||||
throw new Error('Server error. Please try again later.');
|
||||
}
|
||||
|
||||
// Generic error fallback
|
||||
throw new Error(error.response?.data?.error || error.message || 'Upload failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload file directly to Firebase Storage using signed URL
|
||||
*/
|
||||
private async uploadToFirebaseStorage(
|
||||
file: File,
|
||||
uploadUrl: string,
|
||||
onProgress?: (progress: number) => void,
|
||||
signal?: AbortSignal
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const xhr = new XMLHttpRequest();
|
||||
|
||||
// Handle upload progress
|
||||
xhr.upload.addEventListener('progress', (event) => {
|
||||
if (event.lengthComputable && onProgress) {
|
||||
// Map Firebase Storage upload to 10%-90% of overall progress
|
||||
const uploadProgress = Math.round((event.loaded / event.total) * 80) + 10;
|
||||
onProgress(uploadProgress);
|
||||
}
|
||||
});
|
||||
|
||||
// Handle completion
|
||||
xhr.addEventListener('load', () => {
|
||||
if (xhr.status >= 200 && xhr.status < 300) {
|
||||
resolve();
|
||||
} else {
|
||||
reject(new Error(`Firebase Storage upload failed: ${xhr.status} ${xhr.statusText}`));
|
||||
}
|
||||
});
|
||||
|
||||
// Handle errors
|
||||
xhr.addEventListener('error', () => {
|
||||
reject(new Error('Firebase Storage upload failed: Network error'));
|
||||
});
|
||||
|
||||
// Handle abort
|
||||
if (signal) {
|
||||
signal.addEventListener('abort', () => {
|
||||
xhr.abort();
|
||||
reject(new Error('Upload was cancelled'));
|
||||
});
|
||||
}
|
||||
|
||||
// Start upload
|
||||
xhr.open('PUT', uploadUrl);
|
||||
xhr.setRequestHeader('Content-Type', file.type);
|
||||
xhr.send(file);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy multipart upload method (kept for compatibility)
|
||||
*/
|
||||
async uploadDocumentLegacy(
|
||||
file: File,
|
||||
onProgress?: (progress: number) => void,
|
||||
signal?: AbortSignal
|
||||
): Promise<Document> {
|
||||
try {
|
||||
// Check authentication before upload
|
||||
const token = await authService.getToken();
|
||||
if (!token) {
|
||||
throw new Error('Authentication required. Please log in to upload documents.');
|
||||
}
|
||||
|
||||
console.log('📤 Starting legacy multipart upload...');
|
||||
console.log('📤 File:', file.name, 'Size:', file.size, 'Type:', file.type);
|
||||
console.log('📤 Token available:', !!token);
|
||||
|
||||
@@ -243,7 +373,7 @@ class DocumentService {
|
||||
// Always use optimized agentic RAG processing - no strategy selection needed
|
||||
formData.append('processingStrategy', 'optimized_agentic_rag');
|
||||
|
||||
const response = await apiClient.post('/documents', formData, {
|
||||
const response = await apiClient.post('/documents/upload', formData, {
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data',
|
||||
},
|
||||
@@ -256,10 +386,10 @@ class DocumentService {
|
||||
},
|
||||
});
|
||||
|
||||
console.log('✅ Document upload successful:', response.data);
|
||||
console.log('✅ Legacy document upload successful:', response.data);
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
console.error('❌ Document upload failed:', error);
|
||||
console.error('❌ Legacy document upload failed:', error);
|
||||
|
||||
// Provide more specific error messages
|
||||
if (error.response?.status === 401) {
|
||||
|
||||
23
storage.cors.json
Normal file
23
storage.cors.json
Normal file
@@ -0,0 +1,23 @@
|
||||
[
  {
    "origin": [
      "https://cim-summarizer.web.app",
      "https://cim-summarizer.firebaseapp.com",
      "http://localhost:3000",
      "http://localhost:5173"
    ],
    "method": [
      "GET",
      "POST",
      "PUT",
      "DELETE",
      "OPTIONS"
    ],
    "responseHeader": [
      "Content-Type",
      "Authorization",
      "X-Requested-With"
    ],
    "maxAgeSeconds": 3600
  }
]
|
||||
8
storage.rules
Normal file
8
storage.rules
Normal file
@@ -0,0 +1,8 @@
|
||||
rules_version = '2';
service firebase.storage {
  match /b/{bucket}/o {
    match /{allPaths=**} {
      allow read, write: if request.auth != null;
    }
  }
}
|
||||
Reference in New Issue
Block a user