Files
cim_summary/backend/src/controllers/documentController.ts
Jon 6057d1d7fd 🔧 Fix authentication and document upload issues
## What was done:
 Fixed Firebase Admin initialization to use default credentials for Firebase Functions
 Updated frontend to use correct Firebase Functions URL (was using Cloud Run URL)
 Added comprehensive debugging to authentication middleware
 Added debugging to file upload middleware and CORS handling
 Added debug buttons to frontend for troubleshooting authentication
 Enhanced error handling and logging throughout the stack

## Current issues:
 Document upload still returns 400 Bad Request despite authentication working
 GET requests work fine (200 OK) but POST upload requests fail
 Frontend authentication is working correctly (valid JWT tokens)
 Backend authentication middleware is working (rejects invalid tokens)
 CORS is configured correctly and allowing requests

## Root cause analysis:
- Authentication is NOT the issue (tokens are valid, GET requests work)
- The problem appears to be in the file upload handling or multer configuration
- Request reaches the server but fails during upload processing
- Need to identify exactly where in the upload pipeline the failure occurs

## TODO next steps:
1. 🔍 Check Firebase Functions logs after next upload attempt to see debugging output
2. 🔍 Verify if request reaches upload middleware (look for 'Upload middleware called' logs)
3. 🔍 Check if file validation is triggered (look for '🔍 File filter called' logs)
4. 🔍 Identify specific error in upload pipeline (multer, file processing, etc.)
5. 🔍 Test with smaller file or different file type to isolate issue
6. 🔍 Check if issue is with Firebase Functions file size limits or timeout
7. 🔍 Verify multer configuration and file handling in Firebase Functions environment

## Technical details:
- Frontend: https://cim-summarizer.web.app
- Backend: https://us-central1-cim-summarizer.cloudfunctions.net/api
- Authentication: Firebase Auth with JWT tokens (working correctly)
- File upload: Multer with memory storage for immediate GCS upload
- Debug buttons available in production frontend for troubleshooting
2025-07-31 16:18:53 -04:00

555 lines
16 KiB
TypeScript

import { Request, Response } from 'express';
import { logger, StructuredLogger } from '../utils/logger';
import { DocumentModel } from '../models/DocumentModel';
import { fileStorageService } from '../services/fileStorageService';
import { jobQueueService } from '../services/jobQueueService';
import { uploadProgressService } from '../services/uploadProgressService';
import { uploadMonitoringService } from '../services/uploadMonitoringService';
export const documentController = {
/**
 * Handles a single-file document upload.
 *
 * Steps: check `req.user.uid`, require `req.file` (expected to be populated
 * by upstream upload middleware), store the file through `fileStorageService`,
 * create the `DocumentModel` record, then queue a `document_processing` job.
 * Each outcome is reported to `uploadMonitoringService` and the
 * request-scoped `StructuredLogger`.
 *
 * Responses:
 * - 201 with the document summary. `status` is `extracting_text` only when
 *   the job was queued and the status update succeeded; otherwise it is
 *   `uploaded`, which is what the record still holds.
 * - 401 when no authenticated user is attached to the request.
 * - 400 when no file is attached to the request.
 * - 500 when storing the file fails or any other step throws.
 *
 * @param req Express request carrying `user`, `file` and `correlationId`.
 * @param res Express response used to send the JSON result.
 */
async uploadDocument(req: Request, res: Response): Promise<void> {
  const startTime = Date.now();
  const structuredLogger = new StructuredLogger(req.correlationId);
  // Always use optimized agentic RAG processing - no strategy selection needed
  const processingStrategy = 'optimized_agentic_rag';

  // Forwards an event to the monitoring service, adding the correlation id
  // only when the request actually has one (the key is otherwise omitted).
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const trackUploadEvent = (event: any): void => {
    if (req.correlationId) {
      event.correlationId = req.correlationId;
    }
    uploadMonitoringService.trackUploadEvent(event);
  };

  // Path of a stored file that no document record references yet. It is set
  // after storage succeeds and cleared once the record exists, so the catch
  // block below knows whether there is an orphaned file to remove.
  let unreferencedFilePath: string | undefined;

  try {
    const userId = req.user?.uid;
    if (!userId) {
      res.status(401).json({
        error: 'User not authenticated',
        correlationId: req.correlationId
      });
      return;
    }

    // Check if file was uploaded
    if (!req.file) {
      res.status(400).json({
        error: 'No file uploaded',
        correlationId: req.correlationId
      });
      return;
    }
    const file = req.file;

    // Builds a fresh descriptor per consumer so no object is shared between
    // the monitoring service and the structured logger.
    const describeFile = () => ({
      originalName: file.originalname,
      size: file.size,
      mimetype: file.mimetype,
    });

    // Track upload start
    trackUploadEvent({
      userId,
      fileInfo: describeFile(),
      status: 'started',
      stage: 'upload_initiated',
    });
    structuredLogger.uploadStart(describeFile(), userId);

    // Store file and get file path
    const storageResult = await fileStorageService.storeFile(file, userId);
    if (!storageResult.success || !storageResult.fileInfo) {
      const processingTime = Date.now() - startTime;
      const storageErrorMessage = storageResult.error || 'Failed to store file';

      // Track upload failure
      trackUploadEvent({
        userId,
        fileInfo: describeFile(),
        status: 'failed',
        stage: 'file_storage',
        error: {
          message: storageErrorMessage,
          type: 'storage_error',
          code: 'STORAGE_ERROR',
        },
        processingTime,
      });
      structuredLogger.uploadError(
        new Error(storageErrorMessage),
        describeFile(),
        userId,
        'file_storage'
      );
      res.status(500).json({
        error: 'Failed to store file',
        correlationId: req.correlationId
      });
      return;
    }

    // Create document record
    unreferencedFilePath = storageResult.fileInfo.path;
    const document = await DocumentModel.create({
      user_id: userId,
      original_file_name: file.originalname,
      file_path: storageResult.fileInfo.path,
      file_size: file.size,
      status: 'uploaded'
    });
    unreferencedFilePath = undefined;

    // Always auto-process with optimized agentic RAG. A queueing failure is
    // logged but does not fail the upload; the reported status then stays
    // at 'uploaded' instead of claiming that extraction has started.
    let currentStatus = 'uploaded';
    try {
      const jobId = await jobQueueService.addJob(
        'document_processing',
        {
          documentId: document.id,
          userId: userId,
          options: { strategy: processingStrategy }
        },
        0 // Normal priority
      );
      logger.info('Document processing job queued with optimized agentic RAG', {
        documentId: document.id,
        jobId,
        strategy: processingStrategy
      });
      // Update status to indicate it's queued for processing
      await DocumentModel.updateById(document.id, { status: 'extracting_text' });
      currentStatus = 'extracting_text';
    } catch (error) {
      logger.error('Failed to queue document processing job', { error, documentId: document.id });
    }

    // Track upload success
    const processingTime = Date.now() - startTime;
    trackUploadEvent({
      userId,
      fileInfo: describeFile(),
      status: 'success',
      stage: 'upload_completed',
      processingTime,
    });
    structuredLogger.uploadSuccess(describeFile(), userId, processingTime);

    // Return document info
    res.status(201).json({
      id: document.id,
      name: document.original_file_name,
      originalName: document.original_file_name,
      status: currentStatus,
      uploadedAt: document.created_at,
      uploadedBy: userId,
      fileSize: document.file_size,
      processingStrategy: processingStrategy,
      correlationId: req.correlationId || undefined
    });
  } catch (error) {
    const processingTime = Date.now() - startTime;

    // `userId` and `file` are scoped to the try block, so fall back to the
    // request with placeholders for anything that is missing.
    const describeRequestFile = () => ({
      originalName: req.file?.originalname || 'unknown',
      size: req.file?.size || 0,
      mimetype: req.file?.mimetype || 'unknown',
    });

    // Track upload failure
    trackUploadEvent({
      userId: req.user?.uid || 'unknown',
      fileInfo: describeRequestFile(),
      status: 'failed',
      stage: 'upload_error',
      error: {
        message: error instanceof Error ? error.message : 'Unknown error',
        type: 'upload_error',
      },
      processingTime,
    });
    structuredLogger.uploadError(
      error,
      describeRequestFile(),
      req.user?.uid || 'unknown',
      'upload_error'
    );
    logger.error('Upload document failed', {
      error,
      correlationId: req.correlationId
    });

    // Best-effort rollback: the file reached storage but the document record
    // was never created, so nothing would ever reference or delete it.
    if (unreferencedFilePath) {
      try {
        await fileStorageService.deleteFile(unreferencedFilePath);
      } catch (cleanupError) {
        logger.warn('Failed to remove stored file after upload error', {
          error: cleanupError,
          filePath: unreferencedFilePath,
          correlationId: req.correlationId
        });
      }
    }

    res.status(500).json({
      error: 'Upload failed',
      correlationId: req.correlationId || undefined
    });
  }
},
/**
 * Lists the documents returned by `DocumentModel.findByUserId` for the
 * authenticated user.
 *
 * Responses:
 * - 200 with `{ documents, correlationId }`.
 * - 401 when no authenticated user is attached to the request.
 * - 500 when the lookup or formatting throws.
 *
 * @param req Express request carrying `user` and `correlationId`.
 * @param res Express response used to send the JSON result.
 */
async getDocuments(req: Request, res: Response): Promise<void> {
  try {
    const ownerId = req.user?.uid;
    if (!ownerId) {
      res.status(401).json({
        error: 'User not authenticated',
        correlationId: req.correlationId
      });
      return;
    }

    const records = await DocumentModel.findByUserId(ownerId);
    const payload = records.map(record => {
      // Prefer structured analysis data; otherwise expose the raw extracted
      // text (if any) under a `text` key.
      const extractedData =
        record.analysis_data || (record.extracted_text ? { text: record.extracted_text } : undefined);

      return {
        id: record.id,
        name: record.original_file_name,
        originalName: record.original_file_name,
        status: record.status,
        uploadedAt: record.created_at,
        processedAt: record.processing_completed_at,
        uploadedBy: ownerId,
        fileSize: record.file_size,
        summary: record.generated_summary,
        error: record.error_message,
        extractedData
      };
    });

    res.json({
      documents: payload,
      correlationId: req.correlationId || undefined
    });
  } catch (error) {
    logger.error('Get documents failed', {
      error,
      correlationId: req.correlationId
    });
    res.status(500).json({
      error: 'Get documents failed',
      correlationId: req.correlationId || undefined
    });
  }
},
/**
 * Returns a single document, provided it belongs to the authenticated user.
 *
 * Responses:
 * - 200 with the formatted document plus `correlationId`.
 * - 401 when no authenticated user is attached to the request.
 * - 400 when the `id` route parameter is missing.
 * - 404 when `DocumentModel.findById` finds nothing.
 * - 403 when the document's `user_id` differs from the caller's uid.
 * - 500 when any step throws.
 *
 * @param req Express request carrying `user`, `params.id` and `correlationId`.
 * @param res Express response used to send the JSON result.
 */
async getDocument(req: Request, res: Response): Promise<void> {
  try {
    const userId = req.user?.uid;
    if (!userId) {
      res.status(401).json({
        error: 'User not authenticated',
        correlationId: req.correlationId
      });
      return;
    }

    const { id } = req.params;
    if (!id) {
      res.status(400).json({
        error: 'Document ID is required',
        correlationId: req.correlationId
      });
      return;
    }

    const document = await DocumentModel.findById(id);
    if (!document) {
      res.status(404).json({
        error: 'Document not found',
        correlationId: req.correlationId
      });
      return;
    }

    // Check if user owns the document
    if (document.user_id !== userId) {
      res.status(403).json({
        error: 'Access denied',
        correlationId: req.correlationId
      });
      return;
    }

    const formattedDocument = {
      id: document.id,
      name: document.original_file_name,
      originalName: document.original_file_name,
      status: document.status,
      uploadedAt: document.created_at,
      // Report the processing completion timestamp, the same field that
      // getDocuments and getDocumentProgress expose as `processedAt`
      // (this endpoint previously returned `updated_at` here).
      processedAt: document.processing_completed_at,
      uploadedBy: userId,
      fileSize: document.file_size,
      summary: document.generated_summary,
      error: document.error_message,
      // Prefer structured analysis data; otherwise expose the raw extracted
      // text (if any) under a `text` key.
      extractedData: document.analysis_data || (document.extracted_text ? { text: document.extracted_text } : undefined)
    };

    res.json({
      ...formattedDocument,
      correlationId: req.correlationId || undefined
    });
  } catch (error) {
    logger.error('Get document failed', {
      error,
      correlationId: req.correlationId
    });
    res.status(500).json({
      error: 'Get document failed',
      correlationId: req.correlationId || undefined
    });
  }
},
/**
 * Reports processing progress for one document owned by the caller.
 *
 * The percentage comes from `uploadProgressService.getProgress` when that
 * returns an entry; otherwise it is a coarse estimate derived from the
 * document's stored status (10 / 25 / 75 / 100, and 0 for any other status).
 *
 * Responses:
 * - 200 with `{ id, status, progress, uploadedAt, processedAt, correlationId }`.
 * - 401 / 400 / 404 / 403 for missing user, missing id, unknown document
 *   and foreign document respectively.
 * - 500 when any step throws.
 *
 * @param req Express request carrying `user`, `params.id` and `correlationId`.
 * @param res Express response used to send the JSON result.
 */
async getDocumentProgress(req: Request, res: Response): Promise<void> {
  try {
    const callerId = req.user?.uid;
    if (!callerId) {
      res.status(401).json({
        error: 'User not authenticated',
        correlationId: req.correlationId
      });
      return;
    }

    const documentId = req.params.id;
    if (!documentId) {
      res.status(400).json({
        error: 'Document ID is required',
        correlationId: req.correlationId
      });
      return;
    }

    const record = await DocumentModel.findById(documentId);
    if (!record) {
      res.status(404).json({
        error: 'Document not found',
        correlationId: req.correlationId
      });
      return;
    }

    // Only the owner may inspect a document's progress.
    if (record.user_id !== callerId) {
      res.status(403).json({
        error: 'Access denied',
        correlationId: req.correlationId
      });
      return;
    }

    // Live progress entry, if the progress service has one for this id.
    const liveProgress = uploadProgressService.getProgress(documentId);

    // Status-based estimate used when there is no live entry.
    let estimatedProgress: number;
    switch (record.status) {
      case 'completed':
        estimatedProgress = 100;
        break;
      case 'processing_llm':
      case 'generating_pdf':
        estimatedProgress = 75;
        break;
      case 'extracting_text':
        estimatedProgress = 25;
        break;
      case 'uploaded':
        estimatedProgress = 10;
        break;
      default:
        estimatedProgress = 0;
    }

    let reportedProgress = estimatedProgress;
    if (liveProgress) {
      reportedProgress = liveProgress.progress;
    }

    res.json({
      id: record.id,
      status: record.status,
      progress: reportedProgress,
      uploadedAt: record.created_at,
      processedAt: record.processing_completed_at,
      correlationId: req.correlationId || undefined
    });
  } catch (error) {
    logger.error('Get document progress failed', {
      error,
      correlationId: req.correlationId
    });
    res.status(500).json({
      error: 'Get document progress failed',
      correlationId: req.correlationId || undefined
    });
  }
},
/**
 * Deletes a document owned by the caller: first the database record, then
 * the stored file.
 *
 * A failure to delete the file is only logged as a warning; the request
 * still succeeds once the record is gone.
 *
 * Responses:
 * - 200 with a confirmation message.
 * - 401 / 400 / 404 / 403 for missing user, missing id, unknown document
 *   and foreign document respectively.
 * - 500 when `DocumentModel.delete` reports failure or any step throws.
 *
 * @param req Express request carrying `user`, `params.id` and `correlationId`.
 * @param res Express response used to send the JSON result.
 */
async deleteDocument(req: Request, res: Response): Promise<void> {
  try {
    const callerId = req.user?.uid;
    if (!callerId) {
      res.status(401).json({
        error: 'User not authenticated',
        correlationId: req.correlationId
      });
      return;
    }

    const documentId = req.params.id;
    if (!documentId) {
      res.status(400).json({
        error: 'Document ID is required',
        correlationId: req.correlationId
      });
      return;
    }

    const record = await DocumentModel.findById(documentId);
    if (!record) {
      res.status(404).json({
        error: 'Document not found',
        correlationId: req.correlationId
      });
      return;
    }

    // Only the owner may delete a document.
    const callerOwnsDocument = record.user_id === callerId;
    if (!callerOwnsDocument) {
      res.status(403).json({
        error: 'Access denied',
        correlationId: req.correlationId
      });
      return;
    }

    // Remove the database record first.
    const recordRemoved = await DocumentModel.delete(documentId);
    if (!recordRemoved) {
      res.status(500).json({
        error: 'Failed to delete document',
        correlationId: req.correlationId
      });
      return;
    }

    // Then remove the stored file; a failure here is logged, not surfaced.
    const storedPath = record.file_path;
    try {
      await fileStorageService.deleteFile(storedPath);
    } catch (error) {
      logger.warn('Failed to delete file from storage', {
        error,
        filePath: record.file_path,
        correlationId: req.correlationId
      });
    }

    res.json({
      message: 'Document deleted successfully',
      correlationId: req.correlationId || undefined
    });
  } catch (error) {
    logger.error('Delete document failed', {
      error,
      correlationId: req.correlationId
    });
    res.status(500).json({
      error: 'Delete document failed',
      correlationId: req.correlationId || undefined
    });
  }
},
/**
 * Loads a document's file from storage and returns its text.
 *
 * Files whose path ends in `.pdf` (case-insensitive) are parsed with
 * `pdf-parse`, and the extracted text is also written to the document's
 * `extracted_text` field. Any other file is decoded as UTF-8.
 *
 * Each failure is logged once at the step where it happened (storage read
 * or PDF parsing) and once more by the outer handler. Callers always
 * receive the same generic error, whatever the cause.
 *
 * @param documentId Id passed to `DocumentModel.findById`.
 * @returns The extracted or decoded text.
 * @throws Error with message `Failed to get document text` on any failure
 *   (document missing, file unreadable, PDF parse error, update error).
 */
async getDocumentText(documentId: string): Promise<string> {
  try {
    // Get document from database
    const document = await DocumentModel.findById(documentId);
    if (!document) {
      throw new Error('Document not found');
    }
    const filePath = document.file_path;

    // Read file from storage. Only the read itself is guarded here, so that
    // later failures (PDF parsing, database update) are not mislabelled as
    // the file being inaccessible.
    let fileBuffer: Awaited<ReturnType<typeof fileStorageService.getFile>>;
    try {
      fileBuffer = await fileStorageService.getFile(filePath);
    } catch (fileError) {
      logger.error('Document file not accessible', { filePath, documentId, error: fileError });
      throw new Error('Document file not accessible');
    }
    if (!fileBuffer) {
      logger.error('Document file not accessible', { filePath, documentId });
      throw new Error('Document file not accessible');
    }

    // For text files, read the content directly
    if (!filePath.toLowerCase().endsWith('.pdf')) {
      return fileBuffer.toString('utf-8');
    }

    // For PDF files, extract text using pdf-parse
    logger.info('Extracting text from PDF file', { documentId, filePath });
    let extractedText: string;
    try {
      // Loaded inside the function, as before, so the dependency is only
      // pulled in when a PDF is actually processed.
      const pdfParse = require('pdf-parse');
      const data = await pdfParse(fileBuffer);
      extractedText = data.text;
      logger.info('PDF text extraction completed', {
        documentId,
        textLength: extractedText.length,
        pages: data.numpages,
        fileSize: fileBuffer.length
      });
    } catch (pdfError) {
      logger.error('PDF text extraction failed', { documentId, error: pdfError });
      throw new Error(`PDF text extraction failed: ${pdfError instanceof Error ? pdfError.message : 'Unknown error'}`);
    }

    // Update document with extracted text. This sits outside the PDF guard
    // so a database failure is not reported as an extraction failure; it
    // still fails the call through the outer handler, as before.
    await DocumentModel.updateById(documentId, {
      extracted_text: extractedText
    });
    return extractedText;
  } catch (error) {
    logger.error('Get document text failed', { error, documentId });
    throw new Error('Failed to get document text');
  }
}
};