fix(core): Overhaul and fix the end-to-end document processing pipeline
This commit is contained in:
@@ -63,6 +63,10 @@
|
||||
}
|
||||
],
|
||||
"rewrites": [
|
||||
{
|
||||
"source": "/api/**",
|
||||
"function": "api"
|
||||
},
|
||||
{
|
||||
"source": "**",
|
||||
"destination": "/index.html"
|
||||
|
||||
@@ -387,19 +387,6 @@ const Dashboard: React.FC = () => {
|
||||
<span className="text-sm text-white">
|
||||
Welcome, {user?.name || user?.email}
|
||||
</span>
|
||||
{/* Debug buttons - show in production for troubleshooting */}
|
||||
<button
|
||||
onClick={handleDebugAuth}
|
||||
className="bg-yellow-500 hover:bg-yellow-600 text-white px-3 py-1 rounded text-sm"
|
||||
>
|
||||
Debug Auth
|
||||
</button>
|
||||
<button
|
||||
onClick={handleTestAPIAuth}
|
||||
className="bg-blue-500 hover:bg-blue-600 text-white px-3 py-1 rounded text-sm"
|
||||
>
|
||||
Test API
|
||||
</button>
|
||||
<LogoutButton variant="button" className="bg-error-500 hover:bg-error-600 text-white" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -14,10 +14,10 @@ interface UploadedFile {
|
||||
progress: number;
|
||||
error?: string;
|
||||
documentId?: string; // Real document ID from backend
|
||||
// GCS-specific fields
|
||||
gcsError?: boolean;
|
||||
storageType?: 'gcs' | 'local';
|
||||
gcsUrl?: string;
|
||||
// Firebase Storage specific fields
|
||||
storageError?: boolean;
|
||||
storageType?: 'firebase' | 'local';
|
||||
storageUrl?: string;
|
||||
}
|
||||
|
||||
interface DocumentUploadProps {
|
||||
@@ -92,17 +92,15 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
|
||||
try {
|
||||
// Upload the document with optimized agentic RAG processing (no strategy selection needed)
|
||||
const document = await documentService.uploadDocument(
|
||||
file,
|
||||
const result = await documentService.uploadDocument(
|
||||
file,
|
||||
(progress) => {
|
||||
setUploadedFiles(prev =>
|
||||
prev.map(f =>
|
||||
f.id === uploadedFile.id
|
||||
? { ...f, progress }
|
||||
: f
|
||||
f.id === uploadedFile.id ? { ...f, progress } : f
|
||||
)
|
||||
);
|
||||
},
|
||||
},
|
||||
abortController.signal
|
||||
);
|
||||
|
||||
@@ -141,13 +139,13 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
} else {
|
||||
console.error('Upload failed:', error);
|
||||
|
||||
// Handle GCS-specific errors
|
||||
// Handle storage-specific errors
|
||||
let errorMessage = 'Upload failed';
|
||||
let isGCSError = false;
|
||||
let isStorageError = false;
|
||||
|
||||
if (GCSErrorHandler.isGCSError(error)) {
|
||||
errorMessage = GCSErrorHandler.getErrorMessage(error as GCSError);
|
||||
isGCSError = true;
|
||||
isStorageError = true;
|
||||
} else if (error instanceof Error) {
|
||||
errorMessage = error.message;
|
||||
}
|
||||
@@ -159,8 +157,8 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
...f,
|
||||
status: 'error',
|
||||
error: errorMessage,
|
||||
// Add GCS error indicator
|
||||
...(isGCSError && { gcsError: true })
|
||||
// Add storage error indicator
|
||||
...(isStorageError && { storageError: true })
|
||||
}
|
||||
: f
|
||||
)
|
||||
@@ -297,19 +295,19 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
}
|
||||
};
|
||||
|
||||
const getStatusText = (status: UploadedFile['status'], error?: string, gcsError?: boolean) => {
|
||||
const getStatusText = (status: UploadedFile['status'], error?: string, storageError?: boolean) => {
|
||||
switch (status) {
|
||||
case 'uploading':
|
||||
return 'Uploading to Google Cloud Storage...';
|
||||
return 'Uploading to Firebase Storage...';
|
||||
case 'uploaded':
|
||||
return 'Uploaded to GCS ✓';
|
||||
return 'Uploaded to Firebase Storage ✓';
|
||||
case 'processing':
|
||||
return 'Processing with Optimized Agentic RAG...';
|
||||
return 'Processing with Document AI + Optimized Agentic RAG...';
|
||||
case 'completed':
|
||||
return 'Completed ✓';
|
||||
return 'Completed ✓ (PDF automatically deleted)';
|
||||
case 'error':
|
||||
if (error === 'Upload cancelled') return 'Cancelled';
|
||||
if (gcsError) return 'GCS Error';
|
||||
if (storageError) return 'Firebase Storage Error';
|
||||
return 'Error';
|
||||
default:
|
||||
return '';
|
||||
@@ -323,10 +321,10 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div className="flex items-center">
|
||||
<CheckCircle className="h-5 w-5 text-blue-600 mr-2" />
|
||||
<div>
|
||||
<h3 className="text-sm font-medium text-blue-800">Optimized Agentic RAG Processing</h3>
|
||||
<h3 className="text-sm font-medium text-blue-800">Document AI + Optimized Agentic RAG Processing</h3>
|
||||
<p className="text-sm text-blue-700 mt-1">
|
||||
All documents are automatically processed using our advanced optimized agentic RAG system,
|
||||
which includes intelligent chunking, vectorization, and multi-agent analysis for the best results.
|
||||
All documents are automatically processed using Google Document AI for extraction and our advanced optimized agentic RAG system for analysis,
|
||||
including intelligent chunking, vectorization, and multi-agent CIM review. PDFs are automatically deleted after processing.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -351,7 +349,7 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
Drag and drop PDF files here, or click to browse
|
||||
</p>
|
||||
<p className="text-xs text-gray-500">
|
||||
Maximum file size: 50MB • Supported format: PDF • Stored securely in Google Cloud Storage • Automatic Optimized Agentic RAG Processing
|
||||
Maximum file size: 50MB • Supported format: PDF • Stored securely in Firebase Storage • Automatic Document AI + Optimized Agentic RAG Processing • PDFs deleted after processing
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -379,8 +377,8 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div>
|
||||
<h4 className="text-sm font-medium text-success-800">Upload Complete</h4>
|
||||
<p className="text-sm text-success-700 mt-1">
|
||||
Files have been uploaded successfully to Google Cloud Storage! You can now navigate away from this page.
|
||||
Processing will continue in the background using Optimized Agentic RAG and you can check the status in the Documents tab.
|
||||
Files have been uploaded successfully to Firebase Storage! You can now navigate away from this page.
|
||||
Processing will continue in the background using Document AI + Optimized Agentic RAG. PDFs will be automatically deleted after processing to save costs.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -426,10 +424,10 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
<div className="flex items-center space-x-1">
|
||||
{getStatusIcon(file.status)}
|
||||
<span className="text-xs text-gray-600">
|
||||
{getStatusText(file.status, file.error, file.gcsError)}
|
||||
{getStatusText(file.status, file.error, file.storageError)}
|
||||
</span>
|
||||
{/* GCS indicator */}
|
||||
{file.storageType === 'gcs' && (
|
||||
{/* Firebase Storage indicator */}
|
||||
{file.storageType === 'firebase' && (
|
||||
<Cloud className="h-3 w-3 text-blue-500" />
|
||||
)}
|
||||
</div>
|
||||
@@ -452,4 +450,4 @@ const DocumentUpload: React.FC<DocumentUploadProps> = ({
|
||||
);
|
||||
};
|
||||
|
||||
export default DocumentUpload;
|
||||
export default DocumentUpload;
|
||||
@@ -60,7 +60,7 @@ export interface Document {
|
||||
file_path: string;
|
||||
file_size: number;
|
||||
uploaded_at: string;
|
||||
status: 'uploaded' | 'extracting_text' | 'processing_llm' | 'generating_pdf' | 'completed' | 'failed';
|
||||
status: 'uploading' | 'uploaded' | 'extracting_text' | 'processing_llm' | 'generating_pdf' | 'completed' | 'failed';
|
||||
extracted_text?: string;
|
||||
generated_summary?: string;
|
||||
summary_markdown_path?: string;
|
||||
@@ -219,7 +219,7 @@ export class GCSErrorHandler {
|
||||
|
||||
class DocumentService {
|
||||
/**
|
||||
* Upload a document for processing
|
||||
* Upload a document using Firebase Storage direct upload (new method)
|
||||
*/
|
||||
async uploadDocument(
|
||||
file: File,
|
||||
@@ -233,7 +233,137 @@ class DocumentService {
|
||||
throw new Error('Authentication required. Please log in to upload documents.');
|
||||
}
|
||||
|
||||
console.log('📤 Starting document upload...');
|
||||
console.log('📤 Starting Firebase Storage direct upload...');
|
||||
console.log('📤 File:', file.name, 'Size:', file.size, 'Type:', file.type);
|
||||
console.log('📤 Token available:', !!token);
|
||||
|
||||
// Step 1: Get signed upload URL
|
||||
onProgress?.(5); // 5% - Getting upload URL
|
||||
|
||||
console.log('🌐 Making request to upload-url endpoint');
|
||||
console.log('🌐 Base URL:', API_BASE_URL);
|
||||
console.log('🌐 Full URL would be:', `${API_BASE_URL}/documents/upload-url`);
|
||||
console.log('🌐 Request payload:', { fileName: file.name, fileSize: file.size, contentType: file.type });
|
||||
|
||||
const uploadUrlResponse = await apiClient.post('/documents/upload-url', {
|
||||
fileName: file.name,
|
||||
fileSize: file.size,
|
||||
contentType: file.type
|
||||
}, { signal });
|
||||
|
||||
const { documentId, uploadUrl } = uploadUrlResponse.data;
|
||||
console.log('✅ Got signed upload URL for document:', documentId);
|
||||
|
||||
// Step 2: Upload directly to Firebase Storage
|
||||
onProgress?.(10); // 10% - Starting direct upload
|
||||
|
||||
await this.uploadToFirebaseStorage(file, uploadUrl, onProgress, signal);
|
||||
console.log('✅ File uploaded to Firebase Storage');
|
||||
|
||||
// Step 3: Confirm upload and trigger processing
|
||||
onProgress?.(95); // 95% - Confirming upload
|
||||
|
||||
const confirmResponse = await apiClient.post(`/documents/${documentId}/confirm-upload`, {}, { signal });
|
||||
|
||||
onProgress?.(100); // 100% - Complete
|
||||
console.log('✅ Upload confirmed and processing started');
|
||||
|
||||
return {
|
||||
id: documentId,
|
||||
...confirmResponse.data
|
||||
};
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('❌ Firebase Storage upload failed:', error);
|
||||
|
||||
// Handle specific error cases
|
||||
if (error.name === 'AbortError') {
|
||||
throw new Error('Upload was cancelled.');
|
||||
}
|
||||
|
||||
if (error.response?.status === 401) {
|
||||
throw new Error('Authentication required. Please log in again.');
|
||||
}
|
||||
|
||||
if (error.response?.status === 400) {
|
||||
throw new Error(error.response?.data?.error || 'Invalid request');
|
||||
}
|
||||
|
||||
if (error.response?.status >= 500) {
|
||||
throw new Error('Server error. Please try again later.');
|
||||
}
|
||||
|
||||
// Generic error fallback
|
||||
throw new Error(error.response?.data?.error || error.message || 'Upload failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Upload file directly to Firebase Storage using signed URL.
 *
 * Sends `file` with a single `PUT` request to `uploadUrl` through
 * `XMLHttpRequest` (used so upload progress events are available).
 *
 * @param file - The file to send; its `type` is sent as the `Content-Type` header.
 * @param uploadUrl - Signed URL that accepts the `PUT`.
 * @param onProgress - Optional callback receiving overall progress. Bytes sent
 *   are mapped onto the 10–90 range so the caller can use the remaining range
 *   for the steps before and after the transfer.
 * @param signal - Optional abort signal. Aborting it cancels the request.
 * @returns A promise that resolves once the server answers with a 2xx status.
 * @throws Error when the server answers with a non-2xx status, on a network
 *   error, or (with `name === 'AbortError'`) when the upload is cancelled.
 */
private async uploadToFirebaseStorage(
  file: File,
  uploadUrl: string,
  onProgress?: (progress: number) => void,
  signal?: AbortSignal
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    // Cancellation is reported with the 'AbortError' name so that callers
    // checking `error.name === 'AbortError'` recognise it; the message is
    // unchanged.
    const createAbortError = (): Error => {
      const abortError = new Error('Upload was cancelled');
      abortError.name = 'AbortError';
      return abortError;
    };

    // An 'abort' listener never fires for a signal that was aborted before
    // the listener was attached, so this case has to be checked explicitly.
    if (signal?.aborted) {
      reject(createAbortError());
      return;
    }

    const xhr = new XMLHttpRequest();

    const handleAbort = (): void => {
      xhr.abort();
      reject(createAbortError());
    };

    // Remove the listener once the request settles so a long-lived signal
    // does not keep a reference to a finished upload.
    const detachAbortListener = (): void => {
      signal?.removeEventListener('abort', handleAbort);
    };

    // Handle upload progress
    xhr.upload.addEventListener('progress', (event) => {
      // `total` can be 0 (e.g. empty file); skip to avoid reporting NaN.
      if (event.lengthComputable && event.total > 0 && onProgress) {
        // Map Firebase Storage upload to 10%-90% of overall progress
        const uploadProgress = Math.round((event.loaded / event.total) * 80) + 10;
        onProgress(uploadProgress);
      }
    });

    // Handle completion
    xhr.addEventListener('load', () => {
      detachAbortListener();
      if (xhr.status >= 200 && xhr.status < 300) {
        resolve();
      } else {
        reject(new Error(`Firebase Storage upload failed: ${xhr.status} ${xhr.statusText}`));
      }
    });

    // Handle errors
    xhr.addEventListener('error', () => {
      detachAbortListener();
      reject(new Error('Firebase Storage upload failed: Network error'));
    });

    // Handle abort
    signal?.addEventListener('abort', handleAbort, { once: true });

    // Start upload
    xhr.open('PUT', uploadUrl);
    xhr.setRequestHeader('Content-Type', file.type);
    xhr.send(file);
  });
}
|
||||
|
||||
/**
|
||||
* Legacy multipart upload method (kept for compatibility)
|
||||
*/
|
||||
async uploadDocumentLegacy(
|
||||
file: File,
|
||||
onProgress?: (progress: number) => void,
|
||||
signal?: AbortSignal
|
||||
): Promise<Document> {
|
||||
try {
|
||||
// Check authentication before upload
|
||||
const token = await authService.getToken();
|
||||
if (!token) {
|
||||
throw new Error('Authentication required. Please log in to upload documents.');
|
||||
}
|
||||
|
||||
console.log('📤 Starting legacy multipart upload...');
|
||||
console.log('📤 File:', file.name, 'Size:', file.size, 'Type:', file.type);
|
||||
console.log('📤 Token available:', !!token);
|
||||
|
||||
@@ -243,7 +373,7 @@ class DocumentService {
|
||||
// Always use optimized agentic RAG processing - no strategy selection needed
|
||||
formData.append('processingStrategy', 'optimized_agentic_rag');
|
||||
|
||||
const response = await apiClient.post('/documents', formData, {
|
||||
const response = await apiClient.post('/documents/upload', formData, {
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data',
|
||||
},
|
||||
@@ -256,10 +386,10 @@ class DocumentService {
|
||||
},
|
||||
});
|
||||
|
||||
console.log('✅ Document upload successful:', response.data);
|
||||
console.log('✅ Legacy document upload successful:', response.data);
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
console.error('❌ Document upload failed:', error);
|
||||
console.error('❌ Legacy document upload failed:', error);
|
||||
|
||||
// Provide more specific error messages
|
||||
if (error.response?.status === 401) {
|
||||
|
||||
Reference in New Issue
Block a user