Refactor: Codebase cleanup and modularization

- Remove outdated documentation files (7 files)
- Remove deprecated code (database.ts, authController.ts, auth.ts)
- Extract constants to backend/src/config/constants.ts
- Consolidate shared types (processing, llm, document, job)
- Create LLM modularization structure:
  - llmPrompts/ directory for prompt builders
  - llmProviders/ directory for provider implementations
  - llmUtils/ directory for utility functions
- Extract common error handling patterns to errorHandlers.ts
- Organize scripts into subdirectories (monitoring/, testing/, debugging/, setup/)
- Update README.md with current documentation references

All functionality preserved, structure improved for maintainability.
This commit is contained in:
admin
2025-11-11 06:52:10 -05:00
parent ecd4b13115
commit e406d33074
73 changed files with 5441 additions and 7054 deletions

View File

@@ -0,0 +1,161 @@
#!/usr/bin/env ts-node
/**
* Script to check database for failed or stuck documents
*
* This script queries the documents table to find:
* - Documents stuck in 'uploading' or 'processing_llm' status
* - Documents with 'failed' status and their error messages
* - Patterns in failure types
*/
import { DocumentModel } from '../models/DocumentModel';
import { config } from '../config/env';
import { logger } from '../utils/logger';
// NOTE(review): this interface is not referenced anywhere in the visible
// script — presumably intended as a typed shape for the status summary;
// confirm usage elsewhere before removing.
interface DocumentStatus {
  status: string;   // status value the documents share (e.g. 'failed')
  count: number;    // number of documents with this status
  documents: any[]; // the matching document rows
}
// NOTE(review): also unreferenced in the visible script — presumably a typed
// shape for the error-pattern analysis below; confirm before removing.
interface FailurePattern {
  errorPattern: string; // keyword signature extracted from error messages
  count: number;        // how many failures matched this signature
  examples: string[];   // raw error messages that produced the signature
}
/**
 * Scans the documents table (up to 1000 rows) and reports:
 *  - documents sitting in an in-flight status ('uploading', 'processing',
 *    'processing_llm', 'extracting_text') that have not been updated in the
 *    last hour ("stuck")
 *  - failed documents, bucketed by a crude keyword signature of their
 *    error message so recurring failure modes surface
 *
 * @returns summary object: total document count, the raw status groups,
 *          the stuck-document count, and the failed-document count
 * @throws rethrows any error from the database layer after logging it
 */
async function checkStuckDocuments() {
  console.log('\n📊 Checking for Stuck Documents...\n');
  try {
    // Get all documents (limit to 1000 for performance)
    const allDocuments = await DocumentModel.findAll(1000, 0);

    // Group documents by status; missing status is bucketed as 'unknown'.
    const statusGroups: { [key: string]: any[] } = {};
    for (const doc of allDocuments) {
      const status = doc.status || 'unknown';
      if (!statusGroups[status]) {
        statusGroups[status] = [];
      }
      statusGroups[status].push(doc);
    }

    // Statuses that indicate in-flight processing.
    const stuckStatuses = ['uploading', 'processing', 'processing_llm', 'extracting_text'];
    const oneHourAgo = Date.now() - (60 * 60 * 1000);

    // Single source of truth for "stuck" so the per-status report and the
    // summary count cannot drift apart. A document with no updated_at is
    // treated as stuck (timestamp 0 is always older than one hour ago).
    const isStuck = (doc: any): boolean => {
      const status = doc.status || 'unknown';
      if (!stuckStatuses.includes(status)) return false;
      const updatedAt = doc.updated_at ? new Date(doc.updated_at).getTime() : 0;
      return updatedAt < oneHourAgo;
    };

    console.log('Status Summary:');
    for (const [status, docs] of Object.entries(statusGroups)) {
      console.log(` ${status}: ${docs.length} documents`);
      if (stuckStatuses.includes(status)) {
        const stuckDocs = docs.filter(isStuck);
        if (stuckDocs.length > 0) {
          console.log(` ⚠️ ${stuckDocs.length} documents stuck (not updated in last hour)`);
          // Show at most five examples to keep the report readable.
          stuckDocs.slice(0, 5).forEach(doc => {
            const updatedAt = doc.updated_at ? new Date(doc.updated_at).toISOString() : 'unknown';
            console.log(` - ${doc.id}: Updated ${updatedAt}`);
          });
        }
      }
    }

    // Failed documents: bucket by a keyword signature of the error message.
    const failedDocs = statusGroups['failed'] || [];
    if (failedDocs.length > 0) {
      console.log(`\n❌ Failed Documents: ${failedDocs.length} total\n`);
      const errorPatterns: { [key: string]: string[] } = {};
      for (const doc of failedDocs) {
        const errorMsg = doc.error_message || 'Unknown error';
        // Signature = first 3 "significant" words (longer than 5 chars and
        // not in the noise-word list).
        const keyWords = errorMsg
          .toLowerCase()
          .split(/\s+/)
          .filter((word: string) => word.length > 5 && !['failed', 'error', 'the', 'and', 'for'].includes(word))
          .slice(0, 3)
          .join(' ');
        if (!errorPatterns[keyWords]) {
          errorPatterns[keyWords] = [];
        }
        errorPatterns[keyWords].push(errorMsg);
      }

      // Report the ten most frequent patterns, most common first.
      console.log('Error Patterns:');
      const sortedPatterns = Object.entries(errorPatterns)
        .sort((a, b) => b[1].length - a[1].length)
        .slice(0, 10);
      for (const [pattern, examples] of sortedPatterns) {
        console.log(` "${pattern}": ${examples.length} occurrences`);
        console.log(` Example: ${examples[0].substring(0, 100)}...`);
      }
    }

    return {
      totalDocuments: allDocuments.length,
      statusGroups,
      // Every document is in exactly one status group, so filtering the
      // original list with the shared predicate matches the report above.
      stuckCount: allDocuments.filter(isStuck).length,
      failedCount: failedDocs.length
    };
  } catch (error) {
    console.error('Error checking database:', error);
    logger.error('Database check failed', { error });
    throw error;
  }
}
/**
 * CLI entry point: runs the diagnostic, prints a summary banner, and exits
 * with code 1 when stuck or failed documents (or an error) are encountered,
 * 0 otherwise.
 */
async function main() {
  const divider = '='.repeat(60);
  console.log('🔍 Database Failure Diagnostic Tool');
  console.log(divider);
  try {
    const results = await checkStuckDocuments();

    // Summary banner with the three headline counts.
    console.log('\n' + divider);
    console.log('SUMMARY');
    console.log(divider);
    console.log(`Total Documents: ${results.totalDocuments}`);
    console.log(`Stuck Documents: ${results.stuckCount}`);
    console.log(`Failed Documents: ${results.failedCount}`);
    console.log(divider);

    const hasIssues = results.stuckCount > 0 || results.failedCount > 0;
    if (hasIssues) {
      console.log('\n⚠ Issues found. Review the details above.');
      process.exit(1);
    }
    console.log('\n✅ No issues found.');
    process.exit(0);
  } catch (error) {
    console.error('\n💥 Diagnostic tool encountered an error:', error);
    process.exit(1);
  }
}
// Run if executed directly (ts-node / node); the function is also exported
// so other tooling can reuse the diagnostic programmatically.
if (require.main === module) {
  // main() handles its own errors and always calls process.exit, so its
  // promise never rejects; `void` marks it as intentionally unawaited
  // (satisfies no-floating-promises).
  void main();
}

export { checkStuckDocuments };