// cim_summary/backend/src/services/unifiedDocumentProcessor.ts

import { logger } from '../utils/logger';
import { config } from '../config/env';
import { documentProcessingService } from './documentProcessingService';
import { ragDocumentProcessor } from './ragDocumentProcessor';
import { agenticRAGProcessor } from './agenticRAGProcessor';
import { CIMReview } from './llmSchemas';
import { documentController } from '../controllers/documentController';

/**
 * Normalized outcome of running a single processing strategy over a document.
 * Every strategy in this file maps its underlying processor's output into this shape.
 */
interface ProcessingResult {
  /** Whether the strategy reported success. */
  success: boolean;
  /** Generated summary text; the failure paths in this file use an empty string. */
  summary: string;
  /** Structured analysis; the failure paths in this file use an empty object cast to CIMReview. */
  analysisData: CIMReview;
  /** Which strategy produced this result. */
  processingStrategy: 'chunking' | 'rag' | 'agentic_rag';
  /**
   * Processing duration. For chunking this is measured with Date.now(), i.e. milliseconds;
   * for the other strategies the value is passed through from the underlying processor
   * (presumably also milliseconds — TODO confirm).
   */
  processingTime: number;
  /** Number of API calls; for chunking this is an estimate rather than a measured count. */
  apiCalls: number;
  /** Error message when the strategy failed, otherwise undefined. */
  error: string | undefined;
}

/**
 * Side-by-side outcome of running all three strategies on the same document.
 */
interface ComparisonResult {
  /** Result of the chunking strategy. */
  chunking: ProcessingResult;
  /** Result of the RAG strategy. */
  rag: ProcessingResult;
  /** Result of the agentic RAG strategy. */
  agenticRag: ProcessingResult;
  /** Best-scoring successful strategy, or 'tie' when no strategy succeeded. */
  winner: 'chunking' | 'rag' | 'agentic_rag' | 'tie';
  /** Pairwise metrics that compare chunking against RAG only (agentic RAG is not included). */
  performanceMetrics: {
    /** chunking.processingTime minus rag.processingTime. */
    timeDifference: number;
    /** chunking.apiCalls minus rag.apiCalls. */
    apiCallDifference: number;
    /**
     * RAG's share of the combined summary-quality score of chunking and RAG:
     * 0.5 when both failed, 1.0 when only chunking failed, 0.0 when only RAG failed.
     */
    qualityScore: number;
  };
}

/** Name of a concrete processing strategy, derived from ProcessingResult. */
type ProcessingStrategyName = ProcessingResult['processingStrategy'];

/**
 * Single entry point that routes a document to one of three processing
 * strategies (chunking, RAG, agentic RAG) and normalizes each strategy's output
 * into a ProcessingResult. Strategy failures are reported through
 * `success: false` / `error` rather than thrown.
 */
class UnifiedDocumentProcessor {
  /**
   * Process a document using `options.strategy`, falling back to the configured strategy.
   * Any strategy value other than 'rag' or 'agentic_rag' is handled by chunking.
   *
   * @param documentId - Identifier of the document being processed.
   * @param userId - Identifier of the requesting user.
   * @param text - Document text.
   * @param options - Optional settings; `strategy` overrides the configured strategy,
   *   and the whole object is forwarded to the chunking service.
   * @returns The normalized result of the selected strategy.
   */
  async processDocument(
    documentId: string,
    userId: string,
    text: string,
    options: any = {}
  ): Promise<ProcessingResult> {
    const strategy = options?.strategy || config.processingStrategy;

    logger.info('Processing document with unified processor', {
      documentId,
      strategy,
      configStrategy: config.processingStrategy,
      textLength: text.length
    });

    switch (strategy) {
      case 'rag':
        return this.processWithRAG(documentId, text);
      case 'agentic_rag':
        return this.processWithAgenticRAG(documentId, userId, text);
      default:
        return this.processWithChunking(documentId, userId, text, options);
    }
  }

  /**
   * Build the result returned when a strategy throws.
   *
   * @param strategy - Strategy that failed.
   * @param error - The caught value; its message is used when it is an Error.
   * @param processingTime - Elapsed time before the failure, in milliseconds.
   */
  private buildFailureResult(
    strategy: ProcessingStrategyName,
    error: unknown,
    processingTime: number
  ): ProcessingResult {
    return {
      success: false,
      summary: '',
      analysisData: {} as CIMReview,
      processingStrategy: strategy,
      processingTime,
      apiCalls: 0,
      error: error instanceof Error ? error.message : 'Unknown error'
    };
  }

  /**
   * Process a document using the RAG approach.
   * Exceptions from the RAG processor are converted into a failed result, matching
   * the other strategies, so one failing strategy cannot reject a parallel comparison.
   */
  private async processWithRAG(documentId: string, text: string): Promise<ProcessingResult> {
    logger.info('Using RAG processing strategy', { documentId });

    const startTime = Date.now();

    try {
      const result = await ragDocumentProcessor.processDocument(text, documentId);

      return {
        success: result.success,
        summary: result.summary,
        analysisData: result.analysisData,
        processingStrategy: 'rag',
        processingTime: result.processingTime,
        apiCalls: result.apiCalls,
        error: result.error || undefined
      };
    } catch (error) {
      logger.error('RAG processing failed', { documentId, error });
      return this.buildFailureResult('rag', error, Date.now() - startTime);
    }
  }

  /**
   * Process a document using the agentic RAG approach.
   * When `text` is empty, the text is fetched through the document controller first.
   */
  private async processWithAgenticRAG(
    documentId: string,
    userId: string,
    text: string
  ): Promise<ProcessingResult> {
    logger.info('Using agentic RAG processing strategy', { documentId });

    const startTime = Date.now();

    try {
      let extractedText = text;
      if (!text) {
        logger.info('Extracting text for agentic RAG processing', { documentId });
        extractedText = await documentController.getDocumentText(documentId);
      }

      const result = await agenticRAGProcessor.processDocument(extractedText, documentId, userId);

      return {
        success: result.success,
        summary: result.summary,
        analysisData: result.analysisData,
        processingStrategy: 'agentic_rag',
        processingTime: result.processingTime,
        apiCalls: result.apiCalls,
        error: result.error || undefined
      };
    } catch (error) {
      logger.error('Agentic RAG processing failed', { documentId, error });
      return this.buildFailureResult('agentic_rag', error, Date.now() - startTime);
    }
  }

  /**
   * Process a document using the chunking approach.
   * The chunking service is called with the document id rather than the text;
   * `text` is only used to estimate the number of API calls.
   */
  private async processWithChunking(
    documentId: string,
    userId: string,
    text: string,
    options: any
  ): Promise<ProcessingResult> {
    logger.info('Using chunking processing strategy', { documentId });

    const startTime = Date.now();

    try {
      const result = await documentProcessingService.processDocument(documentId, userId, options);

      return {
        success: result.success,
        summary: result.summary || '',
        analysisData: (result.analysis as CIMReview) || ({} as CIMReview),
        processingStrategy: 'chunking',
        processingTime: Date.now() - startTime,
        // The chunking service does not report its call count, so this is approximate.
        apiCalls: this.estimateChunkingApiCalls(text),
        error: result.error || undefined
      };
    } catch (error) {
      logger.error('Chunking processing failed', { documentId, error });
      return this.buildFailureResult('chunking', error, Date.now() - startTime);
    }
  }

  /**
   * Run all three strategies in parallel and compare them.
   *
   * `performanceMetrics` compares chunking against RAG only. The winner is chosen
   * among successful strategies by a weighted score (50% summary quality, 25% speed,
   * 25% API-call economy); ties go to the earlier strategy in the order
   * chunking, rag, agentic_rag. When no strategy succeeds the winner is 'tie'.
   */
  async compareProcessingStrategies(
    documentId: string,
    userId: string,
    text: string,
    options: any = {}
  ): Promise<ComparisonResult> {
    logger.info('Comparing processing strategies', { documentId });

    // Each strategy converts its own failures into a result, so this does not reject
    // just because one strategy failed.
    const [chunkingResult, ragResult, agenticRagResult] = await Promise.all([
      this.processWithChunking(documentId, userId, text, options),
      this.processWithRAG(documentId, text),
      this.processWithAgenticRAG(documentId, userId, text)
    ]);

    const timeDifference = chunkingResult.processingTime - ragResult.processingTime;
    const apiCallDifference = chunkingResult.apiCalls - ragResult.apiCalls;
    const qualityScore = this.calculateQualityScore(chunkingResult, ragResult);

    const successful = [chunkingResult, ragResult, agenticRagResult].filter(
      candidate => candidate.success
    );

    let winner: ComparisonResult['winner'] = 'tie';
    const soleSurvivor = successful.length === 1 ? successful[0] : undefined;
    if (soleSurvivor) {
      // A single successful strategy wins regardless of its score.
      winner = soleSurvivor.processingStrategy;
    } else {
      let bestScore = Number.NEGATIVE_INFINITY;
      for (const candidate of successful) {
        const score = this.scoreResult(candidate);
        // Strict comparison keeps the earlier strategy on equal scores.
        if (score > bestScore) {
          bestScore = score;
          winner = candidate.processingStrategy;
        }
      }
    }

    return {
      chunking: chunkingResult,
      rag: ragResult,
      agenticRag: agenticRagResult,
      winner,
      performanceMetrics: {
        timeDifference,
        apiCallDifference,
        qualityScore
      }
    };
  }

  /**
   * Weighted score of a single successful result, used to pick the comparison winner.
   * Quality comes from the result's own summary; time is normalized against one minute
   * and API calls against ten calls.
   */
  private scoreResult(result: ProcessingResult): number {
    const quality = this.analyzeSummaryQuality(result.summary);
    const timeScore = this.costScore(result.processingTime, 60000);
    const apiScore = this.costScore(result.apiCalls, 10);
    return quality * 0.5 + timeScore * 0.25 + apiScore * 0.25;
  }

  /**
   * Map a cost onto (0, 1], where lower cost gives a higher score.
   * Missing, non-finite, or negative costs score 0 so they can never produce NaN.
   */
  private costScore(value: number, scale: number): number {
    if (!Number.isFinite(value) || value < 0) return 0;
    return 1 / (1 + value / scale);
  }

  /**
   * Estimate the number of API calls made by the chunking approach:
   * one call per chunk plus one for the final synthesis.
   * Tokens are approximated as one per four characters.
   */
  private estimateChunkingApiCalls(text: string): number {
    const chunkSize = Number(config.llm.chunkSize);
    const estimatedTokens = Math.ceil((text?.length ?? 0) / 4);

    // Without a usable chunk size, assume any non-empty text fits in a single chunk
    // instead of returning Infinity or NaN.
    if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
      return (estimatedTokens > 0 ? 1 : 0) + 1;
    }

    return Math.ceil(estimatedTokens / chunkSize) + 1;
  }

  /**
   * Relative quality of RAG versus chunking, in [0, 1].
   *
   * @returns 0.5 when both failed or neither summary scores above zero, 1.0 when only
   *   chunking failed, 0.0 when only RAG failed, otherwise RAG's share of the combined
   *   summary-quality score.
   */
  private calculateQualityScore(chunkingResult: ProcessingResult, ragResult: ProcessingResult): number {
    if (!chunkingResult.success && !ragResult.success) return 0.5;
    if (!chunkingResult.success) return 1.0;
    if (!ragResult.success) return 0.0;

    const chunkingScore = this.analyzeSummaryQuality(chunkingResult.summary);
    const ragScore = this.analyzeSummaryQuality(ragResult.summary);
    const combined = chunkingScore + ragScore;

    // Both summaries scoring zero would otherwise divide by zero.
    return combined > 0 ? ragScore / combined : 0.5;
  }

  /**
   * Heuristic summary quality in [0, 1]: 70% length (saturating at 5000 characters)
   * and 30% markdown structure (saturating at ten structural elements).
   */
  private analyzeSummaryQuality(summary: string): number {
    if (!summary) return 0;

    // Headers and list markers only count at the start of a line, so inline hyphens
    // and the closing asterisk of bold text are not counted as list items.
    const headerCount = (summary.match(/^[ \t]*#{1,6}\s/gm) || []).length;
    const listCount = (summary.match(/^[ \t]*[-*+]\s/gm) || []).length;
    const boldCount = (summary.match(/\*\*.*?\*\*/g) || []).length;

    const lengthFactor = Math.min(summary.length / 5000, 1);
    const structureFactor = Math.min((headerCount + listCount + boldCount) / 10, 1);

    return (lengthFactor * 0.7) + (structureFactor * 0.3);
  }

  /**
   * Get processing statistics.
   * Currently a placeholder: every value is zero and no data source is queried.
   */
  async getProcessingStats(): Promise<{
    totalDocuments: number;
    chunkingSuccess: number;
    ragSuccess: number;
    agenticRagSuccess: number;
    averageProcessingTime: {
      chunking: number;
      rag: number;
      agenticRag: number;
    };
    averageApiCalls: {
      chunking: number;
      rag: number;
      agenticRag: number;
    };
  }> {
    // This would typically query a database for processing statistics
    // For now, return mock data
    return {
      totalDocuments: 0,
      chunkingSuccess: 0,
      ragSuccess: 0,
      agenticRagSuccess: 0,
      averageProcessingTime: {
        chunking: 0,
        rag: 0,
        agenticRag: 0
      },
      averageApiCalls: {
        chunking: 0,
        rag: 0,
        agenticRag: 0
      }
    };
  }

  /**
   * Re-process a document with an explicitly chosen strategy.
   * Equivalent to calling processDocument with `options.strategy` set to `newStrategy`.
   */
  async switchStrategy(
    documentId: string,
    userId: string,
    text: string,
    newStrategy: 'chunking' | 'rag' | 'agentic_rag',
    options: any = {}
  ): Promise<ProcessingResult> {
    logger.info('Switching processing strategy', { documentId, newStrategy });

    return this.processDocument(documentId, userId, text, {
      ...options,
      strategy: newStrategy
    });
  }
}

/** Shared UnifiedDocumentProcessor instance, created once when this module is loaded. */
export const unifiedDocumentProcessor = new UnifiedDocumentProcessor();