// cim_summary/backend/src/routes/documentAudit.ts

import { Router, Request, Response } from 'express';
import { getSupabaseServiceClient } from '../config/supabase';
import { logger } from '../utils/logger';
import { addCorrelationId } from '../middleware/validation';

// Express router holding the document-audit endpoints defined in this file.
const router = Router();
// Run addCorrelationId ahead of every route registered on this router. The
// handlers in this file read `req.correlationId`; presumably this middleware
// is what populates it — confirm in ../middleware/validation.
router.use(addCorrelationId);

/** Maximum number of characters of chunk content echoed back as a preview. */
const CONTENT_PREVIEW_LENGTH = 200;

/**
 * Number of dimensions in a stored embedding.
 *
 * The embedding may be an array-like value or a JSON-encoded string. Anything
 * missing, unparseable, or not array-shaped yields 0 instead of throwing, so a
 * single bad row cannot fail the whole audit request.
 */
function countEmbeddingDimensions(embedding: unknown): number {
  if (!embedding) {
    return 0;
  }

  if (typeof embedding === 'string') {
    try {
      const parsed: unknown = JSON.parse(embedding);
      return Array.isArray(parsed) ? parsed.length : 0;
    } catch {
      return 0;
    }
  }

  const length = (embedding as { length?: unknown }).length;
  return typeof length === 'number' ? length : 0;
}

/**
 * Short preview of a chunk's content.
 *
 * Returns 'No content' when the content is missing or empty, and only appends
 * an ellipsis when the content was actually truncated.
 */
function buildContentPreview(content: unknown): string {
  if (typeof content !== 'string' || content.length === 0) {
    return 'No content';
  }

  return content.length > CONTENT_PREVIEW_LENGTH
    ? `${content.substring(0, CONTENT_PREVIEW_LENGTH)}...`
    : content;
}

/**
 * Duration of a job in whole seconds.
 *
 * Uses `completed_at - started_at` when both are present, `now - started_at`
 * when the job has started but has no completion time, and null when the job
 * has no start time.
 */
function computeJobDurationSeconds(job: {
  started_at?: string | null;
  completed_at?: string | null;
}): number | null {
  if (!job.started_at) {
    return null;
  }

  const startedMs = new Date(job.started_at).getTime();
  const endedMs = job.completed_at ? new Date(job.completed_at).getTime() : Date.now();
  return Math.round((endedMs - startedMs) / 1000);
}

/**
 * GET /api/audit/document/:documentId
 *
 * Returns a step-by-step audit trail for one document's processing, assembled
 * from the `documents`, `processing_jobs`, `document_chunks` and `cim_reviews`
 * tables.
 *
 * Responses:
 *  - 200 `{ success: true, data, correlationId }` with the audit trail
 *  - 404 when the document row cannot be loaded
 *  - 500 when building the trail throws
 *
 * A failure of one of the secondary queries (jobs, chunks, review) does not
 * fail the request: the affected section is reported as empty and the query
 * error is logged.
 */
router.get('/document/:documentId', async (req: Request, res: Response): Promise<void> => {
  const correlationId = req.correlationId || undefined;

  try {
    const { documentId } = req.params;
    const supabase = getSupabaseServiceClient();

    // Get document details
    const { data: document, error: docError } = await supabase
      .from('documents')
      .select('*')
      .eq('id', documentId)
      .single();

    if (docError || !document) {
      res.status(404).json({
        success: false,
        error: 'Document not found',
        documentId,
        correlationId,
      });
      return;
    }

    // The remaining lookups are independent of each other, so run them concurrently.
    const [jobsResult, chunksResult, reviewResult] = await Promise.all([
      // All processing jobs for this document, newest first
      supabase
        .from('processing_jobs')
        .select('*')
        .eq('document_id', documentId)
        .order('created_at', { ascending: false }),
      // Document chunks (embeddings), in chunk order
      supabase
        .from('document_chunks')
        .select('id, chunk_index, content, metadata, created_at, embedding')
        .eq('document_id', documentId)
        .order('chunk_index', { ascending: true }),
      // Most recent CIM review, if any. maybeSingle() returns null data (not an
      // error) when no row exists; limit(1) keeps several reviews from being
      // reported as "no review".
      supabase
        .from('cim_reviews')
        .select('*')
        .eq('document_id', documentId)
        .order('created_at', { ascending: false })
        .limit(1)
        .maybeSingle(),
    ]);

    // Surface secondary query failures instead of silently presenting them as empty data.
    const queryErrors = {
      processing_jobs: jobsResult.error,
      document_chunks: chunksResult.error,
      cim_reviews: reviewResult.error,
    };
    for (const [table, queryError] of Object.entries(queryErrors)) {
      if (queryError) {
        logger.error('Audit trail query failed; section will be reported as empty', {
          table,
          error: queryError.message,
          documentId,
          correlationId,
        });
      }
    }

    const jobs: any[] = jobsResult.data ?? [];
    const chunks: any[] = chunksResult.data ?? [];
    const review = reviewResult.data ?? null;

    const chunksWithEmbeddings = chunks.filter(chunk => chunk.embedding).length;

    // Build comprehensive audit trail
    const auditTrail = {
      document: {
        id: document.id,
        filePath: document.file_path,
        fileName: document.file_path?.split('/').pop() || 'Unknown',
        status: document.status,
        uploadStatus: document.upload_status,
        processingStatus: document.processing_status,
        createdAt: document.created_at,
        updatedAt: document.updated_at,
        processingCompletedAt: document.processing_completed_at,
        generatedSummary: document.generated_summary ? 'Yes' : 'No',
        hasAnalysisData: !!document.analysis_data,
      },
      processingJobs: jobs.map(job => ({
        id: job.id,
        status: job.status,
        strategy: job.options?.strategy || 'unknown',
        attempts: job.attempts,
        maxAttempts: job.max_attempts,
        createdAt: job.created_at,
        startedAt: job.started_at,
        completedAt: job.completed_at,
        error: job.error,
        processingDuration: computeJobDurationSeconds(job),
        options: job.options,
      })),
      vectorEmbeddings: {
        totalChunks: chunks.length,
        chunksWithEmbeddings,
        chunks: chunks.map(chunk => ({
          index: chunk.chunk_index,
          contentLength: chunk.content?.length || 0,
          contentPreview: buildContentPreview(chunk.content),
          hasEmbedding: !!chunk.embedding,
          embeddingDimensions: countEmbeddingDimensions(chunk.embedding),
          createdAt: chunk.created_at,
          metadata: chunk.metadata,
        })),
      },
      cimReview: review
        ? {
            id: review.id,
            exists: true,
            createdAt: review.created_at,
            updatedAt: review.updated_at,
            hasData: true,
          }
        : {
            exists: false,
            message: 'No CIM review generated yet',
          },
      processingSteps: buildProcessingSteps(document, jobs, chunks, review),
      timeline: buildTimeline(document, jobs, chunks, review),
      summary: {
        overallStatus: document.status,
        totalProcessingTime:
          document.processing_completed_at && document.created_at
            ? Math.round(
                (new Date(document.processing_completed_at).getTime() -
                  new Date(document.created_at).getTime()) /
                  1000
              )
            : null,
        totalJobs: jobs.length,
        successfulJobs: jobs.filter(job => job.status === 'completed').length,
        failedJobs: jobs.filter(job => job.status === 'failed').length,
        totalChunks: chunks.length,
        chunksWithEmbeddings,
        hasReview: !!review,
        // Jobs are ordered newest first, so this is the most recent recorded error.
        lastError: jobs.find(job => job.error)?.error || null,
      },
    };

    logger.info('Document audit trail retrieved', {
      documentId,
      status: document.status,
      totalJobs: jobs.length,
      totalChunks: chunks.length,
      correlationId,
    });

    res.json({
      success: true,
      data: auditTrail,
      correlationId,
    });
  } catch (error) {
    logger.error('Failed to get document audit trail', {
      error: error instanceof Error ? error.message : 'Unknown error',
      documentId: req.params.documentId,
      correlationId,
    });

    res.status(500).json({
      success: false,
      error: 'Failed to retrieve document audit trail',
      message: error instanceof Error ? error.message : 'Unknown error',
      correlationId,
    });
  }
});

type ProcessingStepStatus = 'completed' | 'in_progress' | 'failed' | 'pending';
type ProcessingStep = { step: string; status: ProcessingStepStatus; details: any; timestamp?: string };

/**
 * Build the ordered list of pipeline steps for a document's audit trail.
 *
 * Each step's status is derived from the rows passed in; nothing is fetched
 * here.
 *
 * @param document - Row from `documents`.
 * @param jobs - Processing jobs; the first element is treated as the latest job.
 * @param chunks - Document chunks; the first element supplies the chunking timestamp.
 * @param review - CIM review row, or a falsy value when none exists.
 * @returns Seven steps, from upload through final status.
 */
function buildProcessingSteps(
  document: any,
  jobs: any[],
  chunks: any[],
  review: any
): ProcessingStep[] {
  // An embedding may be an array-like value or a JSON-encoded string. Malformed
  // or non-array data counts as 0 dimensions rather than throwing, so one bad
  // row cannot abort the whole audit.
  const dimensionsOf = (embedding: unknown): number => {
    if (!embedding) {
      return 0;
    }
    if (typeof embedding === 'string') {
      try {
        const parsed: unknown = JSON.parse(embedding);
        return Array.isArray(parsed) ? parsed.length : 0;
      } catch {
        return 0;
      }
    }
    const length = (embedding as { length?: unknown }).length;
    return typeof length === 'number' ? length : 0;
  };

  const steps: ProcessingStep[] = [];

  // Step 1: Document Upload
  const uploadStatus: ProcessingStepStatus =
    document.upload_status === 'completed'
      ? 'completed'
      : document.upload_status === 'failed'
      ? 'failed'
      : 'pending';
  steps.push({
    step: '1. Document Upload',
    status: uploadStatus,
    details: {
      filePath: document.file_path,
      uploadStatus: document.upload_status,
    },
    timestamp: document.created_at,
  });

  // Step 2: Document AI Text Extraction
  // Treated as done once any processing status is recorded or the document has left 'pending'.
  const hasExtractedText = Boolean(document.processing_status) || document.status !== 'pending';
  steps.push({
    step: '2. Document AI Text Extraction',
    status: hasExtractedText ? 'completed' : 'pending',
    details: {
      processingStatus: document.processing_status,
      documentStatus: document.status,
    },
    timestamp: document.updated_at,
  });

  // Step 3: Chunking
  const totalChunks = chunks.length;
  const totalContentLength = chunks.reduce((sum, c) => sum + (c.content?.length || 0), 0);
  steps.push({
    step: '3. Document Chunking',
    status: totalChunks > 0 ? 'completed' : 'pending',
    details: {
      totalChunks,
      averageChunkSize: totalChunks > 0 ? Math.round(totalContentLength / totalChunks) : 0,
    },
    timestamp: totalChunks > 0 ? chunks[0].created_at : undefined,
  });

  // Step 4: Vector Embedding Generation
  const embeddedChunks = chunks.filter(c => c.embedding);
  const firstEmbeddedChunk = embeddedChunks[0];
  const chunksWithEmbeddings = embeddedChunks.length;
  const embeddingStatus: ProcessingStepStatus =
    totalChunks > 0 && chunksWithEmbeddings === totalChunks
      ? 'completed'
      : chunksWithEmbeddings > 0
      ? 'in_progress'
      : 'pending';
  steps.push({
    step: '4. Vector Embedding Generation',
    status: embeddingStatus,
    details: {
      chunksWithEmbeddings,
      totalChunks,
      completionRate:
        totalChunks > 0 ? ((chunksWithEmbeddings / totalChunks) * 100).toFixed(1) + '%' : '0%',
      // Dimensions are sampled from the first chunk that has an embedding.
      embeddingDimensions: firstEmbeddedChunk ? dimensionsOf(firstEmbeddedChunk.embedding) : 0,
    },
    timestamp: firstEmbeddedChunk?.created_at,
  });

  // Step 5: LLM Analysis
  const latestJob = jobs[0];
  const llmStepStatus: ProcessingStepStatus = latestJob
    ? latestJob.status === 'completed'
      ? 'completed'
      : latestJob.status === 'failed'
      ? 'failed'
      : 'in_progress'
    : 'pending';

  steps.push({
    step: '5. LLM Analysis & CIM Review Generation',
    status: llmStepStatus,
    details: {
      jobStatus: latestJob?.status,
      attempts: latestJob ? `${latestJob.attempts}/${latestJob.max_attempts}` : '0/0',
      strategy: latestJob?.options?.strategy || 'unknown',
      error: latestJob?.error || null,
      hasAnalysisData: !!document.analysis_data,
    },
    timestamp: latestJob?.started_at || latestJob?.created_at,
  });

  // Step 6: CIM Review Storage
  // Either a review row or analysis data on the document counts as stored.
  steps.push({
    step: '6. CIM Review Storage',
    status: review || document.analysis_data ? 'completed' : 'pending',
    details: {
      reviewExists: !!review,
      hasAnalysisData: !!document.analysis_data,
      reviewId: review?.id || null,
    },
    timestamp: review?.created_at || document.processing_completed_at,
  });

  // Step 7: Final Status
  const finalStatus: ProcessingStepStatus =
    document.status === 'completed'
      ? 'completed'
      : document.status === 'failed'
      ? 'failed'
      : 'in_progress';
  steps.push({
    step: '7. Processing Complete',
    status: finalStatus,
    details: {
      finalStatus: document.status,
      processingCompletedAt: document.processing_completed_at,
      hasSummary: !!document.generated_summary,
    },
    timestamp: document.processing_completed_at || document.updated_at,
  });

  return steps;
}

/**
 * Build a chronological timeline of events for a document.
 *
 * Events come from the document row, its processing jobs, the first chunk and
 * the CIM review. Jobs are numbered in order of creation (oldest is "Job 1"),
 * whatever order the caller passes them in, so labels read naturally in the
 * sorted timeline.
 *
 * Events whose timestamp is missing are omitted because they cannot be placed
 * in time. Events whose timestamp cannot be parsed as a date are kept and
 * sorted after all dated events.
 *
 * @param document - Row from `documents`.
 * @param jobs - Processing jobs in any order.
 * @param chunks - Document chunks; only the first element is used.
 * @param review - CIM review row, or a falsy value when none exists.
 * @returns Events sorted by ascending timestamp.
 */
function buildTimeline(
  document: any,
  jobs: any[],
  chunks: any[],
  review: any
): Array<{ timestamp: string; event: string; details: any }> {
  const timeline: Array<{ timestamp: string; event: string; details: any }> = [];

  // Unparseable timestamps map to +Infinity so the comparator never returns NaN.
  const toMillis = (timestamp: string): number => {
    const millis = new Date(timestamp).getTime();
    return Number.isNaN(millis) ? Number.POSITIVE_INFINITY : millis;
  };

  const compareTimestamps = (a: string, b: string): number => {
    const left = toMillis(a);
    const right = toMillis(b);
    if (left === right) {
      return 0;
    }
    return left < right ? -1 : 1;
  };

  const addEvent = (timestamp: string | null | undefined, event: string, details: any): void => {
    if (!timestamp) {
      return;
    }
    timeline.push({ timestamp, event, details });
  };

  // Document creation
  addEvent(document.created_at, 'Document Created', { filePath: document.file_path });

  // Job events, numbered by creation order (copy first: the caller's array is not mutated)
  const jobsByCreation = [...jobs].sort((a, b) => compareTimestamps(a.created_at, b.created_at));
  jobsByCreation.forEach((job, index) => {
    const label = `Job ${index + 1}`;

    addEvent(job.created_at, `${label} Created`, { jobId: job.id, strategy: job.options?.strategy });
    addEvent(job.started_at, `${label} Started`, { jobId: job.id });
    addEvent(
      job.completed_at,
      `${label} ${job.status === 'completed' ? 'Completed' : 'Failed'}`,
      { jobId: job.id, status: job.status, error: job.error || null }
    );
  });

  // Chunk creation (first chunk)
  if (chunks.length > 0) {
    addEvent(chunks[0].created_at, 'First Chunk Created', { totalChunks: chunks.length });
  }

  // Review creation
  if (review) {
    addEvent(review.created_at, 'CIM Review Created', { reviewId: review.id });
  }

  // Document updates
  if (document.updated_at !== document.created_at) {
    addEvent(document.updated_at, 'Document Updated', { status: document.status });
  }

  addEvent(document.processing_completed_at, 'Processing Completed', { finalStatus: document.status });

  // Sort by timestamp; events with equal timestamps keep insertion order.
  timeline.sort((a, b) => compareTimestamps(a.timestamp, b.timestamp));

  return timeline;
}

// Default export: the configured audit router. The route doc comment in this
// file gives the path as /api/audit/document/:documentId, so it is presumably
// mounted under /api/audit — confirm where the app registers it.
export default router;