Files
cim_summary/CONFIGURATION_GUIDE.md

11 KiB

Configuration Guide

Complete Environment Setup and Configuration for CIM Document Processor

🎯 Overview

This guide provides comprehensive configuration instructions for setting up the CIM Document Processor in development, staging, and production environments.


🔧 Environment Variables

Required Environment Variables

Google Cloud Configuration

# Google Cloud Project
GCLOUD_PROJECT_ID=your-project-id

# Google Cloud Storage
GCS_BUCKET_NAME=your-storage-bucket
DOCUMENT_AI_OUTPUT_BUCKET_NAME=your-document-ai-bucket

# Document AI Configuration
DOCUMENT_AI_LOCATION=us
DOCUMENT_AI_PROCESSOR_ID=your-processor-id

# Service Account
GOOGLE_APPLICATION_CREDENTIALS=./serviceAccountKey.json

Supabase Configuration

# Supabase Project
SUPABASE_URL=https://your-project.supabase.co
SUPABASE_ANON_KEY=your-anon-key
SUPABASE_SERVICE_KEY=your-service-key

LLM Configuration

# LLM Provider Selection
LLM_PROVIDER=anthropic  # or 'openai'

# Anthropic (Claude AI)
ANTHROPIC_API_KEY=your-anthropic-key

# OpenAI (Alternative)
OPENAI_API_KEY=your-openai-key

# LLM Settings
LLM_MODEL=claude-3-opus-20240229  # must match LLM_PROVIDER; use 'gpt-4' when LLM_PROVIDER=openai
LLM_MAX_TOKENS=3500
LLM_TEMPERATURE=0.1
LLM_PROMPT_BUFFER=500

Firebase Configuration

# Firebase Project
FB_PROJECT_ID=your-firebase-project
FB_STORAGE_BUCKET=your-firebase-bucket
FB_API_KEY=your-firebase-api-key
FB_AUTH_DOMAIN=your-project.firebaseapp.com
FB_MESSAGING_SENDER_ID=your-messaging-sender-id
FB_APP_ID=your-firebase-app-id

Optional Environment Variables

Vector Database Configuration

# Vector Provider
VECTOR_PROVIDER=supabase  # or 'pinecone'

# Pinecone (if using Pinecone)
PINECONE_API_KEY=your-pinecone-key
PINECONE_INDEX=your-pinecone-index

Security Configuration

# JWT Configuration
JWT_SECRET=your-jwt-secret
JWT_EXPIRES_IN=1h
JWT_REFRESH_SECRET=your-refresh-secret
JWT_REFRESH_EXPIRES_IN=7d

# Rate Limiting
RATE_LIMIT_WINDOW_MS=900000  # 15 minutes
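RATE_LIMIT_MAX_REQUESTS=100

# CORS (comma-separated list of allowed origins; defaults to http://localhost:3000)
ALLOWED_ORIGINS=http://localhost:3000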

File Upload Configuration

# File Limits
MAX_FILE_SIZE=104857600  # 100MB
ALLOWED_FILE_TYPES=application/pdf

# Security
BCRYPT_ROUNDS=12

Logging Configuration

# Logging
LOG_LEVEL=info  # error, warn, info, debug
LOG_FILE=logs/app.log

Agentic RAG Configuration

# Agentic RAG Settings
AGENTIC_RAG_ENABLED=true
AGENTIC_RAG_MAX_AGENTS=6
AGENTIC_RAG_PARALLEL_PROCESSING=true
AGENTIC_RAG_VALIDATION_STRICT=true
AGENTIC_RAG_RETRY_ATTEMPTS=3
AGENTIC_RAG_TIMEOUT_PER_AGENT=60000

🚀 Environment Setup

Development Environment

1. Clone Repository

git clone <repository-url>
cd cim_summary

2. Install Dependencies

# Backend dependencies
cd backend
npm install

# Frontend dependencies
cd ../frontend
npm install

3. Environment Configuration

# Backend environment
cd backend
cp .env.example .env
# Edit .env with your configuration

# Frontend environment
cd ../frontend
cp .env.example .env
# Edit .env with your configuration

4. Google Cloud Setup

# Install Google Cloud SDK
curl https://sdk.cloud.google.com | bash
exec -l $SHELL

# Authenticate with Google Cloud
gcloud auth login
gcloud config set project YOUR_PROJECT_ID

# Enable required APIs
gcloud services enable documentai.googleapis.com
gcloud services enable storage.googleapis.com
gcloud services enable cloudfunctions.googleapis.com

# Create service account
gcloud iam service-accounts create cim-processor \
  --display-name="CIM Document Processor"

# Download service account key
gcloud iam service-accounts keys create serviceAccountKey.json \
  --iam-account=cim-processor@YOUR_PROJECT_ID.iam.gserviceaccount.com

5. Supabase Setup

# Install Supabase CLI
npm install -g supabase

# Login to Supabase
supabase login

# Initialize Supabase project
supabase init

# Link to your Supabase project
supabase link --project-ref YOUR_PROJECT_REF

6. Firebase Setup

# Install Firebase CLI
npm install -g firebase-tools

# Login to Firebase
firebase login

# Initialize Firebase project
firebase init

# Select your project
firebase use YOUR_PROJECT_ID

Production Environment

1. Environment Variables

# Production environment variables
NODE_ENV=production
PORT=5001

# Ensure all required variables are set
GCLOUD_PROJECT_ID=your-production-project
SUPABASE_URL=https://your-production-project.supabase.co
ANTHROPIC_API_KEY=your-production-anthropic-key

2. Security Configuration

# Use strong secrets in production
JWT_SECRET=your-very-strong-jwt-secret
JWT_REFRESH_SECRET=your-very-strong-refresh-secret

# Enable strict validation
AGENTIC_RAG_VALIDATION_STRICT=true

3. Monitoring Configuration

# Log warnings and errors only (matches the Production profile under Environment-Specific Configurations)
LOG_LEVEL=warn
LOG_FILE=/var/log/cim-processor/app.log

# Set appropriate rate limits
RATE_LIMIT_MAX_REQUESTS=50

🔍 Configuration Validation

Validation Script

# Run configuration validation
cd backend
npm run validate-config

Configuration Health Check

/**
 * Validates the environment variables the application depends on.
 *
 * Checks performed:
 * - `GCLOUD_PROJECT_ID` and `SUPABASE_URL` must be set.
 * - At least one of `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` must be set.
 * - `MAX_FILE_SIZE` (defaults to 104857600 when unset or empty) must be a
 *   positive integer number of bytes no larger than 100MB.
 *
 * @returns `errors` lists every problem found; `isValid` is true only when
 *          that list is empty.
 */
export const validateConfiguration = (): { isValid: boolean; errors: string[] } => {
  const MAX_ALLOWED_FILE_SIZE = 104857600; // 100MB in bytes
  const errors: string[] = [];

  // Check required environment variables
  if (!process.env.GCLOUD_PROJECT_ID) {
    errors.push('GCLOUD_PROJECT_ID is required');
  }

  if (!process.env.SUPABASE_URL) {
    errors.push('SUPABASE_URL is required');
  }

  if (!process.env.ANTHROPIC_API_KEY && !process.env.OPENAI_API_KEY) {
    errors.push('Either ANTHROPIC_API_KEY or OPENAI_API_KEY is required');
  }

  // Check file size limits.
  // `||` rather than `??` on purpose: an empty MAX_FILE_SIZE uses the default.
  const rawMaxFileSize = process.env.MAX_FILE_SIZE || String(MAX_ALLOWED_FILE_SIZE);
  // Explicit radix so the value is always read as decimal.
  const maxFileSize = Number.parseInt(rawMaxFileSize, 10);
  if (Number.isNaN(maxFileSize) || maxFileSize <= 0) {
    // Without this branch a non-numeric value parses to NaN, and
    // `NaN > limit` is false, so the bad value would pass validation.
    errors.push('MAX_FILE_SIZE must be a positive integer number of bytes');
  } else if (maxFileSize > MAX_ALLOWED_FILE_SIZE) {
    errors.push('MAX_FILE_SIZE cannot exceed 100MB');
  }

  return {
    isValid: errors.length === 0,
    errors
  };
};

Health Check Endpoint

# Check configuration health
curl -X GET http://localhost:5001/api/health/config \
  -H "Authorization: Bearer <token>"

🔐 Security Configuration

Authentication Setup

Firebase Authentication

// Firebase options object; each field is read from the FB_* environment
// variable of the corresponding name and is undefined when that variable
// is not set.
const firebaseConfig = ((env) => ({
  apiKey: env.FB_API_KEY,
  authDomain: env.FB_AUTH_DOMAIN,
  projectId: env.FB_PROJECT_ID,
  storageBucket: env.FB_STORAGE_BUCKET,
  messagingSenderId: env.FB_MESSAGING_SENDER_ID,
  appId: env.FB_APP_ID
}))(process.env);

JWT Configuration

/**
 * Resolves a JWT signing secret from the environment.
 *
 * @param envName     Name of the environment variable holding the secret.
 * @param devFallback Placeholder used only when the variable is unset or
 *                    empty and NODE_ENV is not 'production'.
 * @returns The configured secret, or `devFallback` outside production.
 * @throws Error when the variable is unset or empty and NODE_ENV is
 *         'production', so a deployment can never sign tokens with a
 *         publicly known placeholder.
 */
const resolveJwtSecret = (envName: string, devFallback: string): string => {
  const configured = process.env[envName];
  if (configured) {
    return configured;
  }
  if (process.env.NODE_ENV === 'production') {
    throw new Error(`${envName} must be set in production`);
  }
  return devFallback;
};

// JWT settings. Secrets fail fast in production when missing; the lifetimes
// default to 1 hour (access token) and 7 days (refresh token).
const jwtConfig = {
  secret: resolveJwtSecret('JWT_SECRET', 'default-secret'),
  expiresIn: process.env.JWT_EXPIRES_IN || '1h',
  refreshSecret: resolveJwtSecret('JWT_REFRESH_SECRET', 'default-refresh-secret'),
  refreshExpiresIn: process.env.JWT_REFRESH_EXPIRES_IN || '7d'
};

Rate Limiting

/**
 * Parses a decimal, non-negative integer from an environment value.
 *
 * @param raw      Raw environment value (may be undefined or empty).
 * @param fallback Value returned when `raw` is missing, not a number, or negative.
 * @returns The parsed integer, or `fallback`.
 */
const parseNonNegativeInt = (raw: string | undefined, fallback: number): number => {
  // Explicit radix so the value is always read as decimal.
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
};

// Rate limiting configuration. Malformed values fall back to the defaults
// (900000 ms window, 100 requests) instead of reaching the limiter as NaN.
const rateLimitConfig = {
  windowMs: parseNonNegativeInt(process.env.RATE_LIMIT_WINDOW_MS, 900000),
  max: parseNonNegativeInt(process.env.RATE_LIMIT_MAX_REQUESTS, 100),
  message: 'Too many requests from this IP'
};

CORS Configuration

/**
 * Turns a comma-separated origin list into an array of origins.
 *
 * Entries are trimmed and empty entries are dropped, so values such as
 * `"https://a.com, https://b.com"` or a stray trailing comma still match.
 *
 * @param raw      Raw ALLOWED_ORIGINS value (may be undefined or empty).
 * @param fallback Origins used when `raw` contains no usable entry.
 * @returns The parsed origins, or `fallback` when none were found.
 */
const parseAllowedOrigins = (raw: string | undefined, fallback: string[]): string[] => {
  const origins = (raw ?? '')
    .split(',')
    .map((origin) => origin.trim())
    .filter((origin) => origin.length > 0);
  return origins.length > 0 ? origins : fallback;
};

// CORS settings. Origins come from ALLOWED_ORIGINS and default to the local
// development frontend when that variable yields no usable entry.
const corsConfig = {
  origin: parseAllowedOrigins(process.env.ALLOWED_ORIGINS, ['http://localhost:3000']),
  credentials: true,
  methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
  allowedHeaders: ['Content-Type', 'Authorization']
};

📊 Performance Configuration

Memory and CPU Limits

# Node.js memory limits
NODE_OPTIONS="--max-old-space-size=2048"

# Process limits
PM2_MAX_MEMORY_RESTART=2G
PM2_INSTANCES=4

Database Connection Pooling

// Database connection pool settings: between 2 and 10 connections, with the
// timeouts and intervals below expressed in milliseconds (per the *Millis
// option names).
// NOTE(review): the option names look like those of a tarn.js/Knex pool —
// confirm against the database client actually in use.
const dbConfig = (() => {
  const SECOND = 1000;
  return {
    pool: {
      min: 2,
      max: 10,
      acquireTimeoutMillis: 30 * SECOND,
      createTimeoutMillis: 30 * SECOND,
      destroyTimeoutMillis: 5 * SECOND,
      idleTimeoutMillis: 30 * SECOND,
      reapIntervalMillis: SECOND,
      createRetryIntervalMillis: 100
    }
  };
})();

Caching Configuration

// Cache settings: entry lifetime, maximum size, and check period.
const cacheConfig = {
  ttl: 5 * 60 * 1000, // 5 minutes
  maxSize: 100,
  checkPeriod: 60 * 1000 // 1 minute
};

🧪 Testing Configuration

Test Environment Variables

# Test environment
NODE_ENV=test
TEST_DATABASE_URL=postgresql://test:test@localhost:5432/cim_test
TEST_GCLOUD_PROJECT_ID=test-project
TEST_ANTHROPIC_API_KEY=test-key

Test Configuration

// Test settings: timeout, retry count, parallel flag, and a global coverage
// threshold that is the same for every metric.
const testConfig = (() => {
  const minimumCoverage = 80;
  return {
    timeout: 30000,
    retries: 3,
    parallel: true,
    coverage: {
      threshold: {
        global: {
          branches: minimumCoverage,
          functions: minimumCoverage,
          lines: minimumCoverage,
          statements: minimumCoverage
        }
      }
    }
  };
})();

🔄 Environment-Specific Configurations

Development

# Development settings
NODE_ENV=development
LOG_LEVEL=debug
AGENTIC_RAG_VALIDATION_STRICT=false
RATE_LIMIT_MAX_REQUESTS=1000

Staging

# Staging settings
NODE_ENV=staging
LOG_LEVEL=info
AGENTIC_RAG_VALIDATION_STRICT=true
RATE_LIMIT_MAX_REQUESTS=100

Production

# Production settings
NODE_ENV=production
LOG_LEVEL=warn
AGENTIC_RAG_VALIDATION_STRICT=true
RATE_LIMIT_MAX_REQUESTS=50

📋 Configuration Checklist

Pre-Deployment Checklist

  • All required environment variables are set
  • Google Cloud APIs are enabled
  • Service account has proper permissions
  • Supabase project is configured
  • Firebase project is set up
  • LLM API keys are valid
  • Database migrations are run
  • File storage buckets are created
  • CORS is properly configured
  • Rate limiting is configured
  • Logging is set up
  • Monitoring is configured

Security Checklist

  • JWT secrets are strong and unique
  • API keys are properly secured
  • CORS origins are restricted
  • Rate limiting is enabled
  • Input validation is configured
  • Error messages don't leak sensitive information
  • HTTPS is enabled in production
  • Service account permissions are minimal

Performance Checklist

  • Database connection pooling is configured
  • Caching is enabled
  • Memory limits are set
  • Process limits are configured
  • Monitoring is set up
  • Log rotation is configured
  • Backup procedures are in place

🚨 Troubleshooting

Common Configuration Issues

Missing Environment Variables

# Check for missing variables
npm run check-env

Google Cloud Authentication

# Verify authentication
gcloud auth list
gcloud config list

Database Connection

# Test database connection
npm run test-db

API Key Validation

# Test API keys
npm run test-apis

Configuration Debugging

/**
 * Prints the main non-secret configuration values to the console, one
 * `label value` pair per line, for quick inspection while debugging.
 */
export const debugConfiguration = (): void => {
  const entries: Array<[string, string | undefined]> = [
    ['Environment:', process.env.NODE_ENV],
    ['Google Cloud Project:', process.env.GCLOUD_PROJECT_ID],
    ['Supabase URL:', process.env.SUPABASE_URL],
    ['LLM Provider:', process.env.LLM_PROVIDER],
    ['Agentic RAG Enabled:', process.env.AGENTIC_RAG_ENABLED]
  ];

  for (const [label, value] of entries) {
    console.log(label, value);
  }
};

This comprehensive configuration guide ensures proper setup and configuration of the CIM Document Processor across all environments.