# virtual_board_member/app/core/config.py

"""
Configuration settings for the Virtual Board Member AI System.
"""

import os
from typing import List, Optional
import os
from pydantic import Field, validator
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application settings for the Virtual Board Member AI system.

    Every upper-case attribute below is a setting. Values are resolved by
    ``BaseSettings`` from the process environment and from the ``.env`` file
    named in the inner ``Config``; the defaults declared here apply when no
    value is supplied. Because ``case_sensitive`` is enabled, variable names
    must match the attribute names exactly.

    Settings declared with ``Field(...)`` (``SECRET_KEY``, ``DATABASE_URL``,
    ``REDIS_URL``, ``OPENROUTER_API_KEY``) have no default and must be
    provided.

    Comma-separated string settings are exposed as lists through the
    ``supported_formats_list`` and ``celery_accept_content_list`` properties.
    """

    # Application Configuration
    PROJECT_NAME: str = "Virtual Board Member AI"
    APP_NAME: str = "Virtual Board Member AI"
    APP_VERSION: str = "0.1.0"
    VERSION: str = "0.1.0"
    ENVIRONMENT: str = "development"
    DEBUG: bool = True
    LOG_LEVEL: str = "INFO"

    # Server Configuration
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    WORKERS: int = 4
    RELOAD: bool = True

    # Security Configuration
    SECRET_KEY: str = Field(..., description="Secret key for JWT tokens")
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60
    REFRESH_TOKEN_EXPIRE_DAYS: int = 7

    # Database Configuration
    DATABASE_URL: str = Field(..., description="PostgreSQL database URL")
    DATABASE_POOL_SIZE: int = 20
    DATABASE_MAX_OVERFLOW: int = 30
    DATABASE_POOL_TIMEOUT: int = 30

    # Redis Configuration
    REDIS_URL: str = Field(..., description="Redis connection URL")
    REDIS_PASSWORD: Optional[str] = None
    REDIS_DB: int = 0
    REDIS_POOL_SIZE: int = 10

    # Qdrant Vector Database
    QDRANT_HOST: str = "localhost"
    QDRANT_PORT: int = 6333
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION_NAME: str = "board_documents"
    QDRANT_VECTOR_SIZE: int = 1024
    QDRANT_TIMEOUT: int = 30
    EMBEDDING_MODEL: str = "voyageai/voyage-3-large"  # Updated to Voyage-3-large as per Week 3 plan
    EMBEDDING_DIMENSION: int = 1024  # Dimension for voyage-3-large
    EMBEDDING_BATCH_SIZE: int = 32
    EMBEDDING_MAX_LENGTH: int = 512
    VOYAGE_API_KEY: Optional[str] = None  # Voyage AI API key for embeddings

    # Document Chunking Configuration
    CHUNK_SIZE: int = 1200  # Target chunk size in tokens (1000-1500 range)
    CHUNK_OVERLAP: int = 200  # Overlap between chunks
    CHUNK_MIN_SIZE: int = 100  # Minimum chunk size
    CHUNK_MAX_SIZE: int = 1500  # Maximum chunk size

    # LLM Configuration (OpenRouter)
    OPENROUTER_API_KEY: str = Field(..., description="OpenRouter API key")
    OPENROUTER_BASE_URL: str = "https://openrouter.ai/api/v1"
    OPENROUTER_MODEL: str = "gpt-4o-mini"
    OPENROUTER_FALLBACK_MODEL: str = "gpt-3.5-turbo"
    OPENROUTER_MAX_TOKENS: int = 4000
    OPENROUTER_TEMPERATURE: float = 0.1

    # Document Processing
    MAX_FILE_SIZE: int = 104857600  # 100MB in bytes
    SUPPORTED_FORMATS: str = "pdf,xlsx,csv,pptx,txt"  # Comma-separated; see supported_formats_list
    BATCH_UPLOAD_LIMIT: int = 50
    OCR_ENABLED: bool = True
    TESSERACT_CMD: str = "/usr/bin/tesseract"

    # Storage Configuration (S3/MinIO)
    STORAGE_TYPE: str = "minio"  # minio or s3
    MINIO_ENDPOINT: str = "localhost:9000"
    MINIO_ACCESS_KEY: str = "minioadmin"
    MINIO_SECRET_KEY: str = "minioadmin"
    MINIO_BUCKET: str = "vbm-documents"
    MINIO_SECURE: bool = False

    # AWS S3 Configuration (if using S3)
    AWS_ACCESS_KEY_ID: Optional[str] = None
    AWS_SECRET_ACCESS_KEY: Optional[str] = None
    AWS_REGION: str = "us-east-1"
    S3_BUCKET: str = "vbm-documents"
    S3_ENDPOINT_URL: Optional[str] = None  # For MinIO or other S3-compatible services

    # Authentication (OAuth 2.0/OIDC)
    AUTH_PROVIDER: str = "auth0"  # auth0, cognito, or custom
    AUTH0_DOMAIN: Optional[str] = None
    AUTH0_CLIENT_ID: Optional[str] = None
    AUTH0_CLIENT_SECRET: Optional[str] = None
    AUTH0_AUDIENCE: Optional[str] = None

    # AWS Cognito Configuration (if using Cognito)
    COGNITO_USER_POOL_ID: Optional[str] = None
    COGNITO_CLIENT_ID: Optional[str] = None
    COGNITO_REGION: str = "us-east-1"

    # Email Configuration
    SMTP_HOST: Optional[str] = None
    SMTP_PORT: int = 587
    SMTP_USERNAME: Optional[str] = None
    SMTP_PASSWORD: Optional[str] = None
    SMTP_TLS: bool = True
    EMAIL_FROM: str = "noreply@yourcompany.com"

    # Monitoring and Logging
    PROMETHEUS_ENABLED: bool = True
    PROMETHEUS_PORT: int = 9090
    GRAFANA_PORT: int = 3000
    LOG_FORMAT: str = "json"
    LOG_FILE: str = "logs/app.log"

    # Message Queue (Kafka)
    KAFKA_BOOTSTRAP_SERVERS: str = "localhost:9092"
    KAFKA_TOPIC_DOCUMENT_PROCESSING: str = "vbm-document-processing"
    KAFKA_TOPIC_COMMITMENT_EXTRACTION: str = "vbm-commitment-extraction"
    KAFKA_TOPIC_NOTIFICATIONS: str = "vbm-notifications"

    # Celery Configuration
    CELERY_BROKER_URL: str = "redis://localhost:6379/1"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/2"
    CELERY_TASK_SERIALIZER: str = "json"
    CELERY_RESULT_SERIALIZER: str = "json"
    CELERY_ACCEPT_CONTENT: str = "json"  # Comma-separated; see celery_accept_content_list
    CELERY_TIMEZONE: str = "UTC"
    CELERY_ENABLE_UTC: bool = True

    # External Integrations
    SHAREPOINT_CLIENT_ID: Optional[str] = None
    SHAREPOINT_CLIENT_SECRET: Optional[str] = None
    SHAREPOINT_TENANT_ID: Optional[str] = None
    SHAREPOINT_SITE_URL: Optional[str] = None

    GOOGLE_DRIVE_CLIENT_ID: Optional[str] = None
    GOOGLE_DRIVE_CLIENT_SECRET: Optional[str] = None
    GOOGLE_DRIVE_REDIRECT_URI: str = "http://localhost:8000/auth/google/callback"

    SLACK_BOT_TOKEN: Optional[str] = None
    SLACK_SIGNING_SECRET: Optional[str] = None
    SLACK_WEBHOOK_URL: Optional[str] = None

    # Microsoft Graph API
    MICROSOFT_CLIENT_ID: Optional[str] = None
    MICROSOFT_CLIENT_SECRET: Optional[str] = None
    MICROSOFT_TENANT_ID: Optional[str] = None

    # Performance Configuration
    CACHE_TTL: int = 3600  # 1 hour
    RATE_LIMIT_REQUESTS: int = 100
    RATE_LIMIT_WINDOW: int = 60  # seconds
    MAX_CONCURRENT_REQUESTS: int = 50

    # Feature Flags
    FEATURE_COMMITMENT_TRACKING: bool = True
    FEATURE_RISK_ANALYSIS: bool = True
    FEATURE_MEETING_SUPPORT: bool = True
    FEATURE_REAL_TIME_QUERIES: bool = True
    FEATURE_BATCH_PROCESSING: bool = True

    # Compliance and Security
    ENABLE_AUDIT_LOGGING: bool = True
    ENABLE_PII_DETECTION: bool = True
    DATA_RETENTION_DAYS: int = 2555  # 7 years
    ENCRYPTION_ENABLED: bool = True
    BACKUP_ENABLED: bool = True

    # Development and Testing
    # Auto-detect testing when running under pytest if env not set.
    # The default is computed once, when this class body is executed (i.e. at
    # module import), from the PYTEST_CURRENT_TEST environment variable.
    TESTING: bool = bool(os.environ.get("PYTEST_CURRENT_TEST"))
    MOCK_LLM_RESPONSES: bool = False
    SYNTHETIC_DATA_ENABLED: bool = True
    SEED_DATA_ENABLED: bool = True

    # CORS and Security
    ALLOWED_HOSTS: List[str] = ["*"]
    API_V1_STR: str = "/api/v1"
    ENABLE_SUBDOMAIN_TENANTS: bool = False

    # NOTE(review): `validator` (and the inner `Config` class below) are the
    # pydantic v1-style API, which pydantic v2 deprecates in favor of
    # `field_validator` / `model_config`. Kept as-is so this class needs no
    # new imports.
    @validator("SUPPORTED_FORMATS", pre=True)
    def parse_supported_formats(cls, v: str) -> str:
        """Lower-case the raw ``SUPPORTED_FORMATS`` value before validation.

        Runs with ``pre=True``, so ``v`` is the unvalidated input; anything
        that is not a string is passed through unchanged for the normal
        field validation to handle.
        """
        if isinstance(v, str):
            return v.lower()
        return v

    @property
    def supported_formats_list(self) -> List[str]:
        """Return ``SUPPORTED_FORMATS`` split on commas.

        Each entry is stripped of surrounding whitespace, and empty entries
        (from stray or trailing commas, or an empty setting) are dropped.
        """
        return [
            fmt.strip()
            for fmt in self.SUPPORTED_FORMATS.split(",")
            if fmt.strip()
        ]

    @property
    def celery_accept_content_list(self) -> List[str]:
        """Return ``CELERY_ACCEPT_CONTENT`` split on commas.

        A single value such as ``"json"`` yields ``["json"]``; a value such
        as ``"json, msgpack"`` yields ``["json", "msgpack"]``. Entries are
        stripped of surrounding whitespace and empty entries are dropped.
        """
        return [
            content_type.strip()
            for content_type in self.CELERY_ACCEPT_CONTENT.split(",")
            if content_type.strip()
        ]

    @property
    def is_production(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "production" (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "production"

    @property
    def is_development(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "development" (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "development"

    @property
    def is_testing(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "testing" (case-insensitive).

        This looks only at ``ENVIRONMENT``; it does not consult the
        ``TESTING`` flag.
        """
        return self.ENVIRONMENT.lower() == "testing"

    class Config:
        # Settings source options: read a UTF-8 ".env" file and match
        # variable names to attribute names case-sensitively.
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True


# Shared settings instance, constructed once when this module is imported.
# Settings declared without a default (SECRET_KEY, DATABASE_URL, REDIS_URL,
# OPENROUTER_API_KEY) must be supplied for this construction to succeed.
settings: Settings = Settings()