Files
virtual_board_member/app/core/config.py
Jonathan Pressnell 1a8ec37bed feat: Complete Week 2 - Document Processing Pipeline
- Implement multi-format document support (PDF, XLSX, CSV, PPTX, TXT, Images)
- Add S3-compatible storage service with tenant isolation
- Create document organization service with hierarchical folders and tagging
- Implement advanced document processing with table/chart extraction
- Add batch upload capabilities (up to 50 files)
- Create comprehensive document validation and security scanning
- Implement automatic metadata extraction and categorization
- Add document version control system
- Update DEVELOPMENT_PLAN.md to mark Week 2 as completed
- Add WEEK2_COMPLETION_SUMMARY.md with detailed implementation notes
- All tests passing (6/6) - 100% success rate
2025-08-08 15:47:43 -04:00

218 lines
7.1 KiB
Python

"""
Configuration settings for the Virtual Board Member AI System.
"""
import os
from typing import List, Optional
from pydantic import Field, validator
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings for the Virtual Board Member AI System.

    Every attribute below is a settings field that can be overridden from
    the environment or from the ``.env`` file named in the inner ``Config``
    class.  Field names are matched case-sensitively.

    Fields declared with ``Field(...)`` (``SECRET_KEY``, ``DATABASE_URL``,
    ``REDIS_URL``, ``OPENROUTER_API_KEY``) have no default and must be
    supplied, otherwise instantiation fails validation.
    """

    # ------------------------------------------------------------------
    # Application Configuration
    # ------------------------------------------------------------------
    PROJECT_NAME: str = "Virtual Board Member AI"
    APP_NAME: str = "Virtual Board Member AI"
    APP_VERSION: str = "0.1.0"
    VERSION: str = "0.1.0"
    ENVIRONMENT: str = "development"
    # NOTE(review): DEBUG and RELOAD default to True, which suits local
    # development; presumably production deployments override them.
    DEBUG: bool = True
    LOG_LEVEL: str = "INFO"

    # ------------------------------------------------------------------
    # Server Configuration
    # ------------------------------------------------------------------
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    WORKERS: int = 4
    RELOAD: bool = True

    # ------------------------------------------------------------------
    # Security Configuration
    # ------------------------------------------------------------------
    SECRET_KEY: str = Field(..., description="Secret key for JWT tokens")
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60
    REFRESH_TOKEN_EXPIRE_DAYS: int = 7

    # ------------------------------------------------------------------
    # Database Configuration
    # ------------------------------------------------------------------
    DATABASE_URL: str = Field(..., description="PostgreSQL database URL")
    DATABASE_POOL_SIZE: int = 20
    DATABASE_MAX_OVERFLOW: int = 30
    DATABASE_POOL_TIMEOUT: int = 30

    # ------------------------------------------------------------------
    # Redis Configuration
    # ------------------------------------------------------------------
    REDIS_URL: str = Field(..., description="Redis connection URL")
    REDIS_PASSWORD: Optional[str] = None
    REDIS_DB: int = 0
    REDIS_POOL_SIZE: int = 10

    # ------------------------------------------------------------------
    # Qdrant Vector Database
    # ------------------------------------------------------------------
    QDRANT_HOST: str = "localhost"
    QDRANT_PORT: int = 6333
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION_NAME: str = "board_documents"
    # NOTE(review): QDRANT_VECTOR_SIZE (1024) does not match
    # EMBEDDING_DIMENSION (384) below.  If the collection is created with
    # this size and filled with vectors from EMBEDDING_MODEL, the sizes
    # will disagree -- confirm which value the vector store actually uses.
    QDRANT_VECTOR_SIZE: int = 1024
    QDRANT_TIMEOUT: int = 30
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    EMBEDDING_DIMENSION: int = 384  # Dimension for all-MiniLM-L6-v2

    # ------------------------------------------------------------------
    # LLM Configuration (OpenRouter)
    # ------------------------------------------------------------------
    OPENROUTER_API_KEY: str = Field(..., description="OpenRouter API key")
    OPENROUTER_BASE_URL: str = "https://openrouter.ai/api/v1"
    OPENROUTER_MODEL: str = "gpt-4o-mini"
    OPENROUTER_FALLBACK_MODEL: str = "gpt-3.5-turbo"
    OPENROUTER_MAX_TOKENS: int = 4000
    OPENROUTER_TEMPERATURE: float = 0.1

    # ------------------------------------------------------------------
    # Document Processing
    # ------------------------------------------------------------------
    MAX_FILE_SIZE: int = 104857600  # 100MB in bytes (100 * 1024 * 1024)
    # Comma-separated list; use ``supported_formats_list`` for a parsed list.
    SUPPORTED_FORMATS: str = "pdf,xlsx,csv,pptx,txt"
    BATCH_UPLOAD_LIMIT: int = 50
    OCR_ENABLED: bool = True
    TESSERACT_CMD: str = "/usr/bin/tesseract"

    # ------------------------------------------------------------------
    # Storage Configuration (S3/MinIO)
    # ------------------------------------------------------------------
    STORAGE_TYPE: str = "minio"  # minio or s3
    MINIO_ENDPOINT: str = "localhost:9000"
    # NOTE(review): "minioadmin" credentials look like local-development
    # placeholders; override them outside of development.
    MINIO_ACCESS_KEY: str = "minioadmin"
    MINIO_SECRET_KEY: str = "minioadmin"
    MINIO_BUCKET: str = "vbm-documents"
    MINIO_SECURE: bool = False

    # AWS S3 Configuration (if using S3)
    AWS_ACCESS_KEY_ID: Optional[str] = None
    AWS_SECRET_ACCESS_KEY: Optional[str] = None
    AWS_REGION: str = "us-east-1"
    S3_BUCKET: str = "vbm-documents"
    S3_ENDPOINT_URL: Optional[str] = None  # For MinIO or other S3-compatible services

    # ------------------------------------------------------------------
    # Authentication (OAuth 2.0/OIDC)
    # ------------------------------------------------------------------
    AUTH_PROVIDER: str = "auth0"  # auth0, cognito, or custom
    AUTH0_DOMAIN: Optional[str] = None
    AUTH0_CLIENT_ID: Optional[str] = None
    AUTH0_CLIENT_SECRET: Optional[str] = None
    AUTH0_AUDIENCE: Optional[str] = None

    # AWS Cognito Configuration (if using Cognito)
    COGNITO_USER_POOL_ID: Optional[str] = None
    COGNITO_CLIENT_ID: Optional[str] = None
    COGNITO_REGION: str = "us-east-1"

    # ------------------------------------------------------------------
    # Email Configuration
    # ------------------------------------------------------------------
    SMTP_HOST: Optional[str] = None
    SMTP_PORT: int = 587
    SMTP_USERNAME: Optional[str] = None
    SMTP_PASSWORD: Optional[str] = None
    SMTP_TLS: bool = True
    EMAIL_FROM: str = "noreply@yourcompany.com"

    # ------------------------------------------------------------------
    # Monitoring and Logging
    # ------------------------------------------------------------------
    PROMETHEUS_ENABLED: bool = True
    PROMETHEUS_PORT: int = 9090
    GRAFANA_PORT: int = 3000
    LOG_FORMAT: str = "json"
    LOG_FILE: str = "logs/app.log"

    # ------------------------------------------------------------------
    # Message Queue (Kafka)
    # ------------------------------------------------------------------
    KAFKA_BOOTSTRAP_SERVERS: str = "localhost:9092"
    KAFKA_TOPIC_DOCUMENT_PROCESSING: str = "vbm-document-processing"
    KAFKA_TOPIC_COMMITMENT_EXTRACTION: str = "vbm-commitment-extraction"
    KAFKA_TOPIC_NOTIFICATIONS: str = "vbm-notifications"

    # ------------------------------------------------------------------
    # Celery Configuration
    # ------------------------------------------------------------------
    CELERY_BROKER_URL: str = "redis://localhost:6379/1"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/2"
    CELERY_TASK_SERIALIZER: str = "json"
    CELERY_RESULT_SERIALIZER: str = "json"
    # Comma-separated list; use ``celery_accept_content_list`` for a list.
    CELERY_ACCEPT_CONTENT: str = "json"
    CELERY_TIMEZONE: str = "UTC"
    CELERY_ENABLE_UTC: bool = True

    # ------------------------------------------------------------------
    # External Integrations
    # ------------------------------------------------------------------
    SHAREPOINT_CLIENT_ID: Optional[str] = None
    SHAREPOINT_CLIENT_SECRET: Optional[str] = None
    SHAREPOINT_TENANT_ID: Optional[str] = None
    SHAREPOINT_SITE_URL: Optional[str] = None
    GOOGLE_DRIVE_CLIENT_ID: Optional[str] = None
    GOOGLE_DRIVE_CLIENT_SECRET: Optional[str] = None
    GOOGLE_DRIVE_REDIRECT_URI: str = "http://localhost:8000/auth/google/callback"
    SLACK_BOT_TOKEN: Optional[str] = None
    SLACK_SIGNING_SECRET: Optional[str] = None
    SLACK_WEBHOOK_URL: Optional[str] = None

    # Microsoft Graph API
    MICROSOFT_CLIENT_ID: Optional[str] = None
    MICROSOFT_CLIENT_SECRET: Optional[str] = None
    MICROSOFT_TENANT_ID: Optional[str] = None

    # ------------------------------------------------------------------
    # Performance Configuration
    # ------------------------------------------------------------------
    CACHE_TTL: int = 3600  # 1 hour
    RATE_LIMIT_REQUESTS: int = 100
    RATE_LIMIT_WINDOW: int = 60  # seconds
    MAX_CONCURRENT_REQUESTS: int = 50

    # ------------------------------------------------------------------
    # Feature Flags
    # ------------------------------------------------------------------
    FEATURE_COMMITMENT_TRACKING: bool = True
    FEATURE_RISK_ANALYSIS: bool = True
    FEATURE_MEETING_SUPPORT: bool = True
    FEATURE_REAL_TIME_QUERIES: bool = True
    FEATURE_BATCH_PROCESSING: bool = True

    # ------------------------------------------------------------------
    # Compliance and Security
    # ------------------------------------------------------------------
    ENABLE_AUDIT_LOGGING: bool = True
    ENABLE_PII_DETECTION: bool = True
    DATA_RETENTION_DAYS: int = 2555  # 7 years
    ENCRYPTION_ENABLED: bool = True
    BACKUP_ENABLED: bool = True

    # ------------------------------------------------------------------
    # Development and Testing
    # ------------------------------------------------------------------
    TESTING: bool = False
    MOCK_LLM_RESPONSES: bool = False
    SYNTHETIC_DATA_ENABLED: bool = True
    SEED_DATA_ENABLED: bool = True

    # ------------------------------------------------------------------
    # CORS and Security
    # ------------------------------------------------------------------
    # NOTE(review): the wildcard default accepts any host; presumably
    # intended for development only.
    ALLOWED_HOSTS: List[str] = ["*"]
    API_V1_STR: str = "/api/v1"

    @validator("SUPPORTED_FORMATS", pre=True)
    def parse_supported_formats(cls, v: str) -> str:
        """Lower-case the raw ``SUPPORTED_FORMATS`` value before validation.

        Non-string inputs are passed through untouched so that the normal
        field validation can accept or reject them.
        """
        if isinstance(v, str):
            return v.lower()
        return v

    @property
    def celery_accept_content_list(self) -> List[str]:
        """Get CELERY_ACCEPT_CONTENT as a list.

        The value is treated as a comma-separated string, in the same way
        as ``SUPPORTED_FORMATS``; surrounding whitespace is stripped and
        blank entries are dropped.  The default ``"json"`` yields
        ``["json"]``.
        """
        return [
            entry.strip()
            for entry in self.CELERY_ACCEPT_CONTENT.split(",")
            if entry.strip()
        ]

    @property
    def supported_formats_list(self) -> List[str]:
        """Get list of supported file formats.

        Splits ``SUPPORTED_FORMATS`` on commas and strips whitespace.
        Blank entries (for example from a trailing comma) are dropped so
        that callers never see an empty-string format.
        """
        return [
            fmt.strip()
            for fmt in self.SUPPORTED_FORMATS.split(",")
            if fmt.strip()
        ]

    @property
    def is_production(self) -> bool:
        """Check if running in production environment (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "production"

    @property
    def is_development(self) -> bool:
        """Check if running in development environment (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "development"

    @property
    def is_testing(self) -> bool:
        """Check if running in testing environment (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "testing"

    class Config:
        # Settings-source options: read overrides from ``.env`` (UTF-8) and
        # match environment variable names case-sensitively.
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True
# Create the shared settings instance.
# This runs at import time, so the fields declared with ``Field(...)`` and no
# default (SECRET_KEY, DATABASE_URL, REDIS_URL, OPENROUTER_API_KEY) must be
# available from the environment or the ``.env`` file when this module is
# first imported; otherwise instantiation fails validation.
settings = Settings()