- Implement multi-format document support (PDF, XLSX, CSV, PPTX, TXT, Images)
- Add S3-compatible storage service with tenant isolation
- Create document organization service with hierarchical folders and tagging
- Implement advanced document processing with table/chart extraction
- Add batch upload capabilities (up to 50 files)
- Create comprehensive document validation and security scanning
- Implement automatic metadata extraction and categorization
- Add document version control system
- Update DEVELOPMENT_PLAN.md to mark Week 2 as completed
- Add WEEK2_COMPLETION_SUMMARY.md with detailed implementation notes
- All tests passing (6/6) - 100% success rate
218 lines
7.1 KiB
Python
218 lines
7.1 KiB
Python
"""
|
|
Configuration settings for the Virtual Board Member AI System.
|
|
"""
|
|
|
|
import os
from typing import List, Optional

from pydantic import Field, field_validator, validator
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application settings for the Virtual Board Member AI system.

    Values are loaded by pydantic-settings from the process environment and
    from a ``.env`` file (UTF-8). Variable names are matched case-sensitively.

    Fields declared with ``Field(...)`` have no default and must be supplied:
    ``SECRET_KEY``, ``DATABASE_URL``, ``REDIS_URL`` and ``OPENROUTER_API_KEY``.

    ``SUPPORTED_FORMATS`` and ``CELERY_ACCEPT_CONTENT`` are stored as
    comma-separated strings; use ``supported_formats_list`` and
    ``celery_accept_content_list`` to read them as lists.
    """

    # pydantic v2 replacement for the deprecated inner ``class Config``.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
    )

    # Application Configuration
    PROJECT_NAME: str = "Virtual Board Member AI"
    APP_NAME: str = "Virtual Board Member AI"
    APP_VERSION: str = "0.1.0"
    VERSION: str = "0.1.0"
    ENVIRONMENT: str = "development"  # compared by the is_* properties below
    # NOTE(review): DEBUG and RELOAD default to True; confirm they are
    # overridden in production deployments.
    DEBUG: bool = True
    LOG_LEVEL: str = "INFO"

    # Server Configuration
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    WORKERS: int = 4
    RELOAD: bool = True

    # Security Configuration
    SECRET_KEY: str = Field(..., description="Secret key for JWT tokens")
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60
    REFRESH_TOKEN_EXPIRE_DAYS: int = 7

    # Database Configuration
    DATABASE_URL: str = Field(..., description="PostgreSQL database URL")
    DATABASE_POOL_SIZE: int = 20
    DATABASE_MAX_OVERFLOW: int = 30
    DATABASE_POOL_TIMEOUT: int = 30

    # Redis Configuration
    REDIS_URL: str = Field(..., description="Redis connection URL")
    REDIS_PASSWORD: Optional[str] = None
    REDIS_DB: int = 0
    REDIS_POOL_SIZE: int = 10

    # Qdrant Vector Database
    QDRANT_HOST: str = "localhost"
    QDRANT_PORT: int = 6333
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION_NAME: str = "board_documents"
    # NOTE(review): QDRANT_VECTOR_SIZE (1024) differs from EMBEDDING_DIMENSION
    # (384) -- confirm which value the collection is actually created with.
    QDRANT_VECTOR_SIZE: int = 1024
    QDRANT_TIMEOUT: int = 30
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    EMBEDDING_DIMENSION: int = 384  # Dimension for all-MiniLM-L6-v2

    # LLM Configuration (OpenRouter)
    OPENROUTER_API_KEY: str = Field(..., description="OpenRouter API key")
    OPENROUTER_BASE_URL: str = "https://openrouter.ai/api/v1"
    OPENROUTER_MODEL: str = "gpt-4o-mini"
    OPENROUTER_FALLBACK_MODEL: str = "gpt-3.5-turbo"
    OPENROUTER_MAX_TOKENS: int = 4000
    OPENROUTER_TEMPERATURE: float = 0.1

    # Document Processing
    MAX_FILE_SIZE: int = 100 * 1024 * 1024  # 100MB in bytes (104857600)
    SUPPORTED_FORMATS: str = "pdf,xlsx,csv,pptx,txt"  # comma-separated
    BATCH_UPLOAD_LIMIT: int = 50
    OCR_ENABLED: bool = True
    TESSERACT_CMD: str = "/usr/bin/tesseract"

    # Storage Configuration (S3/MinIO)
    STORAGE_TYPE: str = "minio"  # minio or s3
    MINIO_ENDPOINT: str = "localhost:9000"
    # NOTE(review): the MinIO credentials default to "minioadmin"; confirm
    # they are overridden outside local development.
    MINIO_ACCESS_KEY: str = "minioadmin"
    MINIO_SECRET_KEY: str = "minioadmin"
    MINIO_BUCKET: str = "vbm-documents"
    MINIO_SECURE: bool = False

    # AWS S3 Configuration (if using S3)
    AWS_ACCESS_KEY_ID: Optional[str] = None
    AWS_SECRET_ACCESS_KEY: Optional[str] = None
    AWS_REGION: str = "us-east-1"
    S3_BUCKET: str = "vbm-documents"
    S3_ENDPOINT_URL: Optional[str] = None  # For MinIO or other S3-compatible services

    # Authentication (OAuth 2.0/OIDC)
    AUTH_PROVIDER: str = "auth0"  # auth0, cognito, or custom
    AUTH0_DOMAIN: Optional[str] = None
    AUTH0_CLIENT_ID: Optional[str] = None
    AUTH0_CLIENT_SECRET: Optional[str] = None
    AUTH0_AUDIENCE: Optional[str] = None

    # AWS Cognito Configuration (if using Cognito)
    COGNITO_USER_POOL_ID: Optional[str] = None
    COGNITO_CLIENT_ID: Optional[str] = None
    COGNITO_REGION: str = "us-east-1"

    # Email Configuration
    SMTP_HOST: Optional[str] = None
    SMTP_PORT: int = 587
    SMTP_USERNAME: Optional[str] = None
    SMTP_PASSWORD: Optional[str] = None
    SMTP_TLS: bool = True
    EMAIL_FROM: str = "noreply@yourcompany.com"

    # Monitoring and Logging
    PROMETHEUS_ENABLED: bool = True
    PROMETHEUS_PORT: int = 9090
    GRAFANA_PORT: int = 3000
    LOG_FORMAT: str = "json"
    LOG_FILE: str = "logs/app.log"

    # Message Queue (Kafka)
    KAFKA_BOOTSTRAP_SERVERS: str = "localhost:9092"
    KAFKA_TOPIC_DOCUMENT_PROCESSING: str = "vbm-document-processing"
    KAFKA_TOPIC_COMMITMENT_EXTRACTION: str = "vbm-commitment-extraction"
    KAFKA_TOPIC_NOTIFICATIONS: str = "vbm-notifications"

    # Celery Configuration
    CELERY_BROKER_URL: str = "redis://localhost:6379/1"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/2"
    CELERY_TASK_SERIALIZER: str = "json"
    CELERY_RESULT_SERIALIZER: str = "json"
    CELERY_ACCEPT_CONTENT: str = "json"  # comma-separated
    CELERY_TIMEZONE: str = "UTC"
    CELERY_ENABLE_UTC: bool = True

    # External Integrations
    SHAREPOINT_CLIENT_ID: Optional[str] = None
    SHAREPOINT_CLIENT_SECRET: Optional[str] = None
    SHAREPOINT_TENANT_ID: Optional[str] = None
    SHAREPOINT_SITE_URL: Optional[str] = None

    GOOGLE_DRIVE_CLIENT_ID: Optional[str] = None
    GOOGLE_DRIVE_CLIENT_SECRET: Optional[str] = None
    GOOGLE_DRIVE_REDIRECT_URI: str = "http://localhost:8000/auth/google/callback"

    SLACK_BOT_TOKEN: Optional[str] = None
    SLACK_SIGNING_SECRET: Optional[str] = None
    SLACK_WEBHOOK_URL: Optional[str] = None

    # Microsoft Graph API
    MICROSOFT_CLIENT_ID: Optional[str] = None
    MICROSOFT_CLIENT_SECRET: Optional[str] = None
    MICROSOFT_TENANT_ID: Optional[str] = None

    # Performance Configuration
    CACHE_TTL: int = 3600  # 1 hour
    RATE_LIMIT_REQUESTS: int = 100
    RATE_LIMIT_WINDOW: int = 60  # seconds
    MAX_CONCURRENT_REQUESTS: int = 50

    # Feature Flags
    FEATURE_COMMITMENT_TRACKING: bool = True
    FEATURE_RISK_ANALYSIS: bool = True
    FEATURE_MEETING_SUPPORT: bool = True
    FEATURE_REAL_TIME_QUERIES: bool = True
    FEATURE_BATCH_PROCESSING: bool = True

    # Compliance and Security
    ENABLE_AUDIT_LOGGING: bool = True
    ENABLE_PII_DETECTION: bool = True
    DATA_RETENTION_DAYS: int = 2555  # 7 years
    ENCRYPTION_ENABLED: bool = True
    BACKUP_ENABLED: bool = True

    # Development and Testing
    TESTING: bool = False
    MOCK_LLM_RESPONSES: bool = False
    SYNTHETIC_DATA_ENABLED: bool = True
    SEED_DATA_ENABLED: bool = True

    # CORS and Security
    # NOTE(review): the wildcard default allows every host; confirm it is
    # narrowed for production.
    ALLOWED_HOSTS: List[str] = ["*"]
    API_V1_STR: str = "/api/v1"

    @field_validator("SUPPORTED_FORMATS", mode="before")
    @classmethod
    def parse_supported_formats(cls, v: str) -> str:
        """Lower-case the raw ``SUPPORTED_FORMATS`` value before validation.

        Args:
            v: The raw value supplied for ``SUPPORTED_FORMATS``.

        Returns:
            ``v`` lower-cased when it is a string; any other value is
            returned unchanged so the field's own ``str`` validation
            handles it.
        """
        return v.lower() if isinstance(v, str) else v

    @property
    def celery_accept_content_list(self) -> List[str]:
        """Return ``CELERY_ACCEPT_CONTENT`` as a list of content types.

        The setting is split on commas; surrounding whitespace is stripped
        and empty entries are dropped, so ``"json, msgpack"`` yields
        ``["json", "msgpack"]`` and the default ``"json"`` yields
        ``["json"]``.
        """
        return [
            item.strip()
            for item in self.CELERY_ACCEPT_CONTENT.split(",")
            if item.strip()
        ]

    @property
    def supported_formats_list(self) -> List[str]:
        """Return ``SUPPORTED_FORMATS`` as a list of file extensions.

        The setting is split on commas; surrounding whitespace is stripped
        and empty entries (blank setting, doubled or trailing commas) are
        dropped.
        """
        return [
            fmt.strip()
            for fmt in self.SUPPORTED_FORMATS.split(",")
            if fmt.strip()
        ]

    @property
    def is_production(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "production" (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "production"

    @property
    def is_development(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "development" (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "development"

    @property
    def is_testing(self) -> bool:
        """Return True when ``ENVIRONMENT`` is "testing" (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "testing"
|
|
|
|
|
|
# Shared settings object, built once when this module is imported.
# Instantiation validates the configuration, so a missing required value
# (a field declared with Field(...)) fails here, at import time.
settings = Settings()
|