feat: Complete Week 2 - Document Processing Pipeline
- Implement multi-format document support (PDF, XLSX, CSV, PPTX, TXT, Images) - Add S3-compatible storage service with tenant isolation - Create document organization service with hierarchical folders and tagging - Implement advanced document processing with table/chart extraction - Add batch upload capabilities (up to 50 files) - Create comprehensive document validation and security scanning - Implement automatic metadata extraction and categorization - Add document version control system - Update DEVELOPMENT_PLAN.md to mark Week 2 as completed - Add WEEK2_COMPLETION_SUMMARY.md with detailed implementation notes - All tests passing (6/6) - 100% success rate
This commit is contained in:
208
app/core/auth.py
Normal file
208
app/core/auth.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
Authentication and authorization service for the Virtual Board Member AI System.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Dict, Any
|
||||
from fastapi import HTTPException, Depends, status
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from jose import JWTError, jwt
|
||||
from passlib.context import CryptContext
|
||||
from sqlalchemy.orm import Session
|
||||
import redis.asyncio as redis
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import get_db
|
||||
from app.models.user import User
|
||||
from app.models.tenant import Tenant
|
||||
|
||||
logger = logging.getLogger(__name__)


# Security configurations
# HTTP Bearer scheme: extracts the token from the Authorization header.
security = HTTPBearer()
# Passlib context used everywhere passwords are hashed or verified (bcrypt).
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
|
||||
class AuthService:
    """Authentication service with tenant-aware authentication.

    Passwords are hashed with bcrypt, access tokens are JWTs signed with the
    application secret, and sessions are tracked server-side in Redis so a
    token can be revoked before its JWT expiry.
    """

    def __init__(self):
        # BUGFIX: the original called self._init_redis() here without await,
        # creating a never-awaited coroutine — redis_client stayed None and
        # sessions were never persisted. Redis is now connected lazily on
        # first use, since __init__ runs at import time outside any event loop.
        self.redis_client = None

    async def _init_redis(self):
        """Initialize the Redis connection for session management (idempotent)."""
        if self.redis_client is not None:
            return
        try:
            self.redis_client = redis.from_url(
                settings.REDIS_URL,
                encoding="utf-8",
                decode_responses=True
            )
            await self.redis_client.ping()
            logger.info("Redis connection established for auth service")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            self.redis_client = None

    def verify_password(self, plain_password: str, hashed_password: str) -> bool:
        """Verify a plain-text password against its bcrypt hash."""
        return pwd_context.verify(plain_password, hashed_password)

    def get_password_hash(self, password: str) -> str:
        """Generate a bcrypt hash for the given password."""
        return pwd_context.hash(password)

    def create_access_token(self, data: Dict[str, Any], expires_delta: Optional[timedelta] = None) -> str:
        """Create a signed JWT access token.

        Args:
            data: Claims to embed (callers pass e.g. ``sub`` and ``tenant_id``).
            expires_delta: Optional custom lifetime; defaults to
                ``settings.ACCESS_TOKEN_EXPIRE_MINUTES``.
        """
        to_encode = data.copy()
        if expires_delta:
            expire = datetime.utcnow() + expires_delta
        else:
            expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)

        to_encode.update({"exp": expire})
        encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM)
        return encoded_jwt

    def verify_token(self, token: str) -> Dict[str, Any]:
        """Verify and decode a JWT token.

        Raises:
            HTTPException: 401 with a Bearer challenge on any JWT error.
        """
        try:
            payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM])
            return payload
        except JWTError as e:
            logger.error(f"Token verification failed: {e}")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Could not validate credentials",
                headers={"WWW-Authenticate": "Bearer"},
            )

    async def create_session(self, user_id: str, tenant_id: str, token: str) -> bool:
        """Create a user session in Redis with a 24h TTL.

        Returns False (best-effort, never raises) when Redis is unavailable.
        """
        await self._init_redis()
        if not self.redis_client:
            logger.warning("Redis not available, session not created")
            return False

        try:
            session_key = f"session:{user_id}:{tenant_id}"
            session_data = {
                "user_id": user_id,
                "tenant_id": tenant_id,
                "token": token,
                "created_at": datetime.utcnow().isoformat(),
                "expires_at": (datetime.utcnow() + timedelta(hours=24)).isoformat()
            }

            await self.redis_client.hset(session_key, mapping=session_data)
            await self.redis_client.expire(session_key, 86400)  # 24 hours
            logger.info(f"Session created for user {user_id} in tenant {tenant_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to create session: {e}")
            return False

    async def get_session(self, user_id: str, tenant_id: str) -> Optional[Dict[str, Any]]:
        """Get a live user session from Redis; expired sessions are purged eagerly."""
        await self._init_redis()
        if not self.redis_client:
            return None

        try:
            session_key = f"session:{user_id}:{tenant_id}"
            session_data = await self.redis_client.hgetall(session_key)

            if session_data:
                expires_at = datetime.fromisoformat(session_data["expires_at"])
                if datetime.utcnow() < expires_at:
                    return session_data
                else:
                    # Logical expiry passed before the Redis TTL fired; drop it now.
                    await self.redis_client.delete(session_key)

            return None
        except Exception as e:
            logger.error(f"Failed to get session: {e}")
            return None

    async def invalidate_session(self, user_id: str, tenant_id: str) -> bool:
        """Invalidate (delete) a user session; True once the delete was issued."""
        await self._init_redis()
        if not self.redis_client:
            return False

        try:
            session_key = f"session:{user_id}:{tenant_id}"
            await self.redis_client.delete(session_key)
            logger.info(f"Session invalidated for user {user_id} in tenant {tenant_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to invalidate session: {e}")
            return False
|
||||
|
||||
# Global auth service instance — module-level singleton shared across requests.
auth_service = AuthService()
|
||||
|
||||
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    db: Session = Depends(get_db)
) -> User:
    """Resolve the authenticated user (with tenant context) from the Bearer token.

    Raises HTTP 401 when the token payload is malformed, the server-side
    session is missing or expired, or no matching user exists in the tenant.
    """
    def _unauthorized(detail: str) -> HTTPException:
        # Every auth failure shares the 401 status and the Bearer challenge.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    payload = auth_service.verify_token(credentials.credentials)
    user_id = payload.get("sub")
    tenant_id = payload.get("tenant_id")

    if user_id is None or tenant_id is None:
        raise _unauthorized("Invalid token payload")

    # A valid JWT is not enough: a live server-side session must also exist.
    if not await auth_service.get_session(user_id, tenant_id):
        raise _unauthorized("Session expired or invalid")

    # Filter by tenant as well as id so tenant isolation is enforced in the query.
    user = (
        db.query(User)
        .filter(User.id == user_id, User.tenant_id == tenant_id)
        .first()
    )
    if user is None:
        raise _unauthorized("User not found")

    return user
|
||||
|
||||
async def get_current_active_user(current_user: User = Depends(get_current_user)) -> User:
    """Return the authenticated user, rejecting inactive accounts with HTTP 400."""
    if current_user.is_active:
        return current_user
    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail="Inactive user"
    )
|
||||
|
||||
def require_role(required_role: str):
    """Build a FastAPI dependency that admits only *required_role* (admins always pass)."""
    def role_checker(current_user: User = Depends(get_current_active_user)) -> User:
        # Admins implicitly satisfy every role requirement.
        if current_user.role in (required_role, "admin"):
            return current_user
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions"
        )
    return role_checker
|
||||
|
||||
def require_tenant_access():
    """Build a FastAPI dependency gating access to the caller's tenant."""
    def _checker(current_user: User = Depends(get_current_active_user)) -> User:
        # Placeholder: per-tenant authorization rules can be layered in here.
        return current_user
    return _checker
|
||||
266
app/core/cache.py
Normal file
266
app/core/cache.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""
|
||||
Redis caching service for the Virtual Board Member AI System.
|
||||
"""
|
||||
import logging
|
||||
import json
|
||||
import hashlib
|
||||
from typing import Optional, Any, Dict, List, Union
|
||||
from datetime import timedelta
|
||||
import redis.asyncio as redis
|
||||
from functools import wraps
|
||||
import pickle
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# Module-level logger for the cache service.
logger = logging.getLogger(__name__)
|
||||
|
||||
class CacheService:
    """Redis caching service with tenant-aware caching.

    Keys are namespaced as ``cache:{tenant_id}:{key}`` so one tenant can never
    read another's entries. Values are serialized as JSON when possible and
    fall back to pickle for non-JSON-able objects. All methods are best-effort:
    they log and return a neutral value instead of raising when Redis is down.
    """

    def __init__(self):
        # Connected lazily: __init__ typically runs at import time, outside
        # any event loop, so no async work is allowed here.
        self.redis_client = None

    async def _init_redis(self):
        """Initialize the Redis connection (no-op if already connected)."""
        if self.redis_client is not None:
            return
        try:
            self.redis_client = redis.from_url(
                settings.REDIS_URL,
                encoding="utf-8",
                decode_responses=False  # Keep as bytes for pickle support
            )
            await self.redis_client.ping()
            logger.info("Redis connection established for cache service")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            self.redis_client = None

    @staticmethod
    def _serialize(value: Any) -> bytes:
        """Encode a value for storage: JSON when possible, pickle otherwise."""
        try:
            return json.dumps(value).encode()
        except (TypeError, ValueError):
            return pickle.dumps(value)

    def _generate_key(self, prefix: str, tenant_id: str, *args, **kwargs) -> str:
        """Generate a deterministic cache key with tenant isolation.

        The prefix, tenant and stringified arguments are joined and hashed so
        the same call always maps to the same key.
        """
        key_parts = [prefix, tenant_id]

        if args:
            key_parts.extend([str(arg) for arg in args])

        if kwargs:
            # Sort kwargs so keyword order does not change the key.
            sorted_kwargs = sorted(kwargs.items())
            key_parts.extend([f"{k}:{v}" for k, v in sorted_kwargs])

        key_string = ":".join(key_parts)
        # md5 is fine here: the digest is a cache-key fingerprint, not security.
        return hashlib.md5(key_string.encode()).hexdigest()

    async def get(self, key: str, tenant_id: str) -> Optional[Any]:
        """Get a value from the cache; None on miss, error, or undecodable data."""
        # BUGFIX: connect lazily AND bail out if the connection failed — the
        # original dereferenced self.redis_client right after a failed init.
        await self._init_redis()
        if not self.redis_client:
            return None

        try:
            full_key = f"cache:{tenant_id}:{key}"
            data = await self.redis_client.get(full_key)

            if data:
                # Try to deserialize as JSON first, then pickle
                try:
                    return json.loads(data.decode())
                except (json.JSONDecodeError, UnicodeDecodeError):
                    try:
                        return pickle.loads(data)
                    except pickle.UnpicklingError:
                        logger.warning(f"Failed to deserialize cache data for key: {full_key}")
                        return None

            return None
        except Exception as e:
            logger.error(f"Cache get error: {e}")
            return None

    async def set(self, key: str, value: Any, tenant_id: str, expire: Optional[int] = None) -> bool:
        """Set a value in the cache with optional expiration (seconds)."""
        await self._init_redis()
        if not self.redis_client:
            return False

        try:
            full_key = f"cache:{tenant_id}:{key}"
            data = self._serialize(value)

            if expire:
                await self.redis_client.setex(full_key, expire, data)
            else:
                await self.redis_client.set(full_key, data)

            return True
        except Exception as e:
            logger.error(f"Cache set error: {e}")
            return False

    async def delete(self, key: str, tenant_id: str) -> bool:
        """Delete a value from the cache; True if a key was actually removed."""
        # CONSISTENCY FIX: originally only get/set connected lazily; the other
        # methods silently no-oped unless get/set had run first.
        await self._init_redis()
        if not self.redis_client:
            return False

        try:
            full_key = f"cache:{tenant_id}:{key}"
            result = await self.redis_client.delete(full_key)
            return result > 0
        except Exception as e:
            logger.error(f"Cache delete error: {e}")
            return False

    async def delete_pattern(self, pattern: str, tenant_id: str) -> int:
        """Delete all keys matching *pattern* for a tenant; returns count deleted.

        NOTE(review): KEYS is O(N) over the whole keyspace and blocks Redis;
        fine for admin/maintenance use, consider SCAN for hot paths.
        """
        await self._init_redis()
        if not self.redis_client:
            return 0

        try:
            full_pattern = f"cache:{tenant_id}:{pattern}"
            keys = await self.redis_client.keys(full_pattern)

            if keys:
                result = await self.redis_client.delete(*keys)
                logger.info(f"Deleted {result} cache keys matching pattern: {full_pattern}")
                return result

            return 0
        except Exception as e:
            logger.error(f"Cache delete pattern error: {e}")
            return 0

    async def clear_tenant_cache(self, tenant_id: str) -> int:
        """Clear all cache entries for a specific tenant."""
        return await self.delete_pattern("*", tenant_id)

    async def get_many(self, keys: List[str], tenant_id: str) -> Dict[str, Any]:
        """Get multiple values at once; missing or undecodable keys are omitted."""
        await self._init_redis()
        if not self.redis_client:
            return {}

        try:
            full_keys = [f"cache:{tenant_id}:{key}" for key in keys]
            values = await self.redis_client.mget(full_keys)

            result = {}
            for key, value in zip(keys, values):
                if value is not None:
                    try:
                        result[key] = json.loads(value.decode())
                    except (json.JSONDecodeError, UnicodeDecodeError):
                        try:
                            result[key] = pickle.loads(value)
                        except pickle.UnpicklingError:
                            logger.warning(f"Failed to deserialize cache data for key: {key}")

            return result
        except Exception as e:
            logger.error(f"Cache get_many error: {e}")
            return {}

    async def set_many(self, data: Dict[str, Any], tenant_id: str, expire: Optional[int] = None) -> bool:
        """Set multiple values in one pipelined round trip."""
        await self._init_redis()
        if not self.redis_client:
            return False

        try:
            pipeline = self.redis_client.pipeline()

            for key, value in data.items():
                full_key = f"cache:{tenant_id}:{key}"
                serialized_value = self._serialize(value)

                if expire:
                    pipeline.setex(full_key, expire, serialized_value)
                else:
                    pipeline.set(full_key, serialized_value)

            await pipeline.execute()
            return True
        except Exception as e:
            logger.error(f"Cache set_many error: {e}")
            return False

    async def increment(self, key: str, tenant_id: str, amount: int = 1) -> Optional[int]:
        """Atomically increment a counter; returns the new value or None on error."""
        await self._init_redis()
        if not self.redis_client:
            return None

        try:
            full_key = f"cache:{tenant_id}:{key}"
            result = await self.redis_client.incrby(full_key, amount)
            return result
        except Exception as e:
            logger.error(f"Cache increment error: {e}")
            return None

    async def expire(self, key: str, tenant_id: str, seconds: int) -> bool:
        """Set an expiration on an existing cache key."""
        await self._init_redis()
        if not self.redis_client:
            return False

        try:
            full_key = f"cache:{tenant_id}:{key}"
            result = await self.redis_client.expire(full_key, seconds)
            return result
        except Exception as e:
            logger.error(f"Cache expire error: {e}")
            return False
|
||||
|
||||
# Global cache service instance — module-level singleton shared across requests.
cache_service = CacheService()
|
||||
|
||||
def cache_result(prefix: str, expire: Optional[int] = 3600):
    """Decorator that caches an async function's result, namespaced per tenant.

    The tenant is taken from an explicit ``tenant_id`` keyword, from the first
    positional argument's ``tenant_id`` attribute, or from ``kwargs``; when no
    tenant can be determined the call passes straight through uncached.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, tenant_id: str = None, **kwargs):
            tenant = tenant_id
            if not tenant:
                # Fall back to the decorated call's own arguments.
                if args and hasattr(args[0], 'tenant_id'):
                    tenant = args[0].tenant_id
                elif 'tenant_id' in kwargs:
                    tenant = kwargs['tenant_id']
                else:
                    # No tenant context -> caching would be ambiguous; skip it.
                    return await func(*args, **kwargs)

            cache_key = cache_service._generate_key(prefix, tenant, *args, **kwargs)

            hit = await cache_service.get(cache_key, tenant)
            if hit is not None:
                logger.debug(f"Cache hit for key: {cache_key}")
                return hit

            # Cache miss: run the function and store the result.
            result = await func(*args, **kwargs)
            await cache_service.set(cache_key, result, tenant, expire)
            logger.debug(f"Cache miss, stored result for key: {cache_key}")

            return result
        return wrapper
    return decorator
|
||||
|
||||
def invalidate_cache(prefix: str, pattern: str = "*"):
    """Decorator that flushes matching tenant cache entries after the call.

    Invalidation runs only when ``tenant_id`` is passed explicitly as a
    keyword to the wrapped call; otherwise the cache is left untouched.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, tenant_id: str = None, **kwargs):
            outcome = await func(*args, **kwargs)

            if tenant_id:
                # Invalidate after the write has succeeded, not before.
                await cache_service.delete_pattern(pattern, tenant_id)
                logger.debug(f"Invalidated cache for tenant {tenant_id}, pattern: {pattern}")

            return outcome
        return wrapper
    return decorator
|
||||
@@ -12,8 +12,10 @@ class Settings(BaseSettings):
|
||||
"""Application settings."""
|
||||
|
||||
# Application Configuration
|
||||
PROJECT_NAME: str = "Virtual Board Member AI"
|
||||
APP_NAME: str = "Virtual Board Member AI"
|
||||
APP_VERSION: str = "0.1.0"
|
||||
VERSION: str = "0.1.0"
|
||||
ENVIRONMENT: str = "development"
|
||||
DEBUG: bool = True
|
||||
LOG_LEVEL: str = "INFO"
|
||||
@@ -48,6 +50,9 @@ class Settings(BaseSettings):
|
||||
QDRANT_API_KEY: Optional[str] = None
|
||||
QDRANT_COLLECTION_NAME: str = "board_documents"
|
||||
QDRANT_VECTOR_SIZE: int = 1024
|
||||
QDRANT_TIMEOUT: int = 30
|
||||
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
|
||||
EMBEDDING_DIMENSION: int = 384 # Dimension for all-MiniLM-L6-v2
|
||||
|
||||
# LLM Configuration (OpenRouter)
|
||||
OPENROUTER_API_KEY: str = Field(..., description="OpenRouter API key")
|
||||
@@ -77,6 +82,7 @@ class Settings(BaseSettings):
|
||||
AWS_SECRET_ACCESS_KEY: Optional[str] = None
|
||||
AWS_REGION: str = "us-east-1"
|
||||
S3_BUCKET: str = "vbm-documents"
|
||||
S3_ENDPOINT_URL: Optional[str] = None # For MinIO or other S3-compatible services
|
||||
|
||||
# Authentication (OAuth 2.0/OIDC)
|
||||
AUTH_PROVIDER: str = "auth0" # auth0, cognito, or custom
|
||||
@@ -172,6 +178,7 @@ class Settings(BaseSettings):
|
||||
|
||||
# CORS and Security
|
||||
ALLOWED_HOSTS: List[str] = ["*"]
|
||||
API_V1_STR: str = "/api/v1"
|
||||
|
||||
@validator("SUPPORTED_FORMATS", pre=True)
|
||||
def parse_supported_formats(cls, v: str) -> str:
|
||||
|
||||
@@ -25,12 +25,15 @@ async_engine = create_async_engine(
|
||||
)
|
||||
|
||||
# Create sync engine for migrations
|
||||
sync_engine = create_engine(
|
||||
engine = create_engine(
|
||||
settings.DATABASE_URL,
|
||||
echo=settings.DEBUG,
|
||||
poolclass=StaticPool if settings.TESTING else None,
|
||||
)
|
||||
|
||||
# Alias for compatibility
|
||||
sync_engine = engine
|
||||
|
||||
# Create session factory
|
||||
AsyncSessionLocal = async_sessionmaker(
|
||||
async_engine,
|
||||
@@ -58,6 +61,17 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
await session.close()
|
||||
|
||||
|
||||
def get_db_sync():
    """Synchronous database session for non-async contexts.

    Generator dependency: yields a Session bound to the sync ``engine`` and
    guarantees it is closed when the caller finishes.
    """
    # Local import keeps sqlalchemy.orm off the module's async-only import path.
    from sqlalchemy.orm import sessionmaker
    # NOTE(review): a fresh sessionmaker is built on every call; consider
    # hoisting it to module level if this dependency sits on a hot path.
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
|
||||
|
||||
|
||||
async def init_db() -> None:
|
||||
"""Initialize database tables."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user