"""
Storage service for handling file storage with S3-compatible backend and multi-tenant support.
"""

import asyncio
import logging
import hashlib
import mimetypes
from typing import Optional, Dict, Any, List
from pathlib import Path
import uuid
from datetime import datetime, timedelta, timezone

import boto3
from botocore.exceptions import ClientError, NoCredentialsError
import aiofiles
from fastapi import UploadFile

from app.core.config import settings
from app.models.tenant import Tenant

logger = logging.getLogger(__name__)


class StorageService:
    """Storage service with S3-compatible backend and multi-tenant support.

    When AWS credentials are configured in ``settings`` files are stored in a
    per-tenant bucket through boto3; otherwise they are written below the
    local ``storage/`` directory. All object keys / relative paths produced by
    this class start with ``tenants/<tenant id>/documents/``.
    """

    # Root directory of the local fallback backend (relative to the CWD).
    LOCAL_STORAGE_ROOT = "storage"

    # Content type used when neither the client nor ``mimetypes`` supplies one.
    DEFAULT_CONTENT_TYPE = "application/octet-stream"

    # Number of keys requested per ``list_objects_v2`` call while paginating.
    S3_LIST_PAGE_SIZE = 1000

    # Upload whitelist: file extensions (lower-case, with leading dot).
    ALLOWED_EXTENSIONS = frozenset({
        '.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.csv',
        '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff',
    })

    # Upload whitelist: MIME types declared by the client.
    ALLOWED_MIME_TYPES = frozenset({
        'application/pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'text/plain',
        'text/csv',
        'image/jpeg',
        'image/png',
        'image/gif',
        'image/bmp',
        'image/tiff',
    })

    def __init__(self, tenant: Tenant):
        """Bind the service to ``tenant`` and create the S3 client if possible.

        Args:
            tenant: Tenant whose ``id`` namespaces the bucket name and paths.
        """
        self.tenant = tenant
        self.s3_client = None
        self.bucket_name = f"vbm-documents-{tenant.id}"

        # Initialize S3 client if credentials are available
        if settings.AWS_ACCESS_KEY_ID and settings.AWS_SECRET_ACCESS_KEY:
            self.s3_client = boto3.client(
                's3',
                aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                region_name=settings.AWS_REGION or 'us-east-1',
                endpoint_url=settings.S3_ENDPOINT_URL,  # For MinIO or other S3-compatible services
            )
        else:
            logger.warning("AWS credentials not configured, using local storage")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    async def _run_blocking(func, *args, **kwargs):
        """Run a blocking callable in the default executor and await its result.

        boto3 and ``pathlib`` calls are synchronous; calling them directly
        inside a coroutine would stall the event loop for their duration.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    @staticmethod
    def _naive_utc_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        The value has no UTC offset suffix, which is the format
        ``datetime.utcnow().isoformat()`` produced, so stored ``uploaded_at``
        values keep their shape.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _resolve_local_path(self, file_path: str) -> Path:
        """Map a storage-relative path to an absolute local path.

        Args:
            file_path: Path relative to the local storage root.

        Returns:
            The resolved absolute path.

        Raises:
            ValueError: If the resolved path is not inside this tenant's
                directory (``storage/tenants/<tenant id>``), e.g. because it
                contains ``..`` segments or names another tenant.
        """
        tenant_root = Path(
            f"{self.LOCAL_STORAGE_ROOT}/tenants/{self.tenant.id}"
        ).resolve()
        candidate = Path(f"{self.LOCAL_STORAGE_ROOT}/{file_path}").resolve()
        if candidate != tenant_root and tenant_root not in candidate.parents:
            raise ValueError(f"Path {file_path!r} is outside tenant storage")
        return candidate

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def upload_file(self, file: UploadFile, document_id: str) -> Dict[str, Any]:
        """
        Upload a file to storage with security validation.

        Args:
            file: The uploaded file; its name and declared content type are
                validated against the whitelists before anything is stored.
            document_id: Identifier embedded in the generated storage path.

        Returns:
            A dict with ``file_path``, ``storage_url``, ``file_size``,
            ``checksum`` (SHA-256 hex digest), ``mime_type`` and
            ``uploaded_at``. ``mime_type`` is the declared content type or,
            when none was declared, a type guessed from the filename.

        Raises:
            ValueError: If validation fails.
            Exception: Any storage error is logged and re-raised.
        """
        try:
            # Security validation
            await self._validate_file_security(file)

            # Generate file path
            file_path = self._generate_file_path(document_id, file.filename)

            # Read file content
            content = await file.read()

            # Calculate checksum
            checksum = hashlib.sha256(content).hexdigest()

            # S3 rejects a missing ContentType, so always settle on a string.
            mime_type = (
                file.content_type
                or mimetypes.guess_type(file.filename)[0]
                or self.DEFAULT_CONTENT_TYPE
            )

            # Upload to storage
            if self.s3_client:
                await self._upload_to_s3(content, file_path, mime_type)
                storage_url = f"s3://{self.bucket_name}/{file_path}"
            else:
                await self._upload_to_local(content, file_path)
                storage_url = str(file_path)

            return {
                "file_path": file_path,
                "storage_url": storage_url,
                "file_size": len(content),
                "checksum": checksum,
                "mime_type": mime_type,
                "uploaded_at": self._naive_utc_iso(),
            }

        except Exception as e:
            logger.error("Error uploading file %s: %s", file.filename, e)
            raise

    async def download_file(self, file_path: str) -> bytes:
        """
        Download a file from storage.

        Args:
            file_path: Object key / storage-relative path, as returned by
                ``upload_file`` or ``list_files``.

        Returns:
            The file content.

        Raises:
            Exception: Any backend error is logged and re-raised.
        """
        try:
            if self.s3_client:
                return await self._download_from_s3(file_path)
            return await self._download_from_local(file_path)
        except Exception as e:
            logger.error("Error downloading file %s: %s", file_path, e)
            raise

    async def delete_file(self, file_path: str) -> bool:
        """
        Delete a file from storage.

        Returns:
            ``True`` if the backend reported success, ``False`` otherwise
            (errors are logged, not raised).
        """
        try:
            if self.s3_client:
                return await self._delete_from_s3(file_path)
            return await self._delete_from_local(file_path)
        except Exception as e:
            logger.error("Error deleting file %s: %s", file_path, e)
            return False

    async def get_file_info(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get file information from storage.

        Returns:
            A dict with ``file_size``, ``last_modified`` and ``content_type``
            (plus ``metadata`` on S3), or ``None`` if the file is missing or
            an error occurred.
        """
        try:
            if self.s3_client:
                return await self._get_s3_file_info(file_path)
            return await self._get_local_file_info(file_path)
        except Exception as e:
            logger.error("Error getting file info for %s: %s", file_path, e)
            return None

    async def list_files(self, prefix: str = "", max_keys: int = 1000) -> List[Dict[str, Any]]:
        """
        List files in storage with optional prefix filtering.

        Args:
            prefix: Appended to ``tenants/<tenant id>/documents/``.
            max_keys: Maximum number of entries to return.

        Returns:
            A list of dicts with ``key``, ``size`` and ``last_modified``;
            empty on error.
        """
        try:
            if self.s3_client:
                return await self._list_s3_files(prefix, max_keys)
            return await self._list_local_files(prefix, max_keys)
        except Exception as e:
            logger.error("Error listing files with prefix %s: %s", prefix, e)
            return []

    # ------------------------------------------------------------------
    # Validation and path generation
    # ------------------------------------------------------------------

    async def _validate_file_security(self, file: UploadFile) -> None:
        """
        Validate file for security threats.

        Checks that a filename is present, that its extension is whitelisted
        and, when the client declared a content type, that it is whitelisted
        too. The file size and the file content are not inspected here.

        Raises:
            ValueError: If any check fails.
        """
        # Check that a filename was supplied
        if not file.filename:
            raise ValueError("No filename provided")

        # Check file extension
        file_extension = Path(file.filename).suffix.lower()
        if file_extension not in self.ALLOWED_EXTENSIONS:
            raise ValueError(f"File type {file_extension} not allowed")

        # Check MIME type
        if file.content_type and file.content_type not in self.ALLOWED_MIME_TYPES:
            raise ValueError(f"MIME type {file.content_type} not allowed")

    def _generate_file_path(self, document_id: str, filename: str) -> str:
        """
        Generate a secure file path for storage.

        The result is ``tenants/<tenant id>/documents/<document_id>_<name>``
        where ``<name>`` is the final component of ``filename`` with spaces
        replaced by underscores. Path separators in ``document_id`` are
        replaced by underscores and backslash-separated directories in
        ``filename`` are dropped, so neither input can add path segments.
        """
        # Create tenant-specific path
        tenant_path = f"tenants/{self.tenant.id}/documents"

        safe_document_id = str(document_id).replace("/", "_").replace("\\", "_")

        # Normalise Windows separators first so ``Path.name`` strips them too.
        base_name = Path(filename.replace("\\", "/")).name
        sanitized_filename = base_name.replace(" ", "_")

        return f"{tenant_path}/{safe_document_id}_{sanitized_filename}"

    # ------------------------------------------------------------------
    # Upload
    # ------------------------------------------------------------------

    async def _upload_to_s3(self, content: bytes, file_path: str, content_type: Optional[str]) -> None:
        """
        Upload file to S3-compatible storage.

        Args:
            content: Raw file bytes.
            file_path: Object key.
            content_type: MIME type stored with the object; a generic default
                is used when it is empty or ``None``.

        Raises:
            ClientError, NoCredentialsError: Logged and re-raised.
        """
        try:
            await self._run_blocking(
                self.s3_client.put_object,
                Bucket=self.bucket_name,
                Key=file_path,
                Body=content,
                ContentType=content_type or self.DEFAULT_CONTENT_TYPE,
                Metadata={
                    'tenant_id': str(self.tenant.id),
                    'uploaded_at': self._naive_utc_iso(),
                },
            )
        except ClientError as e:
            logger.error("S3 upload error: %s", e)
            raise
        except NoCredentialsError:
            logger.error("AWS credentials not found")
            raise

    async def _upload_to_local(self, content: bytes, file_path: str) -> None:
        """
        Upload file to local storage.

        Raises:
            Exception: Logged and re-raised (including ``ValueError`` when
                ``file_path`` would land outside the tenant directory).
        """
        try:
            # Create directory structure
            local_path = self._resolve_local_path(file_path)
            local_path.parent.mkdir(parents=True, exist_ok=True)

            # Write file
            async with aiofiles.open(local_path, 'wb') as f:
                await f.write(content)

        except Exception as e:
            logger.error("Local upload error: %s", e)
            raise

    # ------------------------------------------------------------------
    # Download
    # ------------------------------------------------------------------

    async def _download_from_s3(self, file_path: str) -> bytes:
        """
        Download file from S3-compatible storage.

        Raises:
            ClientError: Logged and re-raised.
        """
        def _fetch() -> bytes:
            # Reading the body is blocking network I/O as well, so it is done
            # in the worker thread together with the request itself.
            response = self.s3_client.get_object(
                Bucket=self.bucket_name,
                Key=file_path,
            )
            return response['Body'].read()

        try:
            return await self._run_blocking(_fetch)
        except ClientError as e:
            logger.error("S3 download error: %s", e)
            raise

    async def _download_from_local(self, file_path: str) -> bytes:
        """
        Download file from local storage.

        Raises:
            Exception: Logged and re-raised.
        """
        try:
            local_path = self._resolve_local_path(file_path)
            async with aiofiles.open(local_path, 'rb') as f:
                return await f.read()
        except Exception as e:
            logger.error("Local download error: %s", e)
            raise

    # ------------------------------------------------------------------
    # Delete
    # ------------------------------------------------------------------

    async def _delete_from_s3(self, file_path: str) -> bool:
        """
        Delete file from S3-compatible storage.

        Returns:
            ``True`` if the request succeeded, ``False`` on ``ClientError``.
        """
        try:
            await self._run_blocking(
                self.s3_client.delete_object,
                Bucket=self.bucket_name,
                Key=file_path,
            )
            return True
        except ClientError as e:
            logger.error("S3 delete error: %s", e)
            return False

    async def _delete_from_local(self, file_path: str) -> bool:
        """
        Delete file from local storage.

        Returns:
            ``True`` if a file was removed, ``False`` if it did not exist or
            an error occurred.
        """
        try:
            local_path = self._resolve_local_path(file_path)
            try:
                local_path.unlink()
            except FileNotFoundError:
                # Already gone: same result as the file never having existed.
                return False
            return True
        except Exception as e:
            logger.error("Local delete error: %s", e)
            return False

    # ------------------------------------------------------------------
    # File info
    # ------------------------------------------------------------------

    async def _get_s3_file_info(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get file information from S3-compatible storage.

        Returns:
            Size, last-modified timestamp (ISO-8601), content type and user
            metadata of the object, or ``None`` on ``ClientError``.
        """
        try:
            response = await self._run_blocking(
                self.s3_client.head_object,
                Bucket=self.bucket_name,
                Key=file_path,
            )
            return {
                "file_size": response['ContentLength'],
                "last_modified": response['LastModified'].isoformat(),
                "content_type": response.get('ContentType'),
                "metadata": response.get('Metadata', {}),
            }
        except ClientError:
            return None

    async def _get_local_file_info(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get file information from local storage.

        Returns:
            Size, last-modified timestamp (ISO-8601, UTC with offset) and a
            content type guessed from the filename, or ``None`` if the file is
            missing or any error occurred.
        """
        try:
            local_path = self._resolve_local_path(file_path)
            if not local_path.exists():
                return None

            stat = local_path.stat()
            return {
                "file_size": stat.st_size,
                # Aware UTC so the value is comparable across hosts/timezones.
                "last_modified": datetime.fromtimestamp(
                    stat.st_mtime, tz=timezone.utc
                ).isoformat(),
                "content_type": mimetypes.guess_type(str(local_path))[0],
            }
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Listing
    # ------------------------------------------------------------------

    async def _list_s3_files(self, prefix: str, max_keys: int) -> List[Dict[str, Any]]:
        """
        List files in S3-compatible storage.

        Pages through ``list_objects_v2`` until ``max_keys`` entries were
        collected or the listing is exhausted, so ``max_keys`` larger than a
        single page is honoured.

        Returns:
            A list of dicts with ``key``, ``size`` and ``last_modified``;
            empty on ``ClientError``.
        """
        try:
            tenant_prefix = f"tenants/{self.tenant.id}/documents/{prefix}"
            files: List[Dict[str, Any]] = []
            continuation_token = None

            while len(files) < max_keys:
                request = {
                    "Bucket": self.bucket_name,
                    "Prefix": tenant_prefix,
                    "MaxKeys": min(self.S3_LIST_PAGE_SIZE, max_keys - len(files)),
                }
                if continuation_token:
                    request["ContinuationToken"] = continuation_token

                response = await self._run_blocking(
                    self.s3_client.list_objects_v2, **request
                )

                for obj in response.get('Contents', []):
                    files.append({
                        "key": obj['Key'],
                        "size": obj['Size'],
                        "last_modified": obj['LastModified'].isoformat(),
                    })

                continuation_token = response.get('NextContinuationToken')
                if not response.get('IsTruncated') or not continuation_token:
                    break

            return files[:max_keys] if max_keys > 0 else []

        except ClientError as e:
            logger.error("S3 list error: %s", e)
            return []

    def _scan_local_directory(self, directory: Path, max_keys: int) -> List[Dict[str, Any]]:
        """Synchronously collect up to ``max_keys`` regular files below ``directory``."""
        if max_keys <= 0 or not directory.exists():
            return []

        storage_root = Path(self.LOCAL_STORAGE_ROOT).resolve()
        files: List[Dict[str, Any]] = []
        for entry in directory.rglob("*"):
            if not entry.is_file():
                continue
            stat = entry.stat()
            files.append({
                # POSIX separators keep keys identical on every platform and
                # usable as input to the other methods.
                "key": entry.relative_to(storage_root).as_posix(),
                "size": stat.st_size,
                "last_modified": datetime.fromtimestamp(
                    stat.st_mtime, tz=timezone.utc
                ).isoformat(),
            })
            if len(files) >= max_keys:
                break
        return files

    async def _list_local_files(self, prefix: str, max_keys: int) -> List[Dict[str, Any]]:
        """
        List files in local storage.

        ``prefix`` is interpreted as a sub-directory of the tenant's documents
        directory and is searched recursively.

        Returns:
            A list of dicts with ``key`` (relative to the storage root),
            ``size`` and ``last_modified`` (ISO-8601, UTC with offset); empty
            if the directory does not exist or an error occurred.
        """
        try:
            tenant_path = self._resolve_local_path(
                f"tenants/{self.tenant.id}/documents/{prefix}"
            )
            return await self._run_blocking(
                self._scan_local_directory, tenant_path, max_keys
            )
        except Exception as e:
            logger.error("Local list error: %s", e)
            return []

    # ------------------------------------------------------------------
    # Maintenance
    # ------------------------------------------------------------------

    async def cleanup_old_files(self, days_old: int = 30) -> int:
        """
        Clean up old files from storage.

        Deletes every file returned by ``list_files()`` (default arguments,
        so at most its default ``max_keys`` entries per call) whose
        ``last_modified`` is older than ``days_old`` days.

        Returns:
            The number of files deleted; 0 if an error occurred.
        """
        try:
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)
            deleted_count = 0

            files = await self.list_files()
            for file_info in files:
                last_modified = datetime.fromisoformat(file_info['last_modified'])
                # S3 timestamps are timezone-aware; comparing them with a naive
                # cutoff raises TypeError. Normalise naive values to UTC.
                if last_modified.tzinfo is None:
                    last_modified = last_modified.replace(tzinfo=timezone.utc)

                if last_modified < cutoff_date and await self.delete_file(file_info['key']):
                    deleted_count += 1

            return deleted_count

        except Exception as e:
            logger.error("Cleanup error: %s", e)
            return 0