""" Document models for the Virtual Board Member AI System. """ from datetime import datetime from typing import Optional from sqlalchemy import Column, String, DateTime, Boolean, Text, Integer, ForeignKey, Table from sqlalchemy.dialects.postgresql import UUID, JSONB from sqlalchemy.orm import relationship import uuid import enum from app.core.database import Base class DocumentType(str, enum.Enum): """Document types.""" BOARD_PACK = "board_pack" MINUTES = "minutes" STRATEGIC_PLAN = "strategic_plan" FINANCIAL_REPORT = "financial_report" COMPLIANCE_REPORT = "compliance_report" POLICY_DOCUMENT = "policy_document" CONTRACT = "contract" PRESENTATION = "presentation" SPREADSHEET = "spreadsheet" OTHER = "other" # Association table for document tags document_tag_association = Table( "document_tag_association", Base.metadata, Column("document_id", UUID(as_uuid=True), ForeignKey("documents.id"), primary_key=True), Column("tag_id", UUID(as_uuid=True), ForeignKey("document_tags.id"), primary_key=True), ) class Document(Base): """Document model.""" __tablename__ = "documents" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) title = Column(String(500), nullable=False, index=True) description = Column(Text, nullable=True) document_type = Column(String(50), nullable=False, default=DocumentType.OTHER) # File information filename = Column(String(255), nullable=False) file_path = Column(String(500), nullable=False) file_size = Column(Integer, nullable=False) mime_type = Column(String(100), nullable=False) # Processing status processing_status = Column(String(50), default="pending") # pending, processing, completed, failed processing_error = Column(Text, nullable=True) # Content extraction extracted_text = Column(Text, nullable=True) text_embedding = Column(JSONB, nullable=True) # Vector embedding # Metadata document_metadata = Column(JSONB, nullable=True) # Additional metadata source_system = Column(String(100), nullable=True) # SharePoint, email, upload, etc. external_id = Column(String(255), nullable=True) # ID from external system # Relationships uploaded_by = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False) organization_id = Column(UUID(as_uuid=True), nullable=True) # For multi-tenant support # Timestamps created_at = Column(DateTime, default=datetime.utcnow, nullable=False) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) # Relationships versions = relationship("DocumentVersion", back_populates="document", cascade="all, delete-orphan") tags = relationship("DocumentTag", secondary=document_tag_association, back_populates="documents") commitments = relationship("Commitment", back_populates="document") def __repr__(self): return f"" class DocumentVersion(Base): """Document version model.""" __tablename__ = "document_versions" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) document_id = Column(UUID(as_uuid=True), ForeignKey("documents.id"), nullable=False) version_number = Column(Integer, nullable=False) # File information filename = Column(String(255), nullable=False) file_path = Column(String(500), nullable=False) file_size = Column(Integer, nullable=False) checksum = Column(String(64), nullable=False) # SHA-256 hash # Content extracted_text = Column(Text, nullable=True) text_embedding = Column(JSONB, nullable=True) # Metadata change_description = Column(Text, nullable=True) created_by = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False) # Timestamps created_at = Column(DateTime, default=datetime.utcnow, nullable=False) # Relationships document = relationship("Document", back_populates="versions") def __repr__(self): return f"" class DocumentTag(Base): """Document tag model.""" __tablename__ = "document_tags" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) name = Column(String(100), nullable=False, unique=True, index=True) description = Column(Text, nullable=True) color = Column(String(7), nullable=True) # Hex color code # Timestamps created_at = Column(DateTime, default=datetime.utcnow, nullable=False) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) # Relationships documents = relationship("Document", secondary=document_tag_association, back_populates="tags") def __repr__(self): return f""