Files
virtual_board_member/app/models/document.py
2025-08-07 16:11:14 -04:00

137 lines
4.9 KiB
Python

"""
Document models for the Virtual Board Member AI System.
"""
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, String, DateTime, Boolean, Text, Integer, ForeignKey, Table
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.orm import relationship
import uuid
import enum
from app.core.database import Base
@enum.unique
class DocumentType(str, enum.Enum):
    """Closed set of document categories.

    Members also subclass ``str``, so each one compares equal to its raw
    value (``DocumentType.MINUTES == "minutes"``).

    ``@enum.unique`` makes a duplicated value raise ``ValueError`` when the
    class is created, instead of silently turning the later name into an
    alias of the earlier member.
    """

    BOARD_PACK = "board_pack"
    MINUTES = "minutes"
    STRATEGIC_PLAN = "strategic_plan"
    FINANCIAL_REPORT = "financial_report"
    COMPLIANCE_REPORT = "compliance_report"
    POLICY_DOCUMENT = "policy_document"
    CONTRACT = "contract"
    PRESENTATION = "presentation"
    SPREADSHEET = "spreadsheet"
    OTHER = "other"  # Fallback category
# Many-to-many link table between documents and tags. Each row pairs one
# "documents.id" with one "document_tags.id"; the two columns together form
# the composite primary key.
document_tag_association = Table(
    "document_tag_association",
    Base.metadata,
    Column(
        "document_id",
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        primary_key=True,
    ),
    Column(
        "tag_id",
        UUID(as_uuid=True),
        ForeignKey("document_tags.id"),
        primary_key=True,
    ),
)
class Document(Base):
    """Document model, mapped to the ``documents`` table.

    Holds the descriptive fields, stored-file details, processing state,
    extracted content and uploader reference for one document, and links
    it to its versions, tags and commitments.
    """

    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    title = Column(String(500), nullable=False, index=True)
    description = Column(Text, nullable=True)
    # The column is a plain string, so the default is the enum's raw value
    # rather than the enum member. This keeps the attribute a plain ``str``
    # both before and after the row is reloaded from the database.
    document_type = Column(
        String(50),
        nullable=False,
        default=DocumentType.OTHER.value,
    )

    # File information
    filename = Column(String(255), nullable=False)
    file_path = Column(String(500), nullable=False)
    file_size = Column(Integer, nullable=False)
    mime_type = Column(String(100), nullable=False)

    # Processing status: pending, processing, completed, failed
    processing_status = Column(String(50), default="pending")
    processing_error = Column(Text, nullable=True)

    # Content extraction
    extracted_text = Column(Text, nullable=True)
    text_embedding = Column(JSONB, nullable=True)  # Vector embedding

    # Metadata
    document_metadata = Column(JSONB, nullable=True)  # Additional metadata
    source_system = Column(String(100), nullable=True)  # SharePoint, email, upload, etc.
    external_id = Column(String(255), nullable=True)  # ID from external system

    # Ownership
    uploaded_by = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False)
    # For multi-tenant support; declared without a foreign key.
    organization_id = Column(UUID(as_uuid=True), nullable=True)

    # Timestamps
    # NOTE(review): datetime.utcnow returns naive datetimes and is deprecated
    # since Python 3.12; moving to timezone-aware values would also need a
    # column/schema change, so it is left as-is here.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships
    # Versions are owned by the document: they are deleted with it and when
    # removed from this collection ("all, delete-orphan").
    versions = relationship(
        "DocumentVersion",
        back_populates="document",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "DocumentTag",
        secondary=document_tag_association,
        back_populates="documents",
    )
    # "Commitment" is defined outside this module and must expose a matching
    # ``document`` relationship.
    commitments = relationship("Commitment", back_populates="document")

    def __repr__(self):
        """Return a short debug representation with id, title and type."""
        return (
            f"<Document(id={self.id}, title='{self.title}', "
            f"type='{self.document_type}')>"
        )
class DocumentVersion(Base):
    """Document version model, mapped to the ``document_versions`` table.

    Each row records one numbered version of a document together with its
    stored file, checksum and extracted content.
    """

    __tablename__ = "document_versions"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    document_id = Column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=False,
    )
    version_number = Column(Integer, nullable=False)

    # File information
    filename = Column(String(255), nullable=False)
    file_path = Column(String(500), nullable=False)
    file_size = Column(Integer, nullable=False)
    checksum = Column(String(64), nullable=False)  # SHA-256 hash

    # Content
    extracted_text = Column(Text, nullable=True)
    text_embedding = Column(JSONB, nullable=True)

    # Metadata
    change_description = Column(Text, nullable=True)
    created_by = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id"),
        nullable=False,
    )

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    document = relationship("Document", back_populates="versions")

    def __repr__(self):
        """Return a short debug representation with id, document and version."""
        return (
            f"<DocumentVersion(id={self.id}, document_id={self.document_id}, "
            f"version={self.version_number})>"
        )
class DocumentTag(Base):
    """Document tag model, mapped to the ``document_tags`` table.

    A tag has a unique name and is attached to documents through the
    ``document_tag_association`` link table.
    """

    __tablename__ = "document_tags"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(100), nullable=False, unique=True, index=True)
    description = Column(Text, nullable=True)
    color = Column(String(7), nullable=True)  # Hex color code

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships
    documents = relationship(
        "Document",
        secondary=document_tag_association,
        back_populates="tags",
    )

    def __repr__(self):
        """Return a short debug representation with id and name."""
        return f"<DocumentTag(id={self.id}, name='{self.name}')>"