137 lines
4.9 KiB
Python
137 lines
4.9 KiB
Python
"""
|
|
Document models for the Virtual Board Member AI System.
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
from sqlalchemy import Column, String, DateTime, Boolean, Text, Integer, ForeignKey, Table
|
|
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
|
from sqlalchemy.orm import relationship
|
|
import uuid
|
|
import enum
|
|
|
|
from app.core.database import Base
|
|
|
|
|
|
class DocumentType(str, enum.Enum):
    """Closed set of document categories.

    Every member also inherits from ``str``, so a member compares equal to
    its lowercase snake_case value (``DocumentType.MINUTES == "minutes"``)
    and can be looked up from that value (``DocumentType("minutes")``).
    """

    # Board and governance material
    BOARD_PACK = "board_pack"
    MINUTES = "minutes"
    STRATEGIC_PLAN = "strategic_plan"

    # Reporting
    FINANCIAL_REPORT = "financial_report"
    COMPLIANCE_REPORT = "compliance_report"

    # Policy and legal
    POLICY_DOCUMENT = "policy_document"
    CONTRACT = "contract"

    # Generic office formats
    PRESENTATION = "presentation"
    SPREADSHEET = "spreadsheet"

    # Catch-all for anything not covered above
    OTHER = "other"
|
|
|
|
|
|
# Many-to-many link table between documents and tags. Each row pairs one
# document with one tag; the two foreign keys together form the composite
# primary key, so a given (document, tag) pair can appear at most once.
document_tag_association = Table(
    "document_tag_association",
    Base.metadata,
    Column(
        "document_id",
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        primary_key=True,
    ),
    Column(
        "tag_id",
        UUID(as_uuid=True),
        ForeignKey("document_tags.id"),
        primary_key=True,
    ),
)
|
|
|
|
|
|
class Document(Base):
    """ORM model for a row in the ``documents`` table.

    A document carries descriptive fields (title, description, type), the
    location and size of its file, a processing status, optional extracted
    text and embedding, free-form metadata, and ownership references.

    Relationships:
        versions: ``DocumentVersion`` rows for this document; they are
            cascaded on save and deleted together with the document
            (``all, delete-orphan``).
        tags: ``DocumentTag`` rows linked through
            ``document_tag_association``.
        commitments: ``Commitment`` rows; that model is declared outside
            this module.
    """

    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    title = Column(String(500), nullable=False, index=True)
    description = Column(Text, nullable=True)
    # The column is a plain String, so default to the enum's string value
    # rather than the enum member itself. This keeps the in-memory attribute
    # a ``str`` both for freshly inserted rows and for rows loaded from the
    # database, and keeps ``__repr__`` output independent of how the Python
    # version formats ``(str, Enum)`` members.
    document_type = Column(
        String(50),
        nullable=False,
        default=DocumentType.OTHER.value,
    )

    # File information
    filename = Column(String(255), nullable=False)
    file_path = Column(String(500), nullable=False)
    file_size = Column(Integer, nullable=False)
    mime_type = Column(String(100), nullable=False)

    # Processing status
    processing_status = Column(String(50), default="pending")  # pending, processing, completed, failed
    processing_error = Column(Text, nullable=True)

    # Content extraction
    extracted_text = Column(Text, nullable=True)
    text_embedding = Column(JSONB, nullable=True)  # Vector embedding

    # Metadata
    document_metadata = Column(JSONB, nullable=True)  # Additional metadata
    source_system = Column(String(100), nullable=True)  # SharePoint, email, upload, etc.
    external_id = Column(String(255), nullable=True)  # ID from external system

    # Ownership
    uploaded_by = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False)
    # No foreign key is declared on this column.
    organization_id = Column(UUID(as_uuid=True), nullable=True)  # For multi-tenant support

    # Timestamps: filled in Python with naive UTC datetimes.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Relationships
    versions = relationship("DocumentVersion", back_populates="document", cascade="all, delete-orphan")
    tags = relationship("DocumentTag", secondary=document_tag_association, back_populates="documents")
    commitments = relationship("Commitment", back_populates="document")

    def __repr__(self):
        """Return a short debug representation with id, title and type."""
        return f"<Document(id={self.id}, title='{self.title}', type='{self.document_type}')>"
|
|
|
|
|
|
class DocumentVersion(Base):
    """ORM model for a row in the ``document_versions`` table.

    Each row is one numbered version of a parent ``Document`` and records
    its own file location, size and checksum, optional extracted text and
    embedding, an optional change description, and the creating user.
    """

    __tablename__ = "document_versions"

    # Identity and parent link
    id = Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
    document_id = Column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=False,
    )
    version_number = Column(Integer, nullable=False)

    # File information
    filename = Column(String(255), nullable=False)
    file_path = Column(String(500), nullable=False)
    file_size = Column(Integer, nullable=False)
    checksum = Column(String(64), nullable=False)  # SHA-256 hash

    # Content
    extracted_text = Column(Text, nullable=True)
    text_embedding = Column(JSONB, nullable=True)

    # Metadata
    change_description = Column(Text, nullable=True)
    created_by = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id"),
        nullable=False,
    )

    # Timestamps: filled in Python with a naive UTC datetime.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Relationships: the other side is ``Document.versions``.
    document = relationship("Document", back_populates="versions")

    def __repr__(self):
        """Return a short debug representation with id, parent id and version."""
        return f"<DocumentVersion(id={self.id}, document_id={self.document_id}, version={self.version_number})>"
|
|
|
|
|
|
class DocumentTag(Base):
    """ORM model for a row in the ``document_tags`` table.

    A tag has a unique, indexed name plus an optional description and
    colour, and is linked to documents through
    ``document_tag_association``.
    """

    __tablename__ = "document_tags"

    id = Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
    name = Column(String(100), unique=True, index=True, nullable=False)
    description = Column(Text, nullable=True)
    color = Column(String(7), nullable=True)  # Hex color code

    # Timestamps: filled in Python with naive UTC datetimes; ``updated_at``
    # is also refreshed on every UPDATE.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Relationships: the other side is ``Document.tags``.
    documents = relationship(
        "Document",
        secondary=document_tag_association,
        back_populates="tags",
    )

    def __repr__(self):
        """Return a short debug representation with id and name."""
        return f"<DocumentTag(id={self.id}, name='{self.name}')>"
|