Week 3 complete: async test suite fixed, integration tests converted to pytest, config fixes (ENABLE_SUBDOMAIN_TENANTS), auth compatibility (get_current_tenant), healthcheck test stabilized; all tests passing (31/31)
This commit is contained in:
@@ -11,6 +11,7 @@ from app.api.v1.endpoints import (
|
||||
commitments,
|
||||
analytics,
|
||||
health,
|
||||
vector_operations,
|
||||
)
|
||||
|
||||
api_router = APIRouter()
|
||||
@@ -22,3 +23,4 @@ api_router.include_router(queries.router, prefix="/queries", tags=["Queries"])
|
||||
api_router.include_router(commitments.router, prefix="/commitments", tags=["Commitments"])
|
||||
api_router.include_router(analytics.router, prefix="/analytics", tags=["Analytics"])
|
||||
api_router.include_router(health.router, prefix="/health", tags=["Health"])
|
||||
api_router.include_router(vector_operations.router, prefix="/vector", tags=["Vector Operations"])
|
||||
|
||||
375
app/api/v1/endpoints/vector_operations.py
Normal file
375
app/api/v1/endpoints/vector_operations.py
Normal file
@@ -0,0 +1,375 @@
|
||||
"""
|
||||
Vector database operations endpoints for the Virtual Board Member AI System.
|
||||
Implements Week 3 functionality for vector search, indexing, and performance monitoring.
|
||||
"""
|
||||
|
||||
import logging
import time
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel

from app.core.auth import get_current_tenant, get_current_user
from app.models.tenant import Tenant
from app.models.user import User
from app.services.document_chunking import DocumentChunkingService
from app.services.vector_service import vector_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class SearchRequest(BaseModel):
    """Request model for vector search operations.

    Fields mirror the keyword arguments of ``vector_service.search_similar``.
    """
    # Free-text query to match against stored vectors.
    query: str
    # Maximum number of results to return.
    limit: int = 10
    # Minimum similarity score for a result to be included.
    score_threshold: float = 0.7
    # Optional restriction to specific chunk types — presumably values
    # understood by the vector service; verify against its implementation.
    chunk_types: Optional[List[str]] = None
    # Optional metadata filters passed through to the vector store.
    filters: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class StructuredDataSearchRequest(BaseModel):
    """Request model for structured data search (tables and charts)."""
    # Free-text query to match against stored structured-data vectors.
    query: str
    # Kind of structured data to search for.
    data_type: str = "table"  # "table" or "chart"
    # Maximum number of results to return.
    limit: int = 10
    # Minimum similarity score for a result to be included.
    score_threshold: float = 0.7
    # Optional metadata filters passed through to the vector store.
    filters: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class HybridSearchRequest(BaseModel):
    """Request model for hybrid (semantic + keyword) search operations."""
    # Free-text query used for both semantic and keyword matching.
    query: str
    # Maximum number of results to return.
    limit: int = 10
    # Minimum similarity score for a result to be included.
    score_threshold: float = 0.7
    # Relative weights of the two ranking signals — presumably expected to
    # sum to 1.0, but that is not enforced here; verify in vector_service.
    semantic_weight: float = 0.7
    keyword_weight: float = 0.3
    # Optional metadata filters passed through to the vector store.
    filters: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class DocumentChunkingRequest(BaseModel):
    """Request model for document chunking operations."""
    # Identifier of the document to chunk.
    document_id: str
    # Parsed document content — schema is whatever
    # DocumentChunkingService.chunk_document_content expects; not validated here.
    content: Dict[str, Any]
|
||||
|
||||
|
||||
class SearchResponse(BaseModel):
    """Response model shared by all search endpoints."""
    # Matching chunks as returned by the vector service.
    results: List[Dict[str, Any]]
    # Convenience count, always len(results).
    total_results: int
    # Echo of the query that produced these results.
    query: str
    # Which search strategy produced the results, e.g. "semantic" or "hybrid".
    search_type: str
    # Wall-clock time of the search call, in milliseconds.
    execution_time_ms: float
|
||||
|
||||
|
||||
class PerformanceMetricsResponse(BaseModel):
    """Response model for performance metrics.

    Populated directly from ``vector_service.get_performance_metrics``,
    so field names must match that dict's keys.
    """
    # Tenant the metrics were collected for.
    tenant_id: str
    # When the metrics snapshot was taken (string-formatted).
    timestamp: str
    # Per-collection statistics, keyed by collection name.
    collections: Dict[str, Any]
    # Embedding model in use and its output vector size.
    embedding_model: str
    embedding_dimension: int
|
||||
|
||||
|
||||
class BenchmarkResponse(BaseModel):
    """Response model for performance benchmarks.

    Populated directly from ``vector_service.create_performance_benchmarks``,
    so field names must match that dict's keys.
    """
    # Tenant the benchmarks were run for.
    tenant_id: str
    # When the benchmark run happened (string-formatted).
    timestamp: str
    # Benchmark results as produced by the vector service.
    results: Dict[str, Any]
|
||||
|
||||
|
||||
@router.post("/search", response_model=SearchResponse)
async def search_documents(
    request: SearchRequest,
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Search documents using semantic similarity.

    Args:
        request: Query text plus limit/threshold/filter options.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose vector collections are searched.

    Returns:
        SearchResponse with the matching chunks and timing information.

    Raises:
        HTTPException: 500 if the underlying vector search fails.
    """
    # Fix: the tenant dependency was wired to get_current_user, which yields
    # a User, not a Tenant — resolve it via get_current_tenant instead.
    start_time = time.time()
    try:
        results = await vector_service.search_similar(
            tenant_id=str(tenant.id),
            query=request.query,
            limit=request.limit,
            score_threshold=request.score_threshold,
            chunk_types=request.chunk_types,
            filters=request.filters
        )

        execution_time_ms = (time.time() - start_time) * 1000

        return SearchResponse(
            results=results,
            total_results=len(results),
            query=request.query,
            search_type="semantic",
            execution_time_ms=round(execution_time_ms, 2)
        )

    except Exception as e:
        logger.exception("Search failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/search/structured", response_model=SearchResponse)
async def search_structured_data(
    request: StructuredDataSearchRequest,
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Search specifically for structured data (tables and charts).

    Args:
        request: Query plus data_type ("table" or "chart") and search options.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose vector collections are searched.

    Returns:
        SearchResponse; search_type is "structured_<data_type>".

    Raises:
        HTTPException: 500 if the underlying search fails.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    start_time = time.time()
    try:
        results = await vector_service.search_structured_data(
            tenant_id=str(tenant.id),
            query=request.query,
            data_type=request.data_type,
            limit=request.limit,
            score_threshold=request.score_threshold,
            filters=request.filters
        )

        execution_time_ms = (time.time() - start_time) * 1000

        return SearchResponse(
            results=results,
            total_results=len(results),
            query=request.query,
            search_type=f"structured_{request.data_type}",
            execution_time_ms=round(execution_time_ms, 2)
        )

    except Exception as e:
        logger.exception("Structured data search failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Structured data search failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/search/hybrid", response_model=SearchResponse)
async def hybrid_search(
    request: HybridSearchRequest,
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Perform hybrid search combining semantic and keyword matching.

    Args:
        request: Query plus limit/threshold/filter options and the
            semantic/keyword ranking weights.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose vector collections are searched.

    Returns:
        SearchResponse with search_type "hybrid".

    Raises:
        HTTPException: 500 if the underlying search fails.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    start_time = time.time()
    try:
        results = await vector_service.hybrid_search(
            tenant_id=str(tenant.id),
            query=request.query,
            limit=request.limit,
            score_threshold=request.score_threshold,
            filters=request.filters,
            semantic_weight=request.semantic_weight,
            keyword_weight=request.keyword_weight
        )

        execution_time_ms = (time.time() - start_time) * 1000

        return SearchResponse(
            results=results,
            total_results=len(results),
            query=request.query,
            search_type="hybrid",
            execution_time_ms=round(execution_time_ms, 2)
        )

    except Exception as e:
        logger.exception("Hybrid search failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Hybrid search failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/chunk-document")
async def chunk_document(
    request: DocumentChunkingRequest,
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Chunk a document for vector indexing.

    Args:
        request: Document id and parsed content to chunk.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant the chunking service is scoped to.

    Returns:
        Dict with the produced chunks, per-chunk statistics, and a status flag.

    Raises:
        HTTPException: 500 if chunking fails.
    """
    # Fix: tenant dependency previously resolved via get_current_user, so the
    # DocumentChunkingService would have been constructed with a User object.
    try:
        chunking_service = DocumentChunkingService(tenant)

        chunks = await chunking_service.chunk_document_content(
            document_id=request.document_id,
            content=request.content
        )

        # Summarize the chunking outcome for the caller.
        statistics = await chunking_service.get_chunk_statistics(chunks)

        return {
            "document_id": request.document_id,
            "chunks": chunks,
            "statistics": statistics,
            "status": "success"
        }

    except Exception as e:
        logger.exception("Document chunking failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Document chunking failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/index-document")
async def index_document(
    document_id: str,
    chunks: Dict[str, List[Dict[str, Any]]],
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Index document chunks in the vector database.

    Args:
        document_id: Identifier of the document the chunks belong to.
        chunks: Chunk payloads grouped by key — shape must match what
            vector_service.add_document_vectors expects.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose collection receives the vectors.

    Returns:
        Dict confirming the document was indexed.

    Raises:
        HTTPException: 500 if indexing fails or reports failure.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        success = await vector_service.add_document_vectors(
            tenant_id=str(tenant.id),
            document_id=document_id,
            chunks=chunks
        )

        if not success:
            raise HTTPException(status_code=500, detail="Failed to index document")

        return {
            "document_id": document_id,
            "status": "indexed",
            "message": "Document successfully indexed in vector database"
        }

    except HTTPException:
        # Fix: without this guard the broad handler below re-wrapped the
        # deliberate 500 above into a different error message.
        raise
    except Exception as e:
        logger.exception("Document indexing failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Document indexing failed: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/collections/stats")
async def get_collection_statistics(
    collection_type: str = Query("documents", description="Type of collection"),
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Get statistics for a specific collection.

    Args:
        collection_type: Which tenant collection to report on.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose collection is inspected.

    Returns:
        The stats object from the vector service, passed through as-is.

    Raises:
        HTTPException: 404 if the collection does not exist, 500 on failure.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        stats = await vector_service.get_collection_stats(
            tenant_id=str(tenant.id),
            collection_type=collection_type
        )

        if not stats:
            raise HTTPException(status_code=404, detail="Collection not found")

        return stats

    except HTTPException:
        # Fix: the broad handler below was converting the intended 404
        # into a generic 500; let HTTP errors propagate untouched.
        raise
    except Exception as e:
        logger.exception("Failed to get collection stats: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to get collection stats: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/performance/metrics", response_model=PerformanceMetricsResponse)
async def get_performance_metrics(
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Get performance metrics for vector database operations.

    Args:
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose metrics are collected.

    Returns:
        PerformanceMetricsResponse built from the service's metrics dict.

    Raises:
        HTTPException: 500 if the service reports or raises an error.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        metrics = await vector_service.get_performance_metrics(str(tenant.id))

        # The service signals failure in-band via an "error" key.
        if "error" in metrics:
            raise HTTPException(status_code=500, detail=metrics["error"])

        return PerformanceMetricsResponse(**metrics)

    except HTTPException:
        # Fix: preserve the in-band error detail instead of re-wrapping it.
        raise
    except Exception as e:
        logger.exception("Failed to get performance metrics: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to get performance metrics: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/performance/benchmarks", response_model=BenchmarkResponse)
async def create_performance_benchmarks(
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Create performance benchmarks for vector operations.

    Args:
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant the benchmarks are run for.

    Returns:
        BenchmarkResponse built from the service's benchmark dict.

    Raises:
        HTTPException: 500 if the service reports or raises an error.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        benchmarks = await vector_service.create_performance_benchmarks(str(tenant.id))

        # The service signals failure in-band via an "error" key.
        if "error" in benchmarks:
            raise HTTPException(status_code=500, detail=benchmarks["error"])

        return BenchmarkResponse(**benchmarks)

    except HTTPException:
        # Fix: preserve the in-band error detail instead of re-wrapping it.
        raise
    except Exception as e:
        logger.exception("Failed to create performance benchmarks: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to create performance benchmarks: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/optimize")
async def optimize_collections(
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Optimize vector database collections for performance.

    Args:
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose collections are optimized.

    Returns:
        Dict with the service's optimization results and a status flag.

    Raises:
        HTTPException: 500 if optimization reports or raises an error.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        optimization_results = await vector_service.optimize_collections(str(tenant.id))

        # The service signals failure in-band via an "error" key.
        if "error" in optimization_results:
            raise HTTPException(status_code=500, detail=optimization_results["error"])

        return {
            "tenant_id": str(tenant.id),
            "optimization_results": optimization_results,
            "status": "optimization_completed"
        }

    except HTTPException:
        # Fix: preserve the in-band error detail instead of re-wrapping it.
        raise
    except Exception as e:
        logger.exception("Collection optimization failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Collection optimization failed: {str(e)}")
|
||||
|
||||
|
||||
@router.delete("/documents/{document_id}")
async def delete_document_vectors(
    document_id: str,
    collection_type: str = Query("documents", description="Type of collection"),
    current_user: User = Depends(get_current_user),
    tenant: Tenant = Depends(get_current_tenant)
):
    """Delete all vectors for a specific document.

    Args:
        document_id: Document whose vectors are removed.
        collection_type: Which tenant collection to delete from.
        current_user: Authenticated caller (auth gate only; not otherwise used).
        tenant: Tenant whose collection is modified.

    Returns:
        Dict confirming the deletion.

    Raises:
        HTTPException: 500 if deletion fails or reports failure.
    """
    # Fix: tenant dependency previously resolved via get_current_user.
    try:
        success = await vector_service.delete_document_vectors(
            tenant_id=str(tenant.id),
            document_id=document_id,
            collection_type=collection_type
        )

        if not success:
            raise HTTPException(status_code=500, detail="Failed to delete document vectors")

        return {
            "document_id": document_id,
            "status": "deleted",
            "message": "Document vectors successfully deleted"
        }

    except HTTPException:
        # Fix: without this guard the broad handler below re-wrapped the
        # deliberate 500 above into a different error message.
        raise
    except Exception as e:
        logger.exception("Failed to delete document vectors: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to delete document vectors: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/health")
async def vector_service_health():
    """Check the health of the vector service.

    Returns:
        Dict with status, service name, and the active embedding model label.

    Raises:
        HTTPException: 503 if the service reports unhealthy or the check errors.
    """
    try:
        is_healthy = await vector_service.health_check()

        if not is_healthy:
            raise HTTPException(status_code=503, detail="Vector service is unhealthy")

        return {
            "status": "healthy",
            "service": "vector_database",
            # Falls back to the remote-API label when no local model object is loaded.
            "embedding_model": vector_service.embedding_model.__class__.__name__ if vector_service.embedding_model else "Voyage-3-large API"
        }

    except HTTPException:
        # Fix: the broad handler below was re-wrapping the deliberate
        # "unhealthy" 503 with a different detail message.
        raise
    except Exception as e:
        logger.exception("Vector service health check failed: %s", e)
        raise HTTPException(status_code=503, detail=f"Vector service health check failed: {str(e)}")
|
||||
Reference in New Issue
Block a user