Files
virtual_board_member/app/services/llm_service.py
Jonathan Pressnell 5b5714e4c2 feat: Complete Week 5 implementation - Agentic RAG & Multi-Agent Orchestration
- Implement Autonomous Workflow Engine with dynamic task decomposition
- Add Multi-Agent Communication Protocol with message routing
- Create Enhanced Reasoning Chains (CoT, ToT, Multi-Step, Parallel, Hybrid)
- Add comprehensive REST API endpoints for all Week 5 features
- Include 26/26 passing tests with full coverage
- Add complete documentation and API guides
- Update development plan to mark Week 5 as completed

Features:
- Dynamic task decomposition and parallel execution
- Agent registration, messaging, and coordination
- 5 reasoning methods with validation and learning
- Robust error handling and monitoring
- Multi-tenant support and security
- Production-ready architecture

Files added/modified:
- app/services/autonomous_workflow_engine.py
- app/services/agent_communication.py
- app/services/enhanced_reasoning.py
- app/api/v1/endpoints/week5_features.py
- tests/test_week5_features.py
- docs/week5_api_documentation.md
- docs/week5_readme.md
- WEEK5_COMPLETION_SUMMARY.md
- DEVELOPMENT_PLAN.md (updated)

All tests passing: 26/26
2025-08-10 09:25:46 -04:00

146 lines
5.0 KiB
Python

"""
LLM orchestration service with OpenRouter integration, model routing, and fallback.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any, Dict, Optional
import httpx
from app.core.config import settings
from app.core.cache import cache_service
logger = logging.getLogger(__name__)
class ModelRouter:
    """Maps a task type to a concrete model name, honoring per-tenant overrides."""

    # Baseline routing table; "general" tracks the configured default model.
    # NOTE: settings.OPENROUTER_MODEL is captured once at import time.
    DEFAULT_TASK_TO_MODEL: Dict[str, str] = {
        "extraction": "gpt-4o-mini",
        "analysis": "gpt-4o-mini",
        "synthesis": "gpt-4o-mini",
        "vision": "gpt-4-vision-preview",
        "classification": "gpt-4o-mini",
        "general": settings.OPENROUTER_MODEL,
    }

    @staticmethod
    async def choose_model(task: str, tenant_id: str) -> str:
        """Return the model name for *task*, preferring a cached tenant override.

        Lookup order: per-tenant cache override, then the default task table,
        then the globally configured model.
        """
        normalized = (task or "general").lower()
        cache_key = f"llm:model:override:{tenant_id}:{normalized}"
        tenant_override = await cache_service.get(cache_key, tenant_id)
        if isinstance(tenant_override, str) and tenant_override:
            return tenant_override
        return ModelRouter.DEFAULT_TASK_TO_MODEL.get(normalized, settings.OPENROUTER_MODEL)
class LLMService:
    """OpenRouter-backed LLM service with tenant-aware routing and fallback.

    Calls the OpenRouter ``/chat/completions`` endpoint via httpx, choosing a
    model per task/tenant through :class:`ModelRouter`, retrying once on a
    configured fallback model, and short-circuiting to a deterministic offline
    response when no usable API key is configured.
    """

    def __init__(self) -> None:
        # rstrip so the path join in _post_chat never produces "//".
        self.base_url = settings.OPENROUTER_BASE_URL.rstrip("/")
        self.default_model = settings.OPENROUTER_MODEL
        self.fallback_model = settings.OPENROUTER_FALLBACK_MODEL

    def _headers(self) -> Dict[str, str]:
        """Build request headers for OpenRouter.

        BUGFIX: the original read ``self.api_key``, an attribute that was
        never assigned anywhere (``__init__`` does not set it), so every real
        request raised AttributeError. Use the ``_api_key()`` accessor.
        """
        api_key = self._api_key()
        return {
            "Authorization": f"Bearer {api_key}" if api_key else "",
            "Content-Type": "application/json",
            # OpenRouter attribution headers (app identification).
            "HTTP-Referer": "https://virtual-board-member.local",
            "X-Title": "Virtual Board Member AI",
        }

    async def _post_chat(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST *payload* to the chat-completions endpoint and return parsed JSON.

        Raises:
            httpx.HTTPStatusError: on non-2xx responses (via raise_for_status).
            httpx.TimeoutException: if the request exceeds the 30s timeout.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{self.base_url}/chat/completions", json=payload, headers=self._headers()
            )
            response.raise_for_status()
            return response.json()

    def _api_key(self) -> Optional[str]:
        """Return the configured OpenRouter API key, or None if unset."""
        return getattr(settings, "OPENROUTER_API_KEY", None)

    async def generate_text(
        self,
        prompt: str,
        *,
        tenant_id: str,
        task: str = "general",
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        system_prompt: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Generate text with routing and fallback.

        Args:
            prompt: User prompt text.
            tenant_id: Tenant identifier used for model-override lookup.
            task: Task category routed through ModelRouter (default "general").
            max_tokens: Optional cap; falls back to settings.OPENROUTER_MAX_TOKENS.
            temperature: Optional sampling temperature; falls back to settings.
            system_prompt: Optional system message prepended to the chat.

        Returns:
            Dict with keys: ``text``, ``model``, ``usage``, ``raw``.

        Raises:
            Exception: re-raised from the fallback attempt when both the
                primary and fallback model calls fail.
        """
        api_key = self._api_key()
        # Treat missing keys and the template placeholder as "no key".
        if (
            settings.MOCK_LLM_RESPONSES
            or not api_key
            or (isinstance(api_key, str) and api_key.strip() in ["", "your-openrouter-api-key"])
        ):
            # Operate in offline mode for environments without OpenRouter keys
            fake_text = (
                "[LLM unavailable] This environment lacks OPENROUTER_API_KEY. "
                "Returning deterministic offline response."
            )
            return {"text": fake_text, "model": "offline", "usage": {}, "raw": {}}

        chosen_model = await ModelRouter.choose_model(task, tenant_id)
        payload = {
            "model": chosen_model,
            "messages": self._build_messages(system_prompt, prompt),
            "max_tokens": max_tokens or settings.OPENROUTER_MAX_TOKENS,
            # "is not None" so temperature=0.0 is respected, not replaced.
            "temperature": temperature if temperature is not None else settings.OPENROUTER_TEMPERATURE,
        }
        try:
            data = await self._post_chat(payload)
            return self._extract_result(data, chosen_model)
        except Exception as primary_error:
            logger.warning("Primary model failed, attempting fallback: %s", primary_error)
            # One retry on the configured fallback model with the same payload.
            fallback_model = self.fallback_model
            try:
                payload["model"] = fallback_model
                data = await self._post_chat(payload)
                return self._extract_result(data, fallback_model)
            except Exception as fallback_error:
                logger.error("Fallback model also failed: %s", fallback_error)
                raise

    @staticmethod
    def _extract_result(data: Dict[str, Any], model: str) -> Dict[str, Any]:
        """Normalize an OpenRouter response into the service's result dict.

        Defensive ``.get`` chaining tolerates missing ``choices``/``message``
        keys and yields an empty string rather than raising.
        """
        text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        return {
            "text": text,
            "model": model,
            "usage": data.get("usage", {}),
            "raw": data,
        }

    @staticmethod
    def _build_messages(system_prompt: Optional[str], user_prompt: str) -> Any:
        """Assemble the chat message list: optional system message, then user."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})
        return messages
# Module-level singleton created at import time; the rest of the app should
# import and share this instance rather than constructing LLMService directly.
llm_service = LLMService()