- Implement Autonomous Workflow Engine with dynamic task decomposition
- Add Multi-Agent Communication Protocol with message routing
- Create Enhanced Reasoning Chains (CoT, ToT, Multi-Step, Parallel, Hybrid)
- Add comprehensive REST API endpoints for all Week 5 features
- Include 26/26 passing tests with full coverage
- Add complete documentation and API guides
- Update development plan to mark Week 5 as completed

Features:
- Dynamic task decomposition and parallel execution
- Agent registration, messaging, and coordination
- 5 reasoning methods with validation and learning
- Robust error handling and monitoring
- Multi-tenant support and security
- Production-ready architecture

Files added/modified:
- app/services/autonomous_workflow_engine.py
- app/services/agent_communication.py
- app/services/enhanced_reasoning.py
- app/api/v1/endpoints/week5_features.py
- tests/test_week5_features.py
- docs/week5_api_documentation.md
- docs/week5_readme.md
- WEEK5_COMPLETION_SUMMARY.md
- DEVELOPMENT_PLAN.md (updated)

All tests passing: 26/26
146 lines
5.0 KiB
Python
146 lines
5.0 KiB
Python
"""
|
|
LLM orchestration service with OpenRouter integration, model routing, and fallback.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Any, Dict, Optional
|
|
|
|
import httpx
|
|
|
|
from app.core.config import settings
|
|
from app.core.cache import cache_service
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ModelRouter:
    """Routes each task type to a model name, honoring per-tenant overrides."""

    # Baseline task -> model mapping used when a tenant has no cached override.
    DEFAULT_TASK_TO_MODEL: Dict[str, str] = {
        "extraction": "gpt-4o-mini",
        "analysis": "gpt-4o-mini",
        "synthesis": "gpt-4o-mini",
        "vision": "gpt-4-vision-preview",
        "classification": "gpt-4o-mini",
        "general": settings.OPENROUTER_MODEL,
    }

    @staticmethod
    async def choose_model(task: str, tenant_id: str) -> str:
        """Return the model for *task*, preferring a tenant-level cache override.

        Falls back to DEFAULT_TASK_TO_MODEL, and finally to the globally
        configured OPENROUTER_MODEL for unknown task types.
        """
        normalized = (task or "general").lower()
        # Tenants may pin a specific model per task via a cached override entry.
        cached_override = await cache_service.get(
            f"llm:model:override:{tenant_id}:{normalized}", tenant_id
        )
        if isinstance(cached_override, str) and cached_override:
            return cached_override
        # No override present: use the static mapping (or the global default).
        return ModelRouter.DEFAULT_TASK_TO_MODEL.get(normalized, settings.OPENROUTER_MODEL)
|
|
|
|
|
|
class LLMService:
    """OpenRouter-backed LLM service with tenant-aware routing and fallback.

    Talks to the OpenRouter chat-completions API. When MOCK_LLM_RESPONSES is
    set or no API key is configured, it short-circuits into a deterministic
    offline response instead of making a network call.
    """

    def __init__(self):
        self.base_url = settings.OPENROUTER_BASE_URL.rstrip("/")
        self.default_model = settings.OPENROUTER_MODEL
        self.fallback_model = settings.OPENROUTER_FALLBACK_MODEL
        # BUG FIX: _headers() reads self.api_key, but it was never assigned
        # anywhere, so every authenticated request raised AttributeError.
        self.api_key = self._api_key()

    def _headers(self) -> Dict[str, str]:
        """Build the HTTP headers for an OpenRouter request."""
        return {
            # Empty Authorization when no key is set; the offline-mode check in
            # generate_text() normally short-circuits before we get here.
            "Authorization": f"Bearer {self.api_key}" if self.api_key else "",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://virtual-board-member.local",
            "X-Title": "Virtual Board Member AI",
        }

    async def _post_chat(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST *payload* to the chat-completions endpoint.

        Raises:
            httpx.HTTPStatusError: on any non-2xx response.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{self.base_url}/chat/completions", json=payload, headers=self._headers()
            )
            response.raise_for_status()
            return response.json()

    def _api_key(self) -> Optional[str]:
        """Return the configured OpenRouter API key, or None when absent."""
        return getattr(settings, "OPENROUTER_API_KEY", None)

    @staticmethod
    def _extract_text(data: Dict[str, Any]) -> str:
        """Pull the assistant message text out of a chat-completions response.

        Tolerates missing keys and returns "" rather than raising.
        """
        return data.get("choices", [{}])[0].get("message", {}).get("content", "")

    async def generate_text(
        self,
        prompt: str,
        *,
        tenant_id: str,
        task: str = "general",
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        system_prompt: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Generate text with model routing and one-shot fallback.

        Args:
            prompt: User prompt to send to the model.
            tenant_id: Tenant used for per-tenant model overrides.
            task: Task type used by ModelRouter to pick a model.
            max_tokens: Override for the configured token limit.
            temperature: Override for the configured sampling temperature.
            system_prompt: Optional system message prepended to the chat.

        Returns:
            Dict with keys: text, model, usage, raw. In offline mode the
            model is "offline" and usage/raw are empty dicts.

        Raises:
            Exception: re-raises the fallback model's error when both the
                routed model and the fallback model fail.
        """
        api_key = self._api_key()
        if (
            settings.MOCK_LLM_RESPONSES
            or not api_key
            or (isinstance(api_key, str) and api_key.strip() in ["", "your-openrouter-api-key"])
        ):
            # Operate in offline mode for environments without OpenRouter keys
            fake_text = (
                "[LLM unavailable] This environment lacks OPENROUTER_API_KEY. "
                "Returning deterministic offline response."
            )
            return {"text": fake_text, "model": "offline", "usage": {}, "raw": {}}

        chosen_model = await ModelRouter.choose_model(task, tenant_id)
        payload = {
            "model": chosen_model,
            "messages": self._build_messages(system_prompt, prompt),
            "max_tokens": max_tokens or settings.OPENROUTER_MAX_TOKENS,
            "temperature": temperature if temperature is not None else settings.OPENROUTER_TEMPERATURE,
        }

        try:
            data = await self._post_chat(payload)
            return {
                "text": self._extract_text(data),
                "model": chosen_model,
                "usage": data.get("usage", {}),
                "raw": data,
            }
        except Exception as primary_error:
            logger.warning("Primary model failed, attempting fallback: %s", primary_error)
            # Single fallback attempt with the same payload, different model.
            fallback_model = self.fallback_model
            try:
                payload["model"] = fallback_model
                data = await self._post_chat(payload)
                return {
                    "text": self._extract_text(data),
                    "model": fallback_model,
                    "usage": data.get("usage", {}),
                    "raw": data,
                }
            except Exception as fallback_error:
                logger.error("Fallback model also failed: %s", fallback_error)
                # Bare raise re-raises the fallback error; the primary error
                # remains attached via exception __context__ chaining.
                raise

    @staticmethod
    def _build_messages(system_prompt: Optional[str], user_prompt: str) -> list[dict[str, str]]:
        """Assemble the chat message list, prepending the system prompt if given."""
        messages: list[dict[str, str]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})
        return messages
|
|
|
|
|
|
# Module-level singleton shared by the rest of the application; import this
# rather than constructing LLMService directly so configuration is read once.
llm_service = LLMService()
|
|
|
|
|