""" LLM orchestration service with OpenRouter integration, model routing, and fallback. """ from __future__ import annotations import asyncio import logging from typing import Any, Dict, Optional import httpx from app.core.config import settings from app.core.cache import cache_service logger = logging.getLogger(__name__) class ModelRouter: """Selects models based on task type and tenant overrides.""" DEFAULT_TASK_TO_MODEL: Dict[str, str] = { "extraction": "gpt-4o-mini", "analysis": "gpt-4o-mini", "synthesis": "gpt-4o-mini", "vision": "gpt-4-vision-preview", "classification": "gpt-4o-mini", "general": settings.OPENROUTER_MODEL, } @staticmethod async def choose_model(task: str, tenant_id: str) -> str: task_norm = (task or "general").lower() # Tenant override lookup override_key = f"llm:model:override:{tenant_id}:{task_norm}" override = await cache_service.get(override_key, tenant_id) if isinstance(override, str) and override: return override # Default mapping return ModelRouter.DEFAULT_TASK_TO_MODEL.get(task_norm, settings.OPENROUTER_MODEL) class LLMService: """OpenRouter-backed LLM service with tenant-aware routing and fallback.""" def __init__(self): self.base_url = settings.OPENROUTER_BASE_URL.rstrip("/") self.default_model = settings.OPENROUTER_MODEL self.fallback_model = settings.OPENROUTER_FALLBACK_MODEL def _headers(self) -> Dict[str, str]: return { "Authorization": f"Bearer {self.api_key}" if self.api_key else "", "Content-Type": "application/json", "HTTP-Referer": "https://virtual-board-member.local", "X-Title": "Virtual Board Member AI", } async def _post_chat(self, payload: Dict[str, Any]) -> Dict[str, Any]: async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( f"{self.base_url}/chat/completions", json=payload, headers=self._headers() ) response.raise_for_status() return response.json() def _api_key(self) -> Optional[str]: return getattr(settings, "OPENROUTER_API_KEY", None) async def generate_text( self, prompt: str, *, 
tenant_id: str, task: str = "general", max_tokens: Optional[int] = None, temperature: Optional[float] = None, system_prompt: Optional[str] = None, ) -> Dict[str, Any]: """Generate text with routing and fallback. Returns a dict with keys: text, model, usage, raw """ api_key = self._api_key() if ( settings.MOCK_LLM_RESPONSES or not api_key or (isinstance(api_key, str) and api_key.strip() in ["", "your-openrouter-api-key"]) ): # Operate in offline mode for environments without OpenRouter keys fake_text = ( "[LLM unavailable] This environment lacks OPENROUTER_API_KEY. " "Returning deterministic offline response." ) return {"text": fake_text, "model": "offline", "usage": {}, "raw": {}} chosen_model = await ModelRouter.choose_model(task, tenant_id) payload = { "model": chosen_model, "messages": self._build_messages(system_prompt, prompt), "max_tokens": max_tokens or settings.OPENROUTER_MAX_TOKENS, "temperature": temperature if temperature is not None else settings.OPENROUTER_TEMPERATURE, } try: data = await self._post_chat(payload) text = data.get("choices", [{}])[0].get("message", {}).get("content", "") return { "text": text, "model": chosen_model, "usage": data.get("usage", {}), "raw": data, } except Exception as primary_error: logger.warning("Primary model failed, attempting fallback: %s", primary_error) # Fallback fallback_model = self.fallback_model try: payload["model"] = fallback_model data = await self._post_chat(payload) text = data.get("choices", [{}])[0].get("message", {}).get("content", "") return { "text": text, "model": fallback_model, "usage": data.get("usage", {}), "raw": data, } except Exception as fallback_error: logger.error("Fallback model also failed: %s", fallback_error) raise @staticmethod def _build_messages(system_prompt: Optional[str], user_prompt: str) -> Any: messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": user_prompt}) return messages # Global instance 
# Module-level singleton instance of LLMService shared by importers.
llm_service = LLMService()