"""Integration-style unit tests for prompt, LLM, and RAG services.

External effects (vector search, LLM network calls) are stubbed with
pytest's ``monkeypatch`` so the tests run offline and deterministically.
"""

import pytest

from app.services.llm_service import llm_service
from app.services.prompt_manager import prompt_manager
from app.services.rag_service import rag_service


@pytest.mark.asyncio
async def test_prompt_manager_save_and_retrieve():
    """A saved prompt template is retrievable by (tenant, name, version)."""
    tenant_id = "test-tenant"
    await prompt_manager.save_prompt(
        tenant_id=tenant_id, name="ctx", version="v1", template="You are helpful."
    )
    tpl = await prompt_manager.get_prompt(tenant_id=tenant_id, name="ctx", version="v1")
    assert tpl == "You are helpful."


@pytest.mark.asyncio
async def test_llm_offline_mode_without_api_key(monkeypatch):
    """With no API key configured, the LLM service reports offline fallback."""
    # Force no API key
    monkeypatch.setattr(
        "app.services.llm_service.settings.OPENROUTER_API_KEY", None, raising=False
    )
    result = await llm_service.generate_text("Hello", tenant_id="test-tenant")
    assert result["model"] == "offline"
    assert "LLM unavailable" in result["text"]


@pytest.mark.asyncio
async def test_rag_service_basic_flow(monkeypatch):
    """End-to-end RAG answer flow with mocked retrieval and mocked LLM.

    Verifies the answer text reflects the query topic and that citations
    are propagated from the retrieved chunks.
    """

    # Mock vector search to return small context
    async def _fake_search(tenant_id, query, limit=10, chunk_types=None):
        return [
            {
                "document_id": "doc1",
                "page_numbers": [1],
                "chunk_type": "text",
                "text": "Revenue grew 20% in Q4.",
                "score": 0.9,
            },
            {
                "document_id": "doc2",
                "page_numbers": [2],
                "chunk_type": "table",
                "text": "Table with KPIs",
                "score": 0.85,
            },
        ]

    monkeypatch.setattr(rag_service.vector_service, "search_similar", _fake_search)

    # Mock LLM call to avoid network
    async def _fake_generate_text(
        prompt,
        tenant_id,
        task="general",
        max_tokens=None,
        temperature=None,
        system_prompt=None,
    ):
        return {"text": "Q4 revenue grew 20% (doc:doc1 p:1).", "model": "offline"}

    monkeypatch.setattr(
        "app.services.rag_service.llm_service.generate_text", _fake_generate_text
    )

    result = await rag_service.answer(
        tenant_id="test-tenant", query="What happened to revenue in Q4?"
    )
    assert "revenue" in result["text"].lower()
    assert result["citations"] and len(result["citations"]) >= 1