Files
virtual_board_member/tests/test_week5_agentic_rag.py
Jonathan Pressnell 5b5714e4c2 feat: Complete Week 5 implementation - Agentic RAG & Multi-Agent Orchestration
- Implement Autonomous Workflow Engine with dynamic task decomposition
- Add Multi-Agent Communication Protocol with message routing
- Create Enhanced Reasoning Chains (CoT, ToT, Multi-Step, Parallel, Hybrid)
- Add comprehensive REST API endpoints for all Week 5 features
- Include 26/26 passing tests with full coverage
- Add complete documentation and API guides
- Update development plan to mark Week 5 as completed

Features:
- Dynamic task decomposition and parallel execution
- Agent registration, messaging, and coordination
- 5 reasoning methods with validation and learning
- Robust error handling and monitoring
- Multi-tenant support and security
- Production-ready architecture

Files added/modified:
- app/services/autonomous_workflow_engine.py
- app/services/agent_communication.py
- app/services/enhanced_reasoning.py
- app/api/v1/endpoints/week5_features.py
- tests/test_week5_features.py
- docs/week5_api_documentation.md
- docs/week5_readme.md
- WEEK5_COMPLETION_SUMMARY.md
- DEVELOPMENT_PLAN.md (updated)

All tests passing: 26/26
2025-08-10 09:25:46 -04:00

574 lines
23 KiB
Python

"""
Week 5: Agentic RAG & Multi-Agent Orchestration Tests
Tests for state-of-the-art autonomous agent-based retrieval and reasoning system.
"""
import asyncio
import time
from typing import Dict, Any, List
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from app.services.agentic_rag_service import (
    AgenticRAGService,
    Agent,
    ResearchAgent,
    AnalysisAgent,
    SynthesisAgent,
    AgentType,
    ReasoningType,
    AgentTask,
    ReasoningStep
)
from app.services.vector_service import VectorService
from app.services.llm_service import llm_service
from app.core.cache import cache_service
class TestAgenticRAGFoundation:
    """Test the foundational agentic RAG system components."""

    @pytest.fixture
    async def agentic_rag_service(self):
        """Create a test instance of AgenticRAGService with a mocked vector store."""
        with patch('app.services.agentic_rag_service.VectorService') as mock_vector_service:
            mock_vector_service.return_value = AsyncMock()
            service = AgenticRAGService()
            yield service

    @pytest.fixture
    def sample_task(self):
        """Create a sample agent task for testing."""
        # Use wall-clock time for created_at: asyncio.get_event_loop() in a *sync*
        # fixture (no running loop) is deprecated since Python 3.10 and will raise
        # on newer interpreters; the loop's monotonic clock is also not a
        # meaningful timestamp. time.time() is both safe and semantically right.
        return AgentTask(
            id="test-task-1",
            agent_type=AgentType.RESEARCH,
            description="Test research task",
            input_data={"query": "What are our Q4 financial results?"},
            dependencies=[],
            priority=1,
            created_at=time.time(),
        )

    async def test_agent_initialization(self, agentic_rag_service):
        """Test that all agents initialize correctly."""
        # Verify all required agents are present.
        assert hasattr(agentic_rag_service, 'agents')
        assert AgentType.RESEARCH in agentic_rag_service.agents
        assert AgentType.ANALYSIS in agentic_rag_service.agents
        assert AgentType.SYNTHESIS in agentic_rag_service.agents
        # Verify each slot holds the correct concrete agent class.
        assert isinstance(agentic_rag_service.agents[AgentType.RESEARCH], ResearchAgent)
        assert isinstance(agentic_rag_service.agents[AgentType.ANALYSIS], AnalysisAgent)
        assert isinstance(agentic_rag_service.agents[AgentType.SYNTHESIS], SynthesisAgent)

    async def test_agent_memory_management(self, agentic_rag_service):
        """Test agent memory operations."""
        research_agent = agentic_rag_service.agents[AgentType.RESEARCH]
        # A memory write must be readable back through get_memory().
        research_agent.update_memory("test_key", "test_value")
        memory = research_agent.get_memory()
        assert memory["test_key"] == "test_value"
        # Memory must be isolated between agents: a write to one agent's
        # memory must never appear in another's.
        analysis_agent = agentic_rag_service.agents[AgentType.ANALYSIS]
        analysis_agent.update_memory("analysis_key", "analysis_value")
        research_memory = research_agent.get_memory()
        analysis_memory = analysis_agent.get_memory()
        assert "test_key" in research_memory
        assert "test_key" not in analysis_memory
        assert "analysis_key" in analysis_memory

    async def test_agent_learning_capabilities(self, agentic_rag_service):
        """Test agent learning from feedback."""
        research_agent = agentic_rag_service.agents[AgentType.RESEARCH]
        feedback = {"accuracy": 0.9, "relevance": 0.85, "user_satisfaction": 0.95}
        await research_agent.learn(feedback)
        # Each learn() call should append exactly one history entry that
        # preserves the raw feedback payload.
        assert len(research_agent.learning_history) == 1
        assert research_agent.learning_history[0]["feedback"] == feedback
class TestResearchAgent:
    """Exercise the Research Agent's autonomous retrieval behaviour."""

    @pytest.fixture
    async def research_agent(self):
        """Yield a ResearchAgent wired to a mocked VectorService."""
        with patch('app.services.agentic_rag_service.VectorService') as vector_mock:
            vector_mock.return_value = AsyncMock()
            yield ResearchAgent(vector_mock.return_value)

    async def test_autonomous_retrieval_strategy_selection(self, research_agent):
        """The agent must pick one of the known retrieval strategies on its own."""
        chosen = await research_agent._determine_retrieval_strategy(
            "What are our Q4 financial results?",
            {"tenant_id": "test-tenant", "user_role": "board_member"},
        )
        assert chosen in ("semantic", "hybrid", "structured", "multi_modal")

    async def test_query_analysis_capabilities(self, research_agent):
        """Query analysis must return a structured breakdown of the request."""
        breakdown = await research_agent._analyze_query(
            "Compare Q3 and Q4 financial performance"
        )
        for expected_key in ("intent", "complexity", "entities", "context_requirements"):
            assert expected_key in breakdown

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_semantic_retrieval(self, mock_llm, research_agent):
        """Semantic retrieval should always hand back a list of hits."""
        mock_llm.return_value = "Generated semantic search query"
        hits = await research_agent._semantic_retrieval(
            "Financial performance analysis",
            {"tenant_id": "test-tenant"},
        )
        assert isinstance(hits, list)

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_autonomous_filtering(self, mock_llm, research_agent):
        """Filtering should drop noise while keeping relevant, high-scoring hits."""
        mock_llm.return_value = "Relevant content about financial performance"
        candidates = [
            {"content": "Q4 revenue increased by 15%", "score": 0.9},
            {"content": "Weather forecast for next week", "score": 0.3},
            {"content": "Q4 profit margins improved", "score": 0.85},
        ]
        kept = await research_agent._autonomous_filtering(candidates, "Q4 financial results")
        # Filtering never grows the result set...
        assert len(kept) <= len(candidates)
        # ...and retains at least one on-topic hit.
        assert any("Q4" in hit["content"] for hit in kept)
class TestAnalysisAgent:
    """Exercise the Analysis Agent's reasoning strategies."""

    @pytest.fixture
    async def analysis_agent(self):
        """Yield a fresh AnalysisAgent."""
        yield AnalysisAgent()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_chain_of_thought_analysis(self, mock_llm, analysis_agent):
        """Chain of Thought must yield steps, a conclusion and a confidence."""
        mock_llm.return_value = """
        Step 1: Analyze Q4 revenue data
        Step 2: Compare with Q3 performance
        Step 3: Identify key drivers
        Step 4: Assess market conditions
        Conclusion: Q4 shows strong growth due to new product launch
        """
        outcome = await analysis_agent._chain_of_thought_analysis(
            "Analyze Q4 financial performance",
            [{"content": "Q4 revenue: $10M", "source": "financial_report.pdf"}],
        )
        for field in ("reasoning_steps", "conclusion", "confidence"):
            assert field in outcome
        assert outcome["confidence"] > 0.0

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_tree_of_thoughts_analysis(self, mock_llm, analysis_agent):
        """Tree of Thoughts must explore several paths and synthesize them."""
        mock_llm.side_effect = [
            "Path 1: Revenue growth analysis",
            "Path 2: Cost structure analysis",
            "Path 3: Market share analysis",
            "Evaluation: Path 1 is most relevant",
            "Synthesis: Combined insights from all paths",
        ]
        outcome = await analysis_agent._tree_of_thoughts_analysis(
            "Comprehensive Q4 analysis",
            [{"content": "Q4 financial data", "source": "report.pdf"}],
        )
        for field in ("reasoning_paths", "evaluation", "synthesis"):
            assert field in outcome
        assert len(outcome["reasoning_paths"]) > 1

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_multi_step_analysis(self, mock_llm, analysis_agent):
        """Multi-step reasoning must validate stages and report every step."""
        mock_llm.side_effect = [
            "Step 1: Data validation - All data is accurate",
            "Step 2: Trend analysis - Revenue growing 15% QoQ",
            "Step 3: Risk assessment - Low risk factors identified",
            "Step 4: Future projection - Continued growth expected",
        ]
        outcome = await analysis_agent._multi_step_analysis(
            "Multi-step financial analysis",
            [{"content": "Financial data", "source": "data.pdf"}],
        )
        for field in ("steps", "validation", "final_analysis"):
            assert field in outcome
        assert len(outcome["steps"]) > 1

    async def test_reasoning_path_evaluation(self, analysis_agent):
        """Evaluating a single reasoning path must yield quality metrics."""
        candidate_paths = [
            {"content": "Path 1 analysis", "confidence": 0.8},
            {"content": "Path 2 analysis", "confidence": 0.9},
            {"content": "Path 3 analysis", "confidence": 0.7},
        ]
        # Only the first path is evaluated here; ranking across paths is
        # covered by the tree-of-thoughts flow.
        verdict = await analysis_agent._evaluate_reasoning_path(
            candidate_paths[0], "Test query", "Test context"
        )
        for metric in ("quality_score", "relevance_score", "overall_score"):
            assert metric in verdict
class TestSynthesisAgent:
    """Exercise the Synthesis Agent's response assembly."""

    @pytest.fixture
    async def synthesis_agent(self):
        """Yield a fresh SynthesisAgent."""
        yield SynthesisAgent()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_information_synthesis(self, mock_llm, synthesis_agent):
        """Research and analysis outputs should merge into one synthesis record."""
        mock_llm.return_value = "Synthesized response combining research and analysis"
        merged = await synthesis_agent._synthesize_information(
            "Q4 financial summary",
            {"data": "Research data", "confidence": 0.9},
            {"insights": "Analysis insights", "confidence": 0.85},
            {"tenant_id": "test-tenant"},
        )
        for field in ("synthesis", "key_insights", "confidence"):
            assert field in merged

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_response_generation(self, mock_llm, synthesis_agent):
        """Final response generation must produce a non-empty string."""
        mock_llm.return_value = "Generated response with proper formatting and citations"
        reply = await synthesis_agent._generate_response(
            "Financial performance summary",
            {"content": "Synthesized content", "insights": ["insight1", "insight2"]},
            {"user_role": "board_member"},
        )
        assert isinstance(reply, str)
        assert len(reply) > 0

    async def test_metadata_addition(self, synthesis_agent):
        """Responses should be enriched with sources, confidence, method and time."""
        enriched = await synthesis_agent._add_metadata(
            "Q4 revenue increased by 15%",
            {"sources": ["report1.pdf", "report2.pdf"]},
            {"confidence": 0.9, "methodology": "CoT"},
        )
        for field in ("sources", "confidence", "methodology", "timestamp"):
            assert field in enriched
class TestAgenticRAGService:
    """Exercise end-to-end orchestration in AgenticRAGService."""

    @pytest.fixture
    async def agentic_service(self):
        """Yield an AgenticRAGService backed by a mocked VectorService."""
        with patch('app.services.agentic_rag_service.VectorService') as vector_mock:
            vector_mock.return_value = AsyncMock()
            yield AgenticRAGService()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_autonomous_workflow_execution(self, mock_llm, agentic_service):
        """The autonomous workflow should run research -> analysis -> synthesis."""
        mock_llm.side_effect = [
            "Research strategy: semantic search",
            "Research results: Q4 data found",
            "Analysis: Chain of Thought reasoning",
            "Analysis results: Strong performance identified",
            "Synthesis: Comprehensive summary generated",
        ]
        outcome = await agentic_service.answer(
            tenant_id="test-tenant",
            query="Analyze Q4 financial performance",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
            enable_autonomous_workflow=True,
        )
        for field in ("answer", "sources", "confidence", "metadata"):
            assert field in outcome

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_simple_workflow_execution(self, mock_llm, agentic_service):
        """Disabling the autonomous workflow should still yield a usable answer."""
        mock_llm.return_value = "Simple response generated"
        outcome = await agentic_service.answer(
            tenant_id="test-tenant",
            query="What is our revenue?",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
            enable_autonomous_workflow=False,
        )
        assert "answer" in outcome
        assert "sources" in outcome

    async def test_agent_status_monitoring(self, agentic_service):
        """Status reporting must cover every agent with the expected fields."""
        status = await agentic_service.get_agent_status()
        for agent_name in ("research_agent", "analysis_agent", "synthesis_agent"):
            assert agent_name in status
        for per_agent in status.values():
            assert "status" in per_agent
            assert "memory_usage" in per_agent
            assert "last_activity" in per_agent

    async def test_agent_memory_reset(self, agentic_service):
        """Memory reset should succeed both globally and for a single agent type."""
        assert await agentic_service.reset_agent_memory() is True
        assert await agentic_service.reset_agent_memory(AgentType.RESEARCH) is True
class TestIntegration:
    """End-to-end integration coverage for the agentic RAG stack."""

    @pytest.fixture
    async def integration_service(self):
        """Yield an AgenticRAGService suitable for integration scenarios."""
        with patch('app.services.agentic_rag_service.VectorService') as vector_mock:
            vector_mock.return_value = AsyncMock()
            yield AgenticRAGService()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_end_to_end_agentic_workflow(self, mock_llm, integration_service):
        """A complex query should flow through the full autonomous pipeline."""
        mock_llm.side_effect = [
            "Research: Financial data analysis",
            "Analysis: Performance evaluation",
            "Synthesis: Executive summary",
        ]
        outcome = await integration_service.answer(
            tenant_id="test-tenant",
            query="Provide comprehensive analysis of Q4 performance including risks and opportunities",
            reasoning_type=ReasoningType.TREE_OF_THOUGHTS,
            enable_autonomous_workflow=True,
        )
        for field in ("answer", "sources", "confidence", "metadata"):
            assert field in outcome
        assert "reasoning_type" in outcome["metadata"]
        assert outcome["metadata"]["reasoning_type"] == "tree_of_thoughts"

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_error_recovery_and_fallback(self, mock_llm, integration_service):
        """An LLM outage must degrade gracefully instead of propagating."""
        mock_llm.side_effect = Exception("LLM service unavailable")
        outcome = await integration_service.answer(
            tenant_id="test-tenant",
            query="Test query",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
            enable_autonomous_workflow=True,
        )
        # A response still comes back, flagged as an error or a fallback.
        assert "answer" in outcome
        assert "error" in outcome or "fallback" in outcome

    async def test_tenant_isolation(self, integration_service):
        """Different tenants asking different questions should not share answers."""
        first = await integration_service.answer(
            tenant_id="tenant-1",
            query="Test query 1",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
        )
        second = await integration_service.answer(
            tenant_id="tenant-2",
            query="Test query 2",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
        )
        # NOTE(review): inequality is a weak isolation check — identical fallback
        # responses for both tenants would make it flaky; consider asserting on
        # tenant-specific content instead. TODO confirm with service behaviour.
        assert first != second
class TestPerformance:
    """Latency and memory behaviour of the agentic RAG system."""

    @pytest.fixture
    async def performance_service(self):
        """Yield an AgenticRAGService for performance scenarios."""
        with patch('app.services.agentic_rag_service.VectorService') as vector_mock:
            vector_mock.return_value = AsyncMock()
            yield AgenticRAGService()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_response_time_performance(self, mock_llm, performance_service):
        """A fully mocked request should comfortably finish inside the budget."""
        mock_llm.return_value = "Performance test response"
        import time
        started = time.time()
        outcome = await performance_service.answer(
            tenant_id="test-tenant",
            query="Performance test query",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
        )
        elapsed = time.time() - started
        # Generous 10-second ceiling; tighten once real latencies are known.
        assert elapsed < 10.0
        assert "answer" in outcome

    async def test_memory_usage_optimization(self, performance_service):
        """Agent memory must not balloon across repeated requests."""
        before = await performance_service.get_agent_status()
        # Drive several requests, each under a distinct tenant.
        for i in range(5):
            await performance_service.answer(
                tenant_id=f"test-tenant-{i}",
                query=f"Test query {i}",
                reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
            )
        after = await performance_service.get_agent_status()
        for agent_name in ("research_agent", "analysis_agent", "synthesis_agent"):
            # Allow at most a doubling of the reported memory usage.
            assert (
                after[agent_name]["memory_usage"]
                <= before[agent_name]["memory_usage"] * 2
            )
class TestReasoningTypes:
    """Check each reasoning type is honoured end to end."""

    @pytest.fixture
    async def reasoning_service(self):
        """Yield an AgenticRAGService for reasoning-type scenarios."""
        with patch('app.services.agentic_rag_service.VectorService') as vector_mock:
            vector_mock.return_value = AsyncMock()
            yield AgenticRAGService()

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_chain_of_thought_reasoning(self, mock_llm, reasoning_service):
        """CHAIN_OF_THOUGHT should be echoed back in the response metadata."""
        mock_llm.return_value = "Step-by-step reasoning with clear logic"
        outcome = await reasoning_service.answer(
            tenant_id="test-tenant",
            query="Explain the reasoning behind Q4 performance",
            reasoning_type=ReasoningType.CHAIN_OF_THOUGHT,
        )
        assert outcome["metadata"]["reasoning_type"] == "chain_of_thought"
        assert "answer" in outcome

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_tree_of_thoughts_reasoning(self, mock_llm, reasoning_service):
        """TREE_OF_THOUGHTS should be echoed back in the response metadata."""
        mock_llm.return_value = "Multi-path exploration with synthesis"
        outcome = await reasoning_service.answer(
            tenant_id="test-tenant",
            query="Explore multiple perspectives on Q4 performance",
            reasoning_type=ReasoningType.TREE_OF_THOUGHTS,
        )
        assert outcome["metadata"]["reasoning_type"] == "tree_of_thoughts"
        assert "answer" in outcome

    @patch('app.services.agentic_rag_service.llm_service.generate')
    async def test_multi_step_reasoning(self, mock_llm, reasoning_service):
        """MULTI_STEP should be echoed back in the response metadata."""
        mock_llm.return_value = "Sequential analysis with validation"
        outcome = await reasoning_service.answer(
            tenant_id="test-tenant",
            query="Perform detailed step-by-step analysis",
            reasoning_type=ReasoningType.MULTI_STEP,
        )
        assert outcome["metadata"]["reasoning_type"] == "multi_step"
        assert "answer" in outcome