""" Revenue validation utility Validates that revenue calculations are consistent across analyses """ import pandas as pd from config import ( REVENUE_COLUMN, ANALYSIS_YEARS, VALIDATION_ENABLED, EXPECTED_REVENUE, REVENUE_TOLERANCE_PCT, LTM_ENABLED, get_ltm_period ) from analysis_utils import get_annual_data def validate_revenue(dataframe: pd.DataFrame, analysis_name: str = "Analysis") -> None: """ Print annual revenue summary for validation. This function helps ensure that: 1. Data loading is working correctly 2. Revenue calculations are consistent 3. Filters are not accidentally excluding too much data Args: dataframe: DataFrame with revenue and date columns (should have REVENUE_COLUMN and Year) analysis_name: Name of the analysis (for logging/display) Example: >>> validate_revenue(df, "Revenue Analysis") >>> # Prints annual revenue summary by year """ df = dataframe.copy() # Ensure date column is datetime from config import DATE_COLUMN if DATE_COLUMN in df.columns: df[DATE_COLUMN] = pd.to_datetime(df[DATE_COLUMN], errors='coerce', format='mixed') # Filter to analysis years df = df[df['Year'].isin(ANALYSIS_YEARS)] # Calculate annual revenue annual_revenue = {} ltm_start, ltm_end = get_ltm_period() if LTM_ENABLED else (None, None) for year in sorted(ANALYSIS_YEARS): if year in df['Year'].unique(): year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end) if len(year_data) > 0: revenue = year_data[REVENUE_COLUMN].sum() annual_revenue[year_label] = revenue # Print summary print(f"\n{'='*60}") print(f"Annual Revenue Validation - {analysis_name}") print(f"{'='*60}") if annual_revenue: for year_label, revenue in annual_revenue.items(): formatted = f"${revenue / 1e6:.2f}m" print(f" {year_label}: {formatted}") # Validation against expected values if VALIDATION_ENABLED and EXPECTED_REVENUE: print(f"\nValidation Check:") all_valid = True for year_label, actual_revenue in annual_revenue.items(): # Try to match year label to expected revenue year_key = None if isinstance(year_label, str): # Extract year number from label (e.g., "2025 (LTM 9/2025)" -> 2025) import re year_match = re.search(r'(\d{4})', str(year_label)) if year_match: year_key = int(year_match.group(1)) else: year_key = year_label if year_key in EXPECTED_REVENUE: expected = EXPECTED_REVENUE[year_key] tolerance = expected * REVENUE_TOLERANCE_PCT diff = abs(actual_revenue - expected) if diff <= tolerance: print(f" ✓ {year_label}: Within tolerance ({diff/1e6:.2f}m difference)") else: print(f" ✗ {year_label}: Outside tolerance (expected ${expected/1e6:.2f}m, got ${actual_revenue/1e6:.2f}m, diff: ${diff/1e6:.2f}m)") all_valid = False if all_valid: print(" All validations passed!") else: print(" WARNING: Some validations failed. Check data loading and filters.") else: print(" No revenue data found for analysis years") print(f"{'='*60}\n")