96 lines
3.6 KiB
Python
96 lines
3.6 KiB
Python
"""
|
|
Revenue validation utility
|
|
Validates that revenue calculations are consistent across analyses
|
|
"""
|
|
import re

import pandas as pd

from analysis_utils import get_annual_data
from config import (
    REVENUE_COLUMN, ANALYSIS_YEARS, VALIDATION_ENABLED,
    EXPECTED_REVENUE, REVENUE_TOLERANCE_PCT, LTM_ENABLED,
    DATE_COLUMN,
    get_ltm_period
)
|
|
|
|
# First run of four digits in a revenue label, e.g. "2025 (LTM 9/2025)" -> "2025".
_YEAR_IN_LABEL = re.compile(r'(\d{4})')


def _expected_revenue_key(year_label):
    """Return the key to look up in EXPECTED_REVENUE for a revenue label.

    String labels are reduced to the first four-digit number they contain
    (as an int), or None when they contain none. Non-string labels are
    returned unchanged.
    """
    if not isinstance(year_label, str):
        return year_label
    match = _YEAR_IN_LABEL.search(year_label)
    return int(match.group(1)) if match else None


def _annual_revenue_by_label(df) -> dict:
    """Sum REVENUE_COLUMN per analysis year, keyed by get_annual_data's label."""
    ltm_start, ltm_end = get_ltm_period() if LTM_ENABLED else (None, None)

    # Computed once; the original re-evaluated unique() for every year.
    years_present = df['Year'].unique()

    annual_revenue = {}
    for year in sorted(ANALYSIS_YEARS):
        if year not in years_present:
            continue
        year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end)
        if len(year_data) > 0:
            annual_revenue[year_label] = year_data[REVENUE_COLUMN].sum()
    return annual_revenue


def _print_validation_check(annual_revenue: dict) -> None:
    """Compare each reported year against EXPECTED_REVENUE and print the result."""
    print("\nValidation Check:")
    checked = 0
    failed = 0

    for year_label, actual_revenue in annual_revenue.items():
        year_key = _expected_revenue_key(year_label)
        if year_key not in EXPECTED_REVENUE:
            continue

        checked += 1
        expected = EXPECTED_REVENUE[year_key]
        # abs() keeps the tolerance band non-negative when the expected value
        # is negative; otherwise such a year could never pass.
        # NOTE(review): REVENUE_TOLERANCE_PCT is applied as a fraction
        # (e.g. 0.01 for 1%) -- confirm against config.
        tolerance = abs(expected) * REVENUE_TOLERANCE_PCT
        diff = abs(actual_revenue - expected)

        if diff <= tolerance:
            print(f" ✓ {year_label}: Within tolerance ({diff/1e6:.2f}m difference)")
        else:
            print(
                f" ✗ {year_label}: Outside tolerance "
                f"(expected ${expected/1e6:.2f}m, got ${actual_revenue/1e6:.2f}m, "
                f"diff: ${diff/1e6:.2f}m)"
            )
            failed += 1

    if checked == 0:
        # Previously this case reported "All validations passed!" although
        # no year had actually been compared against an expected value.
        print(" WARNING: No reported year matched EXPECTED_REVENUE; nothing was validated.")
    elif failed == 0:
        print(" All validations passed!")
    else:
        print(" WARNING: Some validations failed. Check data loading and filters.")


def validate_revenue(dataframe: pd.DataFrame, analysis_name: str = "Analysis") -> None:
    """Print an annual revenue summary and, if enabled, validate it.

    Intended as a sanity check that:
        1. Data loading is working correctly
        2. Revenue calculations are consistent
        3. Filters are not accidentally excluding too much data

    The input frame is copied and never modified. Rows are restricted to
    ANALYSIS_YEARS, revenue is summed per year via get_annual_data, and the
    totals are printed in millions. When VALIDATION_ENABLED is set and
    EXPECTED_REVENUE is non-empty, each year that has an expected value is
    checked against it within REVENUE_TOLERANCE_PCT.

    Args:
        dataframe: DataFrame with a 'Year' column and REVENUE_COLUMN. If
            DATE_COLUMN is present it is parsed to datetime on the working
            copy before the data is handed to get_annual_data.
        analysis_name: Name of the analysis, shown in the printed header.

    Raises:
        KeyError: If ``dataframe`` has no 'Year' column.

    Example:
        validate_revenue(df, "Revenue Analysis")
        # Prints annual revenue summary by year
    """
    if 'Year' not in dataframe.columns:
        raise KeyError(
            "validate_revenue requires a 'Year' column; "
            f"available columns: {list(dataframe.columns)}"
        )

    # Work on a copy so the datetime conversion never touches the caller's frame.
    df = dataframe.copy()

    # Ensure date column is datetime; unparseable values become NaT.
    if DATE_COLUMN in df.columns:
        df[DATE_COLUMN] = pd.to_datetime(df[DATE_COLUMN], errors='coerce', format='mixed')

    # Filter to analysis years
    df = df[df['Year'].isin(ANALYSIS_YEARS)]

    annual_revenue = _annual_revenue_by_label(df)

    # Print summary
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Annual Revenue Validation - {analysis_name}")
    print(banner)

    if annual_revenue:
        for year_label, revenue in annual_revenue.items():
            print(f" {year_label}: ${revenue / 1e6:.2f}m")

        # Validation against expected values
        if VALIDATION_ENABLED and EXPECTED_REVENUE:
            _print_validation_check(annual_revenue)
    else:
        print(" No revenue data found for analysis years")

    print(f"{banner}\n")
|