Files
sales-data-analysis/validate_revenue.py
Jonathan Pressnell cf0b596449 Initial commit: sales analysis template
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 09:16:34 -05:00

96 lines
3.6 KiB
Python

"""
Revenue validation utility
Validates that revenue calculations are consistent across analyses
"""
import pandas as pd
from config import (
REVENUE_COLUMN, ANALYSIS_YEARS, VALIDATION_ENABLED,
EXPECTED_REVENUE, REVENUE_TOLERANCE_PCT, LTM_ENABLED,
get_ltm_period
)
from analysis_utils import get_annual_data
def validate_revenue(dataframe: pd.DataFrame, analysis_name: str = "Analysis") -> None:
    """
    Print an annual revenue summary and optionally check it against expected values.

    The input is copied, restricted to rows whose ``Year`` is in
    ``ANALYSIS_YEARS``, and ``REVENUE_COLUMN`` is summed per year using the
    slice and label returned by ``get_annual_data``. When
    ``VALIDATION_ENABLED`` is set and ``EXPECTED_REVENUE`` is non-empty, each
    total is compared with its expected value; a total passes when the
    absolute difference is at most ``abs(expected) * REVENUE_TOLERANCE_PCT``.

    This function helps ensure that:
    1. Data loading is working correctly
    2. Revenue calculations are consistent
    3. Filters are not accidentally excluding too much data

    Args:
        dataframe: DataFrame with a ``Year`` column and the column named by
            ``REVENUE_COLUMN``. It is not modified; work is done on a copy.
        analysis_name: Name of the analysis, shown in the report header.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If ``dataframe`` has no ``Year`` column.

    Example:
        >>> validate_revenue(df, "Revenue Analysis")
        >>> # Prints annual revenue summary by year
    """
    # Function-local imports, as in the original implementation.
    import re
    from config import DATE_COLUMN

    df = dataframe.copy()

    # Ensure date column is datetime; values that cannot be parsed become NaT.
    if DATE_COLUMN in df.columns:
        df[DATE_COLUMN] = pd.to_datetime(df[DATE_COLUMN], errors='coerce', format='mixed')

    # Filter to analysis years
    df = df[df['Year'].isin(ANALYSIS_YEARS)]

    # Calculate annual revenue. The set of years present is computed once
    # instead of calling .unique() on every loop iteration.
    ltm_start, ltm_end = get_ltm_period() if LTM_ENABLED else (None, None)
    years_present = set(df['Year'].unique())
    annual_revenue = {}
    for year in sorted(ANALYSIS_YEARS):
        if year not in years_present:
            continue
        year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end)
        if len(year_data) > 0:
            annual_revenue[year_label] = year_data[REVENUE_COLUMN].sum()

    # Print summary
    rule = '=' * 60
    print(f"\n{rule}")
    print(f"Annual Revenue Validation - {analysis_name}")
    print(rule)

    if annual_revenue:
        for year_label, revenue in annual_revenue.items():
            print(f" {year_label}: ${revenue / 1e6:.2f}m")

        # Validation against expected values
        if VALIDATION_ENABLED and EXPECTED_REVENUE:
            print("\nValidation Check:")
            year_pattern = re.compile(r'(\d{4})')
            checked = 0  # number of labels actually compared with an expectation
            all_valid = True
            for year_label, actual_revenue in annual_revenue.items():
                # Try to match year label to expected revenue
                if isinstance(year_label, str):
                    # Extract year number from label (e.g., "2025 (LTM 9/2025)" -> 2025)
                    year_match = year_pattern.search(year_label)
                    year_key = int(year_match.group(1)) if year_match else None
                else:
                    year_key = year_label

                if year_key not in EXPECTED_REVENUE:
                    continue

                checked += 1
                expected = EXPECTED_REVENUE[year_key]
                # abs() keeps the tolerance band non-negative when the expected
                # value is negative; otherwise nothing could ever pass.
                # NOTE(review): REVENUE_TOLERANCE_PCT is used as a plain
                # multiplier, so it presumably holds a fraction (0.01 == 1%) —
                # confirm against config.
                tolerance = abs(expected) * REVENUE_TOLERANCE_PCT
                diff = abs(actual_revenue - expected)
                if diff <= tolerance:
                    print(f"{year_label}: Within tolerance ({diff/1e6:.2f}m difference)")
                else:
                    print(f"{year_label}: Outside tolerance (expected ${expected/1e6:.2f}m, got ${actual_revenue/1e6:.2f}m, diff: ${diff/1e6:.2f}m)")
                    all_valid = False

            if checked == 0:
                # Do not report success when no comparison was performed.
                print(" WARNING: No expected revenue values matched the reported years; nothing was validated.")
            elif all_valid:
                print(" All validations passed!")
            else:
                print(" WARNING: Some validations failed. Check data loading and filters.")
    else:
        print(" No revenue data found for analysis years")

    print(f"{rule}\n")