Files
sales-data-analysis/analysis_template.py
Jonathan Pressnell cf0b596449 Initial commit: sales analysis template
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 09:16:34 -05:00

148 lines
4.7 KiB
Python

"""
Template for creating new analysis scripts.

Copy this file and modify it for your specific analysis.
Usage:
1. Copy this file: cp analysis_template.py my_new_analysis.py
2. Update the ANALYSIS_NAME and DESCRIPTION
3. Implement your analysis logic in the main() function
4. Update the chart generation section
5. Run: python my_new_analysis.py
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, get_annual_data, calculate_annual_metrics,
get_millions_formatter, setup_revenue_chart, save_chart,
format_currency, print_annual_summary, sort_mixed_years,
apply_exclusion_filters
)
from config import (
DATA_FILE, OUTPUT_DIR, ANALYSIS_YEARS, MAX_DATE,
CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Display name of the analysis. main() prints it in the run banner, passes it
# to validate_revenue(), and derives the chart file name from it (lower-cased,
# spaces replaced by underscores).
ANALYSIS_NAME = 'Template Analysis'

# One-line summary of what the analysis does; update it together with
# ANALYSIS_NAME when copying this template.
DESCRIPTION = (
    'Template for new analyses - '
    'customize this for your specific analysis'
)
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
    """Run the template analysis from data load to chart output.

    The steps, in order:

    1. Load the sales data from ``get_data_path()``.
    2. Validate the data structure.
    3. Apply the configured exclusion filters.
    4. Keep rows with ``Year >= MIN_YEAR`` and, when the date column is
       present, rows dated on or before ``MAX_DATE``.
    5. Read the LTM period configuration.
    6. Prepare data (placeholder for analysis-specific logic).
    7. Compute annual metrics.
    8. Print the annual summary.
    9. Draw and save the annual revenue trend chart.
    10. Run the revenue validation.

    Progress and errors are reported with ``print``. If loading or structure
    validation fails, the error is printed and the function returns early
    without raising.

    Returns:
        None
    """
    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")

    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        # Top-level script boundary: report the problem and stop cleanly.
        print(f"ERROR loading data: {e}")
        return

    # 2. Validate data structure
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    print("Data validation passed")

    # 3. Apply exclusion filters (if configured)
    df = apply_exclusion_filters(df)

    # 4. Filter by date range
    from config import MIN_YEAR, DATE_COLUMN
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]

    # 5. Setup LTM period (if enabled)
    ltm_start, ltm_end = get_ltm_period_config()
    # Both values must be truthy for the LTM period to be used below.
    ltm_enabled = bool(ltm_start and ltm_end)
    if ltm_enabled:
        print(f"LTM period: {ltm_start} to {ltm_end}")

    # 6. Prepare data
    print("\nPreparing data...")
    # Add your data preparation logic here
    # Example: df['CustomColumn'] = df[REVENUE_COLUMN] * df[QUANTITY_COLUMN]

    # 7. Calculate annual metrics
    print("\nCalculating annual metrics...")
    # Imported once here rather than inside the per-year callback, so the
    # import statement is not re-executed for every year.
    from config import REVENUE_COLUMN

    def calculate_metrics(year_data):
        """Calculate metrics for a single year"""
        return {
            'Revenue': year_data[REVENUE_COLUMN].sum(),
            # Add your custom metrics here
            # 'CustomMetric': year_data['CustomColumn'].mean(),
        }

    annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end)

    # 8. Print summary
    print_annual_summary(annual_df, 'Revenue', 'Revenue')

    # 9. Create visualizations
    print("Generating charts...")
    ensure_directories()

    # Example chart: Annual revenue trend
    fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])
    try:
        # Prepare data for plotting (handle mixed types)
        annual_df_sorted = sort_mixed_years(annual_df.reset_index(), 'Year')
        years = annual_df_sorted['Year'].tolist()
        revenue = annual_df_sorted['Revenue'].values / 1e6  # Convert to millions

        # Plot against positional indices and label the ticks with the year
        # values, since the labels may not all be plain numbers.
        x_positions = range(len(years))
        ax.plot(x_positions, revenue, marker='o', linewidth=2, markersize=8)
        ax.set_xticks(x_positions)
        ax.set_xticklabels(years, rotation=45, ha='right')
        setup_revenue_chart(ax)

        # Add LTM notation to title if applicable
        title = f'Annual Revenue Trend - {COMPANY_NAME}'
        if ltm_enabled:
            from config import get_ltm_label
            ltm_label = get_ltm_label()
            if ltm_label:
                title += f'\n({ltm_label})'
        ax.set_title(title)

        plt.tight_layout()
        save_chart(fig, f'{ANALYSIS_NAME.lower().replace(" ", "_")}_trend.png')
    finally:
        # Close this specific figure, even when plotting or saving fails, so
        # it is not left open and no other "current" figure is closed instead.
        plt.close(fig)

    # Add more charts as needed...

    # 10. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")
# ============================================================================
# RUN ANALYSIS
# ============================================================================
# Run the analysis only when this file is executed as a script
# (e.g. `python my_new_analysis.py`), not when it is imported.
if __name__ == "__main__":
    main()