148 lines
4.7 KiB
Python
148 lines
4.7 KiB
Python
"""
Template for creating new analysis scripts
Copy this file and modify for your specific analysis

Usage:
    1. Copy this file: cp analysis_template.py my_new_analysis.py
    2. Update the ANALYSIS_NAME and DESCRIPTION
    3. Implement your analysis logic in the main() function
    4. Update the chart generation section
    5. Run: python my_new_analysis.py
"""
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
from pathlib import Path
|
|
|
|
# Import utilities
|
|
from data_loader import load_sales_data, validate_data_structure
|
|
from validate_revenue import validate_revenue
|
|
from analysis_utils import (
|
|
get_ltm_period_config, get_annual_data, calculate_annual_metrics,
|
|
get_millions_formatter, setup_revenue_chart, save_chart,
|
|
format_currency, print_annual_summary, sort_mixed_years,
|
|
apply_exclusion_filters
|
|
)
|
|
from config import (
|
|
DATA_FILE, OUTPUT_DIR, ANALYSIS_YEARS, MAX_DATE,
|
|
CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME
|
|
)
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
|
|
# Display name of this analysis. main() prints it in the console banner,
# passes it to validate_revenue(), and lower-cases/underscores it to build
# the chart filename.
ANALYSIS_NAME = "Template Analysis"

# Human-readable summary of the analysis. Not referenced by the code visible
# in this template; update it when copying the file.
DESCRIPTION = "Template for new analyses - customize this for your specific analysis"
|
|
|
|
# ============================================================================
|
|
# MAIN ANALYSIS FUNCTION
|
|
# ============================================================================
|
|
|
|
def main():
    """Run the template analysis from data load through revenue validation.

    The steps run in order: load the sales data, validate its structure,
    apply exclusion and date filters, compute per-year revenue, print a
    summary, save a revenue trend chart, and validate revenue.

    Returns:
        None. A load failure or a failed structure validation is printed
        and ends the run early; other errors propagate to the caller.
    """
    # Config names needed only inside main() are imported locally. Importing
    # them once here keeps the per-year metrics callback free of import
    # statements.
    from config import DATE_COLUMN, MIN_YEAR, REVENUE_COLUMN

    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")

    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        # Script-level boundary: report the problem and stop instead of
        # surfacing a traceback.
        print(f"ERROR loading data: {e}")
        return

    # 2. Validate data structure
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    print("Data validation passed")

    # 3. Apply exclusion filters (if configured)
    df = apply_exclusion_filters(df)

    # 4. Filter by date range
    df = df[df['Year'] >= MIN_YEAR]
    # The upper date bound is applied only when the date column is present.
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]

    # 5. Setup LTM period (if enabled)
    # NOTE(review): both values are presumably falsy when LTM is disabled --
    # confirm against get_ltm_period_config().
    ltm_start, ltm_end = get_ltm_period_config()
    if ltm_start and ltm_end:
        print(f"LTM period: {ltm_start} to {ltm_end}")

    # 6. Prepare data
    print("\nPreparing data...")
    # Add your data preparation logic here
    # Example: df['CustomColumn'] = df[REVENUE_COLUMN] * df[QUANTITY_COLUMN]

    # 7. Calculate annual metrics
    print("\nCalculating annual metrics...")

    def calculate_metrics(year_data):
        """Return the metrics dict for one year's slice of the data."""
        return {
            'Revenue': year_data[REVENUE_COLUMN].sum(),
            # Add your custom metrics here
            # 'CustomMetric': year_data['CustomColumn'].mean(),
        }

    annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end)

    # 8. Print summary
    print_annual_summary(annual_df, 'Revenue', 'Revenue')

    # 9. Create visualizations
    print("Generating charts...")
    ensure_directories()

    # Example chart: Annual revenue trend
    fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])
    try:
        # Prepare data for plotting (handle mixed types)
        annual_df_sorted = sort_mixed_years(annual_df.reset_index(), 'Year')
        years = annual_df_sorted['Year'].tolist()
        revenue = annual_df_sorted['Revenue'].values / 1e6  # Convert to millions

        # Plot against positional x values and label the ticks with the
        # year values, so the axis does not depend on the label type.
        positions = range(len(years))
        ax.plot(positions, revenue, marker='o', linewidth=2, markersize=8)
        ax.set_xticks(positions)
        ax.set_xticklabels(years, rotation=45, ha='right')
        setup_revenue_chart(ax)

        # Add LTM notation to title if applicable
        title = f'Annual Revenue Trend - {COMPANY_NAME}'
        if ltm_start and ltm_end:
            from config import get_ltm_label
            ltm_label = get_ltm_label()
            if ltm_label:
                title += f'\n({ltm_label})'
        ax.set_title(title)

        # Lay out and save this specific figure rather than relying on
        # pyplot's notion of the "current" figure.
        fig.tight_layout()
        save_chart(fig, f'{ANALYSIS_NAME.lower().replace(" ", "_")}_trend.png')
    finally:
        # Release the figure even when plotting or saving raises.
        plt.close(fig)

    # Add more charts as needed...

    # 10. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")
|
|
|
|
# ============================================================================
|
|
# RUN ANALYSIS
|
|
# ============================================================================
|
|
|
|
if __name__ == "__main__":
    # Run the analysis only when this file is executed as a script, not
    # when it is imported as a module.
    main()
|