Files
sales-data-analysis/examples/product_performance.py
Jonathan Pressnell cf0b596449 Initial commit: sales analysis template
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 09:16:34 -05:00

204 lines
7.4 KiB
Python

"""
Example: Product Performance Analysis
Example showing product mix and performance analysis
This example demonstrates:
- Product-level aggregation
- Product performance metrics
- Product mix visualization
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, calculate_annual_metrics,
apply_exclusion_filters, setup_revenue_chart, save_chart,
format_currency, sort_mixed_years
)
from config import (
OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories,
get_data_path, COMPANY_NAME, REVENUE_COLUMN, ITEM_COLUMN,
DATE_COLUMN, MIN_YEAR, QUANTITY_COLUMN
)
# ============================================================================
# CONFIGURATION
# ============================================================================
ANALYSIS_NAME = "Product Performance Analysis"
DESCRIPTION = "Product mix and performance analysis"
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
"""Main analysis function"""
print(f"\n{'='*60}")
print(f"{ANALYSIS_NAME}")
print(f"{'='*60}\n")
# 1. Load data
print("Loading data...")
try:
df = load_sales_data(get_data_path())
print(f"Loaded {len(df):,} transactions")
except Exception as e:
print(f"ERROR loading data: {e}")
return
# 2. Validate data structure
is_valid, msg = validate_data_structure(df)
if not is_valid:
print(f"ERROR: {msg}")
return
if ITEM_COLUMN not in df.columns:
print(f"WARNING: Item column '{ITEM_COLUMN}' not found. Using transaction-level analysis.")
# Create a dummy item column for demonstration
df[ITEM_COLUMN] = 'All Products'
print("Data validation passed")
# 3. Apply exclusion filters
df = apply_exclusion_filters(df)
# 4. Filter by date range
df = df[df['Year'] >= MIN_YEAR]
if DATE_COLUMN in df.columns:
df = df[df[DATE_COLUMN] <= MAX_DATE]
# 5. Setup LTM period
ltm_start, ltm_end = get_ltm_period_config()
# 6. Product performance summary
print("\nCalculating product performance...")
# Get most recent period data
if ltm_start and ltm_end and 'YearMonth' in df.columns:
recent_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)]
period_label = f"LTM {ltm_end}"
else:
recent_year = df['Year'].max()
recent_data = df[df['Year'] == recent_year]
period_label = str(recent_year)
# Product-level metrics
product_metrics = recent_data.groupby(ITEM_COLUMN).agg({
REVENUE_COLUMN: ['sum', 'count'],
QUANTITY_COLUMN: 'sum' if QUANTITY_COLUMN in df.columns else 'count'
}).reset_index()
product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count', 'Quantity']
# Calculate average price per unit if quantity available
if QUANTITY_COLUMN in df.columns:
product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Quantity'].replace(0, np.nan)
else:
product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Transaction_Count']
# Sort by revenue
product_metrics = product_metrics.sort_values('Revenue', ascending=False)
# Top products summary
print(f"\nTop 10 Products by Revenue ({period_label}):")
print("-" * 80)
top_10 = product_metrics.head(10)
total_revenue = product_metrics['Revenue'].sum()
for idx, row in top_10.iterrows():
pct = (row['Revenue'] / total_revenue) * 100
print(f"{row[ITEM_COLUMN]:30s}: {format_currency(row['Revenue']):>12s} ({pct:5.1f}%)")
# 7. Annual product trends (if multiple years available)
if len(df['Year'].unique()) > 1:
print("\nCalculating annual product trends...")
def calculate_product_metrics(year_data):
"""Calculate product metrics for a year"""
product_revenue = year_data.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum()
# Get top 5 products
top_5 = product_revenue.nlargest(5)
return dict(top_5)
annual_product_df = calculate_annual_metrics(df, calculate_product_metrics, ltm_start, ltm_end)
# 8. Create visualizations
print("\nGenerating charts...")
ensure_directories()
# Chart 1: Top Products Revenue (Bar Chart)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])
top_10_revenue = top_10['Revenue'].values / 1e6
top_10_names = top_10[ITEM_COLUMN].values
ax1.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
ax1.set_yticks(range(len(top_10)))
ax1.set_yticklabels([name[:30] + '...' if len(name) > 30 else name for name in top_10_names])
ax1.set_xlabel('Revenue (Millions USD)')
ax1.set_title(f'Top 10 Products by Revenue\n({period_label})', fontsize=12, fontweight='bold')
setup_revenue_chart(ax1)
ax1.set_ylabel('')
# Chart 2: Revenue Distribution (Pie Chart for top 10)
if len(product_metrics) > 10:
other_revenue = product_metrics.iloc[10:]['Revenue'].sum()
pie_data = list(top_10['Revenue'].values) + [other_revenue]
pie_labels = list(top_10[ITEM_COLUMN].values) + ['Other']
else:
pie_data = product_metrics['Revenue'].values
pie_labels = product_metrics[ITEM_COLUMN].values
pie_data_millions = [x / 1e6 for x in pie_data]
ax2.pie(pie_data_millions, labels=pie_labels, autopct='%1.1f%%', startangle=90)
ax2.set_title('Revenue Distribution\n(Top Products)', fontsize=12, fontweight='bold')
plt.suptitle(f'Product Performance Analysis - {COMPANY_NAME}',
fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
save_chart(fig, 'product_performance.png')
plt.close()
else:
# Single chart if only one year
print("\nGenerating chart...")
ensure_directories()
fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])
top_10_revenue = top_10['Revenue'].values / 1e6
top_10_names = top_10[ITEM_COLUMN].values
ax.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
ax.set_yticks(range(len(top_10)))
ax.set_yticklabels([name[:40] + '...' if len(name) > 40 else name for name in top_10_names])
ax.set_xlabel('Revenue (Millions USD)')
ax.set_title(f'Top 10 Products by Revenue - {COMPANY_NAME}\n({period_label})',
fontsize=14, fontweight='bold')
setup_revenue_chart(ax)
ax.set_ylabel('')
plt.tight_layout()
save_chart(fig, 'product_performance.png')
plt.close()
# 9. Validate revenue
print("\nValidating revenue...")
validate_revenue(df, ANALYSIS_NAME)
print(f"\n{ANALYSIS_NAME} complete!")
print(f"Charts saved to: {OUTPUT_DIR}")
# ============================================================================
# RUN ANALYSIS
# ============================================================================
if __name__ == "__main__":
main()