204 lines
7.4 KiB
Python
204 lines
7.4 KiB
Python
"""
|
|
Example: Product Performance Analysis
|
|
Example showing product mix and performance analysis
|
|
|
|
This example demonstrates:
|
|
- Product-level aggregation
|
|
- Product performance metrics
|
|
- Product mix visualization
|
|
"""
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from pathlib import Path
|
|
|
|
# Import utilities
|
|
from data_loader import load_sales_data, validate_data_structure
|
|
from validate_revenue import validate_revenue
|
|
from analysis_utils import (
|
|
get_ltm_period_config, calculate_annual_metrics,
|
|
apply_exclusion_filters, setup_revenue_chart, save_chart,
|
|
format_currency, sort_mixed_years
|
|
)
|
|
from config import (
|
|
OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories,
|
|
get_data_path, COMPANY_NAME, REVENUE_COLUMN, ITEM_COLUMN,
|
|
DATE_COLUMN, MIN_YEAR, QUANTITY_COLUMN
|
|
)
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
|
|
ANALYSIS_NAME = "Product Performance Analysis"
|
|
DESCRIPTION = "Product mix and performance analysis"
|
|
|
|
# ============================================================================
|
|
# MAIN ANALYSIS FUNCTION
|
|
# ============================================================================
|
|
|
|
def main():
|
|
"""Main analysis function"""
|
|
|
|
print(f"\n{'='*60}")
|
|
print(f"{ANALYSIS_NAME}")
|
|
print(f"{'='*60}\n")
|
|
|
|
# 1. Load data
|
|
print("Loading data...")
|
|
try:
|
|
df = load_sales_data(get_data_path())
|
|
print(f"Loaded {len(df):,} transactions")
|
|
except Exception as e:
|
|
print(f"ERROR loading data: {e}")
|
|
return
|
|
|
|
# 2. Validate data structure
|
|
is_valid, msg = validate_data_structure(df)
|
|
if not is_valid:
|
|
print(f"ERROR: {msg}")
|
|
return
|
|
|
|
if ITEM_COLUMN not in df.columns:
|
|
print(f"WARNING: Item column '{ITEM_COLUMN}' not found. Using transaction-level analysis.")
|
|
# Create a dummy item column for demonstration
|
|
df[ITEM_COLUMN] = 'All Products'
|
|
|
|
print("Data validation passed")
|
|
|
|
# 3. Apply exclusion filters
|
|
df = apply_exclusion_filters(df)
|
|
|
|
# 4. Filter by date range
|
|
df = df[df['Year'] >= MIN_YEAR]
|
|
if DATE_COLUMN in df.columns:
|
|
df = df[df[DATE_COLUMN] <= MAX_DATE]
|
|
|
|
# 5. Setup LTM period
|
|
ltm_start, ltm_end = get_ltm_period_config()
|
|
|
|
# 6. Product performance summary
|
|
print("\nCalculating product performance...")
|
|
|
|
# Get most recent period data
|
|
if ltm_start and ltm_end and 'YearMonth' in df.columns:
|
|
recent_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)]
|
|
period_label = f"LTM {ltm_end}"
|
|
else:
|
|
recent_year = df['Year'].max()
|
|
recent_data = df[df['Year'] == recent_year]
|
|
period_label = str(recent_year)
|
|
|
|
# Product-level metrics
|
|
product_metrics = recent_data.groupby(ITEM_COLUMN).agg({
|
|
REVENUE_COLUMN: ['sum', 'count'],
|
|
QUANTITY_COLUMN: 'sum' if QUANTITY_COLUMN in df.columns else 'count'
|
|
}).reset_index()
|
|
|
|
product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count', 'Quantity']
|
|
|
|
# Calculate average price per unit if quantity available
|
|
if QUANTITY_COLUMN in df.columns:
|
|
product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Quantity'].replace(0, np.nan)
|
|
else:
|
|
product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Transaction_Count']
|
|
|
|
# Sort by revenue
|
|
product_metrics = product_metrics.sort_values('Revenue', ascending=False)
|
|
|
|
# Top products summary
|
|
print(f"\nTop 10 Products by Revenue ({period_label}):")
|
|
print("-" * 80)
|
|
top_10 = product_metrics.head(10)
|
|
total_revenue = product_metrics['Revenue'].sum()
|
|
|
|
for idx, row in top_10.iterrows():
|
|
pct = (row['Revenue'] / total_revenue) * 100
|
|
print(f"{row[ITEM_COLUMN]:30s}: {format_currency(row['Revenue']):>12s} ({pct:5.1f}%)")
|
|
|
|
# 7. Annual product trends (if multiple years available)
|
|
if len(df['Year'].unique()) > 1:
|
|
print("\nCalculating annual product trends...")
|
|
|
|
def calculate_product_metrics(year_data):
|
|
"""Calculate product metrics for a year"""
|
|
product_revenue = year_data.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum()
|
|
# Get top 5 products
|
|
top_5 = product_revenue.nlargest(5)
|
|
return dict(top_5)
|
|
|
|
annual_product_df = calculate_annual_metrics(df, calculate_product_metrics, ltm_start, ltm_end)
|
|
|
|
# 8. Create visualizations
|
|
print("\nGenerating charts...")
|
|
ensure_directories()
|
|
|
|
# Chart 1: Top Products Revenue (Bar Chart)
|
|
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])
|
|
|
|
top_10_revenue = top_10['Revenue'].values / 1e6
|
|
top_10_names = top_10[ITEM_COLUMN].values
|
|
|
|
ax1.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
|
|
ax1.set_yticks(range(len(top_10)))
|
|
ax1.set_yticklabels([name[:30] + '...' if len(name) > 30 else name for name in top_10_names])
|
|
ax1.set_xlabel('Revenue (Millions USD)')
|
|
ax1.set_title(f'Top 10 Products by Revenue\n({period_label})', fontsize=12, fontweight='bold')
|
|
setup_revenue_chart(ax1)
|
|
ax1.set_ylabel('')
|
|
|
|
# Chart 2: Revenue Distribution (Pie Chart for top 10)
|
|
if len(product_metrics) > 10:
|
|
other_revenue = product_metrics.iloc[10:]['Revenue'].sum()
|
|
pie_data = list(top_10['Revenue'].values) + [other_revenue]
|
|
pie_labels = list(top_10[ITEM_COLUMN].values) + ['Other']
|
|
else:
|
|
pie_data = product_metrics['Revenue'].values
|
|
pie_labels = product_metrics[ITEM_COLUMN].values
|
|
|
|
pie_data_millions = [x / 1e6 for x in pie_data]
|
|
ax2.pie(pie_data_millions, labels=pie_labels, autopct='%1.1f%%', startangle=90)
|
|
ax2.set_title('Revenue Distribution\n(Top Products)', fontsize=12, fontweight='bold')
|
|
|
|
plt.suptitle(f'Product Performance Analysis - {COMPANY_NAME}',
|
|
fontsize=14, fontweight='bold', y=1.02)
|
|
plt.tight_layout()
|
|
save_chart(fig, 'product_performance.png')
|
|
plt.close()
|
|
else:
|
|
# Single chart if only one year
|
|
print("\nGenerating chart...")
|
|
ensure_directories()
|
|
|
|
fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])
|
|
|
|
top_10_revenue = top_10['Revenue'].values / 1e6
|
|
top_10_names = top_10[ITEM_COLUMN].values
|
|
|
|
ax.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
|
|
ax.set_yticks(range(len(top_10)))
|
|
ax.set_yticklabels([name[:40] + '...' if len(name) > 40 else name for name in top_10_names])
|
|
ax.set_xlabel('Revenue (Millions USD)')
|
|
ax.set_title(f'Top 10 Products by Revenue - {COMPANY_NAME}\n({period_label})',
|
|
fontsize=14, fontweight='bold')
|
|
setup_revenue_chart(ax)
|
|
ax.set_ylabel('')
|
|
|
|
plt.tight_layout()
|
|
save_chart(fig, 'product_performance.png')
|
|
plt.close()
|
|
|
|
# 9. Validate revenue
|
|
print("\nValidating revenue...")
|
|
validate_revenue(df, ANALYSIS_NAME)
|
|
|
|
print(f"\n{ANALYSIS_NAME} complete!")
|
|
print(f"Charts saved to: {OUTPUT_DIR}")
|
|
|
|
# ============================================================================
|
|
# RUN ANALYSIS
|
|
# ============================================================================
|
|
|
|
if __name__ == "__main__":
|
|
main()
|