""" Example: Product Performance Analysis Example showing product mix and performance analysis This example demonstrates: - Product-level aggregation - Product performance metrics - Product mix visualization """ import pandas as pd import numpy as np import matplotlib.pyplot as plt from pathlib import Path # Import utilities from data_loader import load_sales_data, validate_data_structure from validate_revenue import validate_revenue from analysis_utils import ( get_ltm_period_config, calculate_annual_metrics, apply_exclusion_filters, setup_revenue_chart, save_chart, format_currency, sort_mixed_years ) from config import ( OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME, REVENUE_COLUMN, ITEM_COLUMN, DATE_COLUMN, MIN_YEAR, QUANTITY_COLUMN ) # ============================================================================ # CONFIGURATION # ============================================================================ ANALYSIS_NAME = "Product Performance Analysis" DESCRIPTION = "Product mix and performance analysis" # ============================================================================ # MAIN ANALYSIS FUNCTION # ============================================================================ def main(): """Main analysis function""" print(f"\n{'='*60}") print(f"{ANALYSIS_NAME}") print(f"{'='*60}\n") # 1. Load data print("Loading data...") try: df = load_sales_data(get_data_path()) print(f"Loaded {len(df):,} transactions") except Exception as e: print(f"ERROR loading data: {e}") return # 2. Validate data structure is_valid, msg = validate_data_structure(df) if not is_valid: print(f"ERROR: {msg}") return if ITEM_COLUMN not in df.columns: print(f"WARNING: Item column '{ITEM_COLUMN}' not found. Using transaction-level analysis.") # Create a dummy item column for demonstration df[ITEM_COLUMN] = 'All Products' print("Data validation passed") # 3. Apply exclusion filters df = apply_exclusion_filters(df) # 4. Filter by date range df = df[df['Year'] >= MIN_YEAR] if DATE_COLUMN in df.columns: df = df[df[DATE_COLUMN] <= MAX_DATE] # 5. Setup LTM period ltm_start, ltm_end = get_ltm_period_config() # 6. Product performance summary print("\nCalculating product performance...") # Get most recent period data if ltm_start and ltm_end and 'YearMonth' in df.columns: recent_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)] period_label = f"LTM {ltm_end}" else: recent_year = df['Year'].max() recent_data = df[df['Year'] == recent_year] period_label = str(recent_year) # Product-level metrics product_metrics = recent_data.groupby(ITEM_COLUMN).agg({ REVENUE_COLUMN: ['sum', 'count'], QUANTITY_COLUMN: 'sum' if QUANTITY_COLUMN in df.columns else 'count' }).reset_index() product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count', 'Quantity'] # Calculate average price per unit if quantity available if QUANTITY_COLUMN in df.columns: product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Quantity'].replace(0, np.nan) else: product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Transaction_Count'] # Sort by revenue product_metrics = product_metrics.sort_values('Revenue', ascending=False) # Top products summary print(f"\nTop 10 Products by Revenue ({period_label}):") print("-" * 80) top_10 = product_metrics.head(10) total_revenue = product_metrics['Revenue'].sum() for idx, row in top_10.iterrows(): pct = (row['Revenue'] / total_revenue) * 100 print(f"{row[ITEM_COLUMN]:30s}: {format_currency(row['Revenue']):>12s} ({pct:5.1f}%)") # 7. Annual product trends (if multiple years available) if len(df['Year'].unique()) > 1: print("\nCalculating annual product trends...") def calculate_product_metrics(year_data): """Calculate product metrics for a year""" product_revenue = year_data.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum() # Get top 5 products top_5 = product_revenue.nlargest(5) return dict(top_5) annual_product_df = calculate_annual_metrics(df, calculate_product_metrics, ltm_start, ltm_end) # 8. Create visualizations print("\nGenerating charts...") ensure_directories() # Chart 1: Top Products Revenue (Bar Chart) fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide']) top_10_revenue = top_10['Revenue'].values / 1e6 top_10_names = top_10[ITEM_COLUMN].values ax1.barh(range(len(top_10)), top_10_revenue, color='#2E86AB') ax1.set_yticks(range(len(top_10))) ax1.set_yticklabels([name[:30] + '...' if len(name) > 30 else name for name in top_10_names]) ax1.set_xlabel('Revenue (Millions USD)') ax1.set_title(f'Top 10 Products by Revenue\n({period_label})', fontsize=12, fontweight='bold') setup_revenue_chart(ax1) ax1.set_ylabel('') # Chart 2: Revenue Distribution (Pie Chart for top 10) if len(product_metrics) > 10: other_revenue = product_metrics.iloc[10:]['Revenue'].sum() pie_data = list(top_10['Revenue'].values) + [other_revenue] pie_labels = list(top_10[ITEM_COLUMN].values) + ['Other'] else: pie_data = product_metrics['Revenue'].values pie_labels = product_metrics[ITEM_COLUMN].values pie_data_millions = [x / 1e6 for x in pie_data] ax2.pie(pie_data_millions, labels=pie_labels, autopct='%1.1f%%', startangle=90) ax2.set_title('Revenue Distribution\n(Top Products)', fontsize=12, fontweight='bold') plt.suptitle(f'Product Performance Analysis - {COMPANY_NAME}', fontsize=14, fontweight='bold', y=1.02) plt.tight_layout() save_chart(fig, 'product_performance.png') plt.close() else: # Single chart if only one year print("\nGenerating chart...") ensure_directories() fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) top_10_revenue = top_10['Revenue'].values / 1e6 top_10_names = top_10[ITEM_COLUMN].values ax.barh(range(len(top_10)), top_10_revenue, color='#2E86AB') ax.set_yticks(range(len(top_10))) ax.set_yticklabels([name[:40] + '...' if len(name) > 40 else name for name in top_10_names]) ax.set_xlabel('Revenue (Millions USD)') ax.set_title(f'Top 10 Products by Revenue - {COMPANY_NAME}\n({period_label})', fontsize=14, fontweight='bold') setup_revenue_chart(ax) ax.set_ylabel('') plt.tight_layout() save_chart(fig, 'product_performance.png') plt.close() # 9. Validate revenue print("\nValidating revenue...") validate_revenue(df, ANALYSIS_NAME) print(f"\n{ANALYSIS_NAME} complete!") print(f"Charts saved to: {OUTPUT_DIR}") # ============================================================================ # RUN ANALYSIS # ============================================================================ if __name__ == "__main__": main()