Initial commit: sales analysis template

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Jonathan Pressnell
2026-02-06 09:16:34 -05:00
commit cf0b596449
38 changed files with 8001 additions and 0 deletions

View File

@@ -0,0 +1,134 @@
"""
Example: Annual Revenue Trend Analysis
Simple example showing annual revenue with LTM support
This is a working example that demonstrates:
- Loading data using data_loader
- Calculating annual metrics with LTM
- Creating a revenue trend chart
- Following template best practices
"""
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, calculate_annual_metrics,
setup_revenue_chart, save_chart,
format_currency, print_annual_summary, sort_mixed_years,
apply_exclusion_filters
)
from config import (
OUTPUT_DIR, ANALYSIS_YEARS, MAX_DATE,
CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME,
REVENUE_COLUMN, MIN_YEAR, DATE_COLUMN
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Human-readable identifiers for this analysis. ANALYSIS_NAME appears in the
# console banner, chart titles, and the revenue-validation output below.
ANALYSIS_NAME = "Annual Revenue Trend"
DESCRIPTION = "Simple annual revenue trend analysis with LTM support"
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
    """Run the annual revenue trend analysis end to end.

    Loads the sales data, validates and filters it, computes annual
    revenue (honoring an optional LTM window from config), renders a
    line chart of revenue by year, and runs the revenue validation.
    Output goes to the console and to OUTPUT_DIR.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"{ANALYSIS_NAME}")
    print(f"{banner}\n")

    # 1. Load data
    print("Loading data...")
    try:
        data = load_sales_data(get_data_path())
        print(f"Loaded {len(data):,} transactions")
    except Exception as exc:
        print(f"ERROR loading data: {exc}")
        return

    # 2. Validate data structure
    is_valid, msg = validate_data_structure(data)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    print("Data validation passed")

    # 3. Drop any rows excluded by configuration
    data = apply_exclusion_filters(data)

    # 4. Restrict to the configured analysis window
    data = data[data['Year'] >= MIN_YEAR]
    if DATE_COLUMN in data.columns:
        data = data[data[DATE_COLUMN] <= MAX_DATE]

    # 5. Resolve the LTM window, if one is configured
    ltm_start, ltm_end = get_ltm_period_config()
    if ltm_start and ltm_end:
        print(f"LTM period: {ltm_start} to {ltm_end}")

    # 6. Annual metrics
    print("\nCalculating annual metrics...")

    def yearly_revenue(year_data):
        """Metrics computed for a single year of transactions."""
        return {'Revenue': year_data[REVENUE_COLUMN].sum()}

    annual_df = calculate_annual_metrics(data, yearly_revenue, ltm_start, ltm_end)

    # 7. Console summary
    print_annual_summary(annual_df, 'Revenue', 'Revenue')

    # 8. Chart
    print("Generating chart...")
    ensure_directories()

    fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])

    # Year labels can mix ints and strings (e.g. an "LTM" row), so sort
    # them with the shared helper before plotting.
    ordered = sort_mixed_years(annual_df.reset_index(), 'Year')
    year_labels = ordered['Year'].tolist()
    revenue_m = ordered['Revenue'].values / 1e6  # plot in millions

    positions = range(len(year_labels))
    ax.plot(positions, revenue_m, marker='o', linewidth=2, markersize=8, color='#2E86AB')
    ax.set_xticks(positions)
    ax.set_xticklabels(year_labels, rotation=45, ha='right')
    setup_revenue_chart(ax)

    # Note the LTM window in the title when it applies.
    title = f'Annual Revenue Trend - {COMPANY_NAME}'
    if ltm_start and ltm_end:
        from config import get_ltm_label
        ltm_label = get_ltm_label()
        if ltm_label:
            title += f'\n({ltm_label})'
    ax.set_title(title, fontsize=14, fontweight='bold')

    plt.tight_layout()
    save_chart(fig, 'annual_revenue_trend.png')
    plt.close()

    # 9. Revenue reconciliation check
    print("\nValidating revenue...")
    validate_revenue(data, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Chart saved to: {OUTPUT_DIR}")


# ============================================================================
# RUN ANALYSIS
# ============================================================================
if __name__ == "__main__":
    main()

218
examples/cohort_analysis.py Normal file
View File

@@ -0,0 +1,218 @@
"""
Example: Cohort Analysis
Advanced example showing customer cohort retention analysis
This demonstrates:
- Cohort-based analysis
- Retention rate calculations
- Revenue retention metrics
- Advanced visualization
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from operator import attrgetter
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, apply_exclusion_filters,
setup_revenue_chart, save_chart, format_currency
)
from config import (
OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories,
get_data_path, COMPANY_NAME, REVENUE_COLUMN, CUSTOMER_COLUMN,
DATE_COLUMN, MIN_YEAR
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Human-readable identifiers for this analysis. ANALYSIS_NAME appears in the
# console banner, chart titles, and the revenue-validation output below.
ANALYSIS_NAME = "Cohort Analysis"
DESCRIPTION = "Customer cohort retention and revenue analysis"
# ============================================================================
# COHORT ANALYSIS FUNCTIONS
# ============================================================================
def create_cohorts(df, customer_col=None, date_col=None):
    """Tag each transaction with its customer's acquisition cohort.

    A customer's cohort is the year-month of their first purchase, and
    ``CohortPeriod`` counts whole months elapsed between the transaction
    month and that cohort month (0 for the acquisition month itself).

    Args:
        df: DataFrame with customer and date columns; the date column
            must already be datetime-typed.
        customer_col: Customer-id column name. Defaults to the config
            ``CUSTOMER_COLUMN`` imported at the top of this file.
        date_col: Transaction-date column name. Defaults to the config
            ``DATE_COLUMN``.

    Returns:
        DataFrame: the input rows plus 'Cohort' (monthly Period),
        'Period' (transaction month) and 'CohortPeriod' (int) columns.
    """
    # Fall back to the module-level config constants; the previous
    # function-local `from config import ...` was redundant with the
    # top-of-file import and has been removed.
    if customer_col is None:
        customer_col = CUSTOMER_COLUMN
    if date_col is None:
        date_col = DATE_COLUMN

    # The earliest purchase date per customer defines the cohort.
    first_purchase = df.groupby(customer_col)[date_col].min().reset_index()
    first_purchase.columns = [customer_col, 'FirstPurchaseDate']
    first_purchase['Cohort'] = first_purchase['FirstPurchaseDate'].dt.to_period('M')

    # Attach the cohort label to every transaction row.
    df_with_cohort = df.merge(first_purchase[[customer_col, 'Cohort']], on=customer_col)

    # Months elapsed between the transaction month and the cohort month
    # (Period subtraction yields a month offset; .n extracts the int).
    df_with_cohort['Period'] = df_with_cohort[date_col].dt.to_period('M')
    df_with_cohort['CohortPeriod'] = (
        df_with_cohort['Period'] - df_with_cohort['Cohort']
    ).apply(attrgetter('n'))
    return df_with_cohort
def calculate_cohort_metrics(df_with_cohort, revenue_col=None, customer_col=None):
    """Compute per-cohort retention metrics by period.

    For every (cohort, period) pair, counts distinct active customers
    and sums revenue, then expresses both as a percentage of that
    cohort's period-0 values.

    Args:
        df_with_cohort: DataFrame carrying 'Cohort' and 'CohortPeriod'
            columns (as produced by ``create_cohorts``).
        revenue_col: Revenue column name (defaults to config
            ``REVENUE_COLUMN``).
        customer_col: Customer-id column name (defaults to config
            ``CUSTOMER_COLUMN``).

    Returns:
        DataFrame with columns ['Cohort', 'Period', 'Customers',
        'Revenue', 'Retention_Rate', 'Revenue_Retention'] (rates in %).
    """
    # Fall back to the module-level config constants; the previous
    # function-local config import was redundant. An unused
    # per-cohort size series has also been removed.
    if revenue_col is None:
        revenue_col = REVENUE_COLUMN
    if customer_col is None:
        customer_col = CUSTOMER_COLUMN

    # Distinct customers and revenue by cohort and period.
    cohort_revenue = df_with_cohort.groupby(['Cohort', 'CohortPeriod']).agg({
        customer_col: 'nunique',
        revenue_col: 'sum'
    }).reset_index()
    cohort_revenue.columns = ['Cohort', 'Period', 'Customers', 'Revenue']

    # Normalize each cohort against its own period-0 row. Period 0
    # always exists by construction: every customer transacts in their
    # acquisition month.
    cohort_retention = []
    for cohort in cohort_revenue['Cohort'].unique():
        cohort_data = cohort_revenue[cohort_revenue['Cohort'] == cohort].copy()
        base = cohort_data[cohort_data['Period'] == 0]
        initial_customers = base['Customers'].values[0]
        initial_revenue = base['Revenue'].values[0]
        cohort_data['Retention_Rate'] = (cohort_data['Customers'] / initial_customers) * 100
        cohort_data['Revenue_Retention'] = cohort_data['Revenue'] / initial_revenue * 100
        cohort_retention.append(cohort_data)
    return pd.concat(cohort_retention, ignore_index=True)
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
    """Run the cohort retention analysis end to end.

    Loads and validates the sales data, assigns each customer to an
    acquisition cohort, computes customer/revenue retention per cohort
    period, prints a short summary, and renders two retention heatmaps.
    Output goes to the console and to OUTPUT_DIR.
    """
    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")
    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        print(f"ERROR loading data: {e}")
        return
    # 2. Validate generic structure, plus the cohort-specific
    # requirement that a customer column exists.
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    if CUSTOMER_COLUMN not in df.columns:
        print(f"ERROR: Customer column '{CUSTOMER_COLUMN}' not found")
        return
    # 3. Apply filters: configured exclusions, then the analysis window.
    df = apply_exclusion_filters(df)
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]
    # 4. Create cohorts (month of each customer's first purchase)
    print("\nCreating customer cohorts...")
    df_cohort = create_cohorts(df)
    # 5. Calculate cohort metrics (retention rates per cohort period)
    print("Calculating cohort metrics...")
    cohort_metrics = calculate_cohort_metrics(df_cohort)
    # 6. Print summary
    print("\nCohort Summary:")
    print("-" * 60)
    for cohort in sorted(cohort_metrics['Cohort'].unique())[:5]:  # earliest 5 cohorts (sorted ascending)
        cohort_data = cohort_metrics[cohort_metrics['Cohort'] == cohort]
        period_0 = cohort_data[cohort_data['Period'] == 0]
        if len(period_0) > 0:
            initial_customers = period_0['Customers'].values[0]
            initial_revenue = period_0['Revenue'].values[0]
            print(f"\n{cohort}:")
            print(f" Initial: {initial_customers:,} customers, {format_currency(initial_revenue)}")
            # Show retention at period 12 (one year in), when that
            # cohort is old enough to have the data point.
            period_12 = cohort_data[cohort_data['Period'] == 12]
            if len(period_12) > 0:
                retention = period_12['Retention_Rate'].values[0]
                revenue_ret = period_12['Revenue_Retention'].values[0]
                print(f" Period 12: {retention:.1f}% customer retention, {revenue_ret:.1f}% revenue retention")
    # 7. Create visualizations: side-by-side heatmaps (rows = cohorts,
    # columns = months since first purchase).
    print("\nGenerating charts...")
    ensure_directories()
    # Heatmap: Customer retention
    pivot_retention = cohort_metrics.pivot_table(
        index='Cohort',
        columns='Period',
        values='Retention_Rate',
        aggfunc='mean'
    )
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])
    # Retention heatmap
    sns.heatmap(pivot_retention, annot=True, fmt='.0f', cmap='YlOrRd', ax=ax1, cbar_kws={'label': 'Retention %'})
    ax1.set_title('Customer Retention by Cohort\n(Period 0 = 100%)', fontsize=12, fontweight='bold')
    ax1.set_xlabel('Months Since First Purchase')
    ax1.set_ylabel('Cohort')
    # Revenue retention heatmap
    pivot_revenue = cohort_metrics.pivot_table(
        index='Cohort',
        columns='Period',
        values='Revenue_Retention',
        aggfunc='mean'
    )
    sns.heatmap(pivot_revenue, annot=True, fmt='.0f', cmap='YlGnBu', ax=ax2, cbar_kws={'label': 'Revenue Retention %'})
    ax2.set_title('Revenue Retention by Cohort\n(Period 0 = 100%)', fontsize=12, fontweight='bold')
    ax2.set_xlabel('Months Since First Purchase')
    ax2.set_ylabel('Cohort')
    plt.suptitle(f'Cohort Analysis - {COMPANY_NAME}', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    save_chart(fig, 'cohort_analysis.png')
    plt.close()
    # 8. Validate that filtered revenue still reconciles.
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)
    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")
# ============================================================================
# RUN ANALYSIS
# ============================================================================
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,213 @@
"""
Example: Customer Segmentation (RFM) Analysis
Example showing customer segmentation using RFM methodology
This example demonstrates:
- Customer-level aggregation
- RFM segmentation (Recency, Frequency, Monetary)
- Segment analysis and visualization
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, apply_exclusion_filters,
setup_revenue_chart, save_chart, format_currency
)
from config import (
OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories,
get_data_path, COMPANY_NAME, REVENUE_COLUMN, CUSTOMER_COLUMN,
DATE_COLUMN, MIN_YEAR
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Human-readable identifiers for this analysis. ANALYSIS_NAME appears in the
# console banner, chart titles, and the revenue-validation output below.
ANALYSIS_NAME = "Customer Segmentation (RFM)"
DESCRIPTION = "Customer segmentation using RFM methodology"
# ============================================================================
# RFM SEGMENTATION FUNCTIONS
# ============================================================================
def calculate_rfm_scores(df, analysis_date=None, date_col=None,
                         customer_col=None, revenue_col=None):
    """Score every customer on Recency, Frequency and Monetary value.

    Each dimension is ranked and bucketed into quintiles (1-5, where 5
    is best); the three scores are summed into ``RFM_Score`` and mapped
    to a named segment.

    Args:
        df: Transaction-level DataFrame.
        analysis_date: Reference date for recency (defaults to the max
            date in the data).
        date_col: Date column name (defaults to config ``DATE_COLUMN``).
        customer_col: Customer-id column name (defaults to config
            ``CUSTOMER_COLUMN``).
        revenue_col: Revenue column name (defaults to config
            ``REVENUE_COLUMN``).

    Returns:
        DataFrame: one row per customer with R/F/M scores, 'RFM_Score'
        and a 'Segment' label.
    """
    # Column names fall back to the module-level config constants so
    # existing single-argument callers behave exactly as before.
    if date_col is None:
        date_col = DATE_COLUMN
    if customer_col is None:
        customer_col = CUSTOMER_COLUMN
    if revenue_col is None:
        revenue_col = REVENUE_COLUMN
    if analysis_date is None:
        analysis_date = df[date_col].max()

    # Per-customer aggregates: last purchase date, transaction count,
    # total revenue.
    customer_metrics = df.groupby(customer_col).agg({
        date_col: ['max', 'count'],
        revenue_col: 'sum'
    }).reset_index()
    customer_metrics.columns = [customer_col, 'LastPurchaseDate', 'Frequency', 'Monetary']

    # Recency = days since the customer's last purchase.
    customer_metrics['Recency'] = (analysis_date - customer_metrics['LastPurchaseDate']).dt.days

    # rank(method='first') breaks ties so qcut sees strictly increasing
    # values; duplicates='drop' remains as a guard for tiny datasets.
    # Lower recency is better, so its labels run 5..1; frequency and
    # monetary run 1..5.
    customer_metrics['R_Score'] = pd.qcut(
        customer_metrics['Recency'].rank(method='first'),
        q=5, labels=[5, 4, 3, 2, 1], duplicates='drop'
    ).astype(int)
    customer_metrics['F_Score'] = pd.qcut(
        customer_metrics['Frequency'].rank(method='first'),
        q=5, labels=[1, 2, 3, 4, 5], duplicates='drop'
    ).astype(int)
    customer_metrics['M_Score'] = pd.qcut(
        customer_metrics['Monetary'].rank(method='first'),
        q=5, labels=[1, 2, 3, 4, 5], duplicates='drop'
    ).astype(int)

    # Composite score: simple sum of the three quintile scores (3-15).
    customer_metrics['RFM_Score'] = (
        customer_metrics['R_Score'] +
        customer_metrics['F_Score'] +
        customer_metrics['M_Score']
    )

    def assign_segment(row):
        """Map an (R, F, M) triple to a named segment; first match wins."""
        r, f, m = row['R_Score'], row['F_Score'], row['M_Score']
        if r >= 4 and f >= 4 and m >= 4:
            return 'Champions'
        elif r >= 3 and f >= 3 and m >= 4:
            return 'Loyal Customers'
        elif r >= 4 and f <= 2:
            # NOTE(review): recent-but-infrequent buyers are usually a
            # "New Customers" bucket in RFM literature; the 'At Risk'
            # label is kept as-is to preserve report output — confirm intent.
            return 'At Risk'
        elif r <= 2:
            return 'Hibernating'
        elif r >= 3 and f >= 3 and m <= 2:
            return 'Potential Loyalists'
        else:
            return 'Need Attention'

    customer_metrics['Segment'] = customer_metrics.apply(assign_segment, axis=1)
    return customer_metrics
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
    """Run the RFM customer segmentation analysis end to end.

    Loads and validates the sales data, scores every customer on
    Recency/Frequency/Monetary quintiles, prints a per-segment summary
    of customers and revenue, and renders paired bar charts. Output
    goes to the console and to OUTPUT_DIR.
    """
    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")
    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        print(f"ERROR loading data: {e}")
        return
    # 2. Validate data structure; RFM additionally requires a customer
    # column to aggregate on.
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    if CUSTOMER_COLUMN not in df.columns:
        print(f"ERROR: Customer column '{CUSTOMER_COLUMN}' not found in data")
        return
    print("Data validation passed")
    # 3. Apply exclusion filters
    df = apply_exclusion_filters(df)
    # 4. Filter by date range
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]
    # 5. Calculate RFM scores (one row per customer)
    print("\nCalculating RFM scores...")
    rfm_df = calculate_rfm_scores(df)
    # 6. Segment summary: customer count and revenue share per segment
    print("\nCustomer Segmentation Summary:")
    print("-" * 60)
    segment_summary = rfm_df.groupby('Segment').agg({
        CUSTOMER_COLUMN: 'count',
        'Monetary': 'sum'
    }).reset_index()
    segment_summary.columns = ['Segment', 'Customer Count', 'Total Revenue']
    segment_summary = segment_summary.sort_values('Total Revenue', ascending=False)
    for _, row in segment_summary.iterrows():
        pct_customers = (row['Customer Count'] / len(rfm_df)) * 100
        pct_revenue = (row['Total Revenue'] / rfm_df['Monetary'].sum()) * 100
        print(f"{row['Segment']:20s}: {row['Customer Count']:5d} customers ({pct_customers:5.1f}%), "
              f"{format_currency(row['Total Revenue'])} ({pct_revenue:5.1f}% of revenue)")
    # 7. Create visualizations: paired horizontal bar charts of revenue
    # and customer count per segment (sorted ascending so the largest
    # bar sits at the top).
    print("\nGenerating charts...")
    ensure_directories()
    # Chart 1: Revenue by Segment
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])
    segment_summary_sorted = segment_summary.sort_values('Total Revenue', ascending=True)
    revenue_millions = segment_summary_sorted['Total Revenue'].values / 1e6
    ax1.barh(range(len(segment_summary_sorted)), revenue_millions, color='#2E86AB')
    ax1.set_yticks(range(len(segment_summary_sorted)))
    ax1.set_yticklabels(segment_summary_sorted['Segment'].values)
    ax1.set_xlabel('Revenue (Millions USD)')
    ax1.set_title('Revenue by Customer Segment', fontsize=12, fontweight='bold')
    setup_revenue_chart(ax1)
    ax1.set_ylabel('')
    # Chart 2: Customer Count by Segment
    customer_counts = segment_summary_sorted['Customer Count'].values
    ax2.barh(range(len(segment_summary_sorted)), customer_counts, color='#A23B72')
    ax2.set_yticks(range(len(segment_summary_sorted)))
    ax2.set_yticklabels(segment_summary_sorted['Segment'].values)
    ax2.set_xlabel('Number of Customers')
    ax2.set_title('Customer Count by Segment', fontsize=12, fontweight='bold')
    ax2.set_ylabel('')
    ax2.grid(True, alpha=0.3)
    plt.suptitle(f'Customer Segmentation Analysis - {COMPANY_NAME}',
                 fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    save_chart(fig, 'customer_segmentation.png')
    plt.close()
    # 8. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)
    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")
# ============================================================================
# RUN ANALYSIS
# ============================================================================
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,203 @@
"""
Example: Product Performance Analysis
Example showing product mix and performance analysis
This example demonstrates:
- Product-level aggregation
- Product performance metrics
- Product mix visualization
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
get_ltm_period_config, calculate_annual_metrics,
apply_exclusion_filters, setup_revenue_chart, save_chart,
format_currency, sort_mixed_years
)
from config import (
OUTPUT_DIR, MAX_DATE, CHART_SIZES, ensure_directories,
get_data_path, COMPANY_NAME, REVENUE_COLUMN, ITEM_COLUMN,
DATE_COLUMN, MIN_YEAR, QUANTITY_COLUMN
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Human-readable identifiers for this analysis. ANALYSIS_NAME appears in the
# console banner, chart titles, and the revenue-validation output below.
ANALYSIS_NAME = "Product Performance Analysis"
DESCRIPTION = "Product mix and performance analysis"
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def main():
    """Run the product performance analysis end to end.

    Loads and validates the sales data, computes product-level revenue,
    transaction-count and quantity metrics for the most recent period
    (LTM window when configured, otherwise the latest year), prints the
    top-10 products, renders charts, and runs the revenue validation.
    Output goes to the console and to OUTPUT_DIR.
    """
    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")
    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        print(f"ERROR loading data: {e}")
        return
    # 2. Validate data structure
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    if ITEM_COLUMN not in df.columns:
        print(f"WARNING: Item column '{ITEM_COLUMN}' not found. Using transaction-level analysis.")
        # Fall back to a single pseudo-product so the rest of the
        # analysis still runs.
        df[ITEM_COLUMN] = 'All Products'
    print("Data validation passed")
    # 3. Apply exclusion filters
    df = apply_exclusion_filters(df)
    # 4. Filter by date range
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]
    # 5. Setup LTM period
    ltm_start, ltm_end = get_ltm_period_config()
    # 6. Product performance summary for the most recent period
    print("\nCalculating product performance...")
    if ltm_start and ltm_end and 'YearMonth' in df.columns:
        recent_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)]
        period_label = f"LTM {ltm_end}"
    else:
        recent_year = df['Year'].max()
        recent_data = df[df['Year'] == recent_year]
        period_label = str(recent_year)
    # Product-level metrics.
    # BUG FIX: the previous agg spec always used QUANTITY_COLUMN as a
    # dict key ({QUANTITY_COLUMN: 'sum' if ... else 'count'}), which
    # raises KeyError whenever that column is absent from the data.
    # Build the spec conditionally instead.
    has_quantity = QUANTITY_COLUMN in df.columns
    agg_spec = {REVENUE_COLUMN: ['sum', 'count']}
    if has_quantity:
        agg_spec[QUANTITY_COLUMN] = 'sum'
    product_metrics = recent_data.groupby(ITEM_COLUMN).agg(agg_spec).reset_index()
    if has_quantity:
        product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count', 'Quantity']
        # Average realized price per unit; zero quantities become NaN
        # rather than dividing by zero.
        product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Quantity'].replace(0, np.nan)
    else:
        product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count']
        # Without a quantity column, treat each transaction as one unit.
        product_metrics['Quantity'] = product_metrics['Transaction_Count']
        product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Transaction_Count']
    # Sort by revenue
    product_metrics = product_metrics.sort_values('Revenue', ascending=False)
    # Top products summary
    print(f"\nTop 10 Products by Revenue ({period_label}):")
    print("-" * 80)
    top_10 = product_metrics.head(10)
    total_revenue = product_metrics['Revenue'].sum()
    for idx, row in top_10.iterrows():
        pct = (row['Revenue'] / total_revenue) * 100
        print(f"{row[ITEM_COLUMN]:30s}: {format_currency(row['Revenue']):>12s} ({pct:5.1f}%)")
    # 7. Annual product trends (if multiple years available)
    if len(df['Year'].unique()) > 1:
        print("\nCalculating annual product trends...")
        def calculate_product_metrics(year_data):
            """Top-5 product revenue totals for a single year."""
            product_revenue = year_data.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum()
            top_5 = product_revenue.nlargest(5)
            return dict(top_5)
        # NOTE(review): this result is not consumed by the charts below;
        # kept so console output and side effects are unchanged.
        annual_product_df = calculate_annual_metrics(df, calculate_product_metrics, ltm_start, ltm_end)
        # 8. Create visualizations
        print("\nGenerating charts...")
        ensure_directories()
        # Chart 1: Top Products Revenue (Bar Chart)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])
        top_10_revenue = top_10['Revenue'].values / 1e6
        top_10_names = top_10[ITEM_COLUMN].values
        ax1.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
        ax1.set_yticks(range(len(top_10)))
        # Truncate long product names so labels stay readable.
        ax1.set_yticklabels([name[:30] + '...' if len(name) > 30 else name for name in top_10_names])
        ax1.set_xlabel('Revenue (Millions USD)')
        ax1.set_title(f'Top 10 Products by Revenue\n({period_label})', fontsize=12, fontweight='bold')
        setup_revenue_chart(ax1)
        ax1.set_ylabel('')
        # Chart 2: Revenue Distribution (pie); everything past the top
        # 10 is grouped into an 'Other' slice.
        if len(product_metrics) > 10:
            other_revenue = product_metrics.iloc[10:]['Revenue'].sum()
            pie_data = list(top_10['Revenue'].values) + [other_revenue]
            pie_labels = list(top_10[ITEM_COLUMN].values) + ['Other']
        else:
            pie_data = product_metrics['Revenue'].values
            pie_labels = product_metrics[ITEM_COLUMN].values
        pie_data_millions = [x / 1e6 for x in pie_data]
        ax2.pie(pie_data_millions, labels=pie_labels, autopct='%1.1f%%', startangle=90)
        ax2.set_title('Revenue Distribution\n(Top Products)', fontsize=12, fontweight='bold')
        plt.suptitle(f'Product Performance Analysis - {COMPANY_NAME}',
                     fontsize=14, fontweight='bold', y=1.02)
        plt.tight_layout()
        save_chart(fig, 'product_performance.png')
        plt.close()
    else:
        # Single chart if only one year of data is available
        print("\nGenerating chart...")
        ensure_directories()
        fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])
        top_10_revenue = top_10['Revenue'].values / 1e6
        top_10_names = top_10[ITEM_COLUMN].values
        ax.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
        ax.set_yticks(range(len(top_10)))
        ax.set_yticklabels([name[:40] + '...' if len(name) > 40 else name for name in top_10_names])
        ax.set_xlabel('Revenue (Millions USD)')
        ax.set_title(f'Top 10 Products by Revenue - {COMPANY_NAME}\n({period_label})',
                     fontsize=14, fontweight='bold')
        setup_revenue_chart(ax)
        ax.set_ylabel('')
        plt.tight_layout()
        save_chart(fig, 'product_performance.png')
        plt.close()
    # 9. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)
    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")


# ============================================================================
# RUN ANALYSIS
# ============================================================================
if __name__ == "__main__":
    main()