From cf0b596449046f706cd3273d3d8d0de9f2267e6d Mon Sep 17 00:00:00 2001 From: Jonathan Pressnell Date: Fri, 6 Feb 2026 09:16:34 -0500 Subject: [PATCH] Initial commit: sales analysis template Co-authored-by: Cursor --- .cursor/rules/advanced_analysis_patterns.md | 307 ++++++++++ .cursor/rules/ai_assistant_guide.md | 316 +++++++++++ .cursor/rules/analysis_patterns.md | 161 ++++++ .cursor/rules/chart_formatting.md | 111 ++++ .cursor/rules/code_quality.md | 389 +++++++++++++ .cursor/rules/common_errors.md | 109 ++++ .cursor/rules/data_loading.md | 69 +++ .cursor/rules/error_handling.md | 276 +++++++++ .cursor/rules/ltm_methodology.md | 89 +++ EXAMPLES.md | 203 +++++++ QUICK_START.md | 175 ++++++ README.md | 589 ++++++++++++++++++++ SETUP_CHECKLIST.md | 118 ++++ TEMPLATE_OVERVIEW.md | 150 +++++ TEMPLATE_SUMMARY.md | 254 +++++++++ analysis_template.py | 147 +++++ analysis_utils.py | 510 +++++++++++++++++ config.py | 277 +++++++++ config_validator.py | 214 +++++++ data_loader.py | 224 ++++++++ data_processing.py | 285 ++++++++++ data_quality.py | 344 ++++++++++++ examples/annual_revenue_trend.py | 134 +++++ examples/cohort_analysis.py | 218 ++++++++ examples/customer_segmentation.py | 213 +++++++ examples/product_performance.py | 203 +++++++ export_utils.py | 238 ++++++++ generate_sample_data.py | 184 ++++++ logger_config.py | 197 +++++++ report_generator.py | 228 ++++++++ requirements.txt | 30 + run_all_analyses.py | 185 ++++++ setup_wizard.py | 240 ++++++++ statistical_utils.py | 321 +++++++++++ tests/test_analysis_utils.py | 85 +++ tests/test_config_validator.py | 45 ++ tests/test_data_loader.py | 68 +++ validate_revenue.py | 95 ++++ 38 files changed, 8001 insertions(+) create mode 100644 .cursor/rules/advanced_analysis_patterns.md create mode 100644 .cursor/rules/ai_assistant_guide.md create mode 100644 .cursor/rules/analysis_patterns.md create mode 100644 .cursor/rules/chart_formatting.md create mode 100644 .cursor/rules/code_quality.md create mode 100644 
.cursor/rules/common_errors.md create mode 100644 .cursor/rules/data_loading.md create mode 100644 .cursor/rules/error_handling.md create mode 100644 .cursor/rules/ltm_methodology.md create mode 100644 EXAMPLES.md create mode 100644 QUICK_START.md create mode 100644 README.md create mode 100644 SETUP_CHECKLIST.md create mode 100644 TEMPLATE_OVERVIEW.md create mode 100644 TEMPLATE_SUMMARY.md create mode 100644 analysis_template.py create mode 100644 analysis_utils.py create mode 100644 config.py create mode 100644 config_validator.py create mode 100644 data_loader.py create mode 100644 data_processing.py create mode 100644 data_quality.py create mode 100644 examples/annual_revenue_trend.py create mode 100644 examples/cohort_analysis.py create mode 100644 examples/customer_segmentation.py create mode 100644 examples/product_performance.py create mode 100644 export_utils.py create mode 100644 generate_sample_data.py create mode 100644 logger_config.py create mode 100644 report_generator.py create mode 100644 requirements.txt create mode 100644 run_all_analyses.py create mode 100644 setup_wizard.py create mode 100644 statistical_utils.py create mode 100644 tests/test_analysis_utils.py create mode 100644 tests/test_config_validator.py create mode 100644 tests/test_data_loader.py create mode 100644 validate_revenue.py diff --git a/.cursor/rules/advanced_analysis_patterns.md b/.cursor/rules/advanced_analysis_patterns.md new file mode 100644 index 0000000..ae2f504 --- /dev/null +++ b/.cursor/rules/advanced_analysis_patterns.md @@ -0,0 +1,307 @@ +# Advanced Analysis Patterns + +This document provides patterns for sophisticated, production-grade analyses that leverage the full capabilities of the template framework. 
+ +## ⭐ Using Cursor AI Effectively + +When working in Cursor, you can ask the AI to: +- "Create a cohort analysis script using the template patterns" +- "Add statistical significance testing to this analysis" +- "Generate a multi-dimensional analysis with product, customer, and geography" +- "Create a forecasting analysis with confidence intervals" + +The AI will automatically use these patterns and utilities. + +## Advanced Analysis Types + +### 1. Multi-Dimensional Analysis + +**Pattern:** Analyze across multiple dimensions simultaneously (e.g., Product Ɨ Customer Ɨ Geography) + +```python +from data_loader import load_sales_data +from analysis_utils import calculate_annual_metrics, get_ltm_period_config +from config import REVENUE_COLUMN, ITEM_COLUMN, CUSTOMER_COLUMN, REGION_COLUMN + +df = load_sales_data(get_data_path()) + +# Multi-dimensional pivot +pivot = df.pivot_table( + index=[ITEM_COLUMN, CUSTOMER_COLUMN], + columns=REGION_COLUMN, + values=REVENUE_COLUMN, + aggfunc='sum', + fill_value=0 +) + +# Or use data_processing helper +from data_processing import create_pivot_table +pivot = create_pivot_table( + df, + index=[ITEM_COLUMN, CUSTOMER_COLUMN], + columns=REGION_COLUMN, + values=REVENUE_COLUMN +) +``` + +### 2. Cohort Analysis with Retention Metrics + +**Pattern:** Track customer cohorts over time with retention and revenue metrics + +```python +from examples.cohort_analysis import create_cohorts, calculate_cohort_metrics + +df_cohort = create_cohorts(df) +cohort_metrics = calculate_cohort_metrics(df_cohort) + +# Calculate Net Revenue Retention (NRR) +nrr = cohort_metrics.groupby('Cohort').agg({ + 'Revenue_Retention': lambda x: x.iloc[-1] if len(x) > 0 else 0 +}) +``` + +### 3. 
Statistical Significance Testing + +**Pattern:** Compare segments with statistical tests + +```python +from statistical_utils import test_statistical_significance + +# Compare two groups +group1 = df[df['Segment'] == 'A'][REVENUE_COLUMN] +group2 = df[df['Segment'] == 'B'][REVENUE_COLUMN] + +result = test_statistical_significance(group1, group2) +if result['significant']: + print(f"Significant difference (p={result['p_value']:.4f})") +``` + +### 4. Price-Volume-Mix (PVM) Decomposition + +**Pattern:** Decompose revenue changes into price, volume, and mix effects + +```python +from config import QUANTITY_COLUMN, REVENUE_COLUMN + +def pvm_decomposition(df_base, df_current): + """Decompose revenue change into price, volume, mix effects""" + base_price = df_base[REVENUE_COLUMN].sum() / df_base[QUANTITY_COLUMN].sum() + current_price = df_current[REVENUE_COLUMN].sum() / df_current[QUANTITY_COLUMN].sum() + + base_volume = df_base[QUANTITY_COLUMN].sum() + current_volume = df_current[QUANTITY_COLUMN].sum() + + # Price effect + price_effect = (current_price - base_price) * base_volume + + # Volume effect + volume_effect = (current_volume - base_volume) * base_price + + # Mix effect (residual) + total_change = df_current[REVENUE_COLUMN].sum() - df_base[REVENUE_COLUMN].sum() + mix_effect = total_change - price_effect - volume_effect + + return { + 'price_effect': price_effect, + 'volume_effect': volume_effect, + 'mix_effect': mix_effect, + 'total_change': total_change + } +``` + +### 5. 
Time Series Forecasting + +**Pattern:** Forecast future revenue with confidence intervals + +```python +from data_processing import prepare_time_series +from statistical_utils import calculate_confidence_interval + +# Prepare time series +ts = prepare_time_series(df, freq='M') + +# Simple forecast (extend trend) +import numpy as np +from scipy import stats +x = np.arange(len(ts)) +slope, intercept, r_value, p_value, std_err = stats.linregress(x, ts.values) + +# Forecast next 12 months +future_x = np.arange(len(ts), len(ts) + 12) +forecast = slope * future_x + intercept + +# Calculate confidence intervals +ci = calculate_confidence_interval(ts, confidence=0.95) +``` + +### 6. Customer Lifetime Value (CLV) Analysis + +**Pattern:** Calculate CLV using historical data + +```python +from config import CUSTOMER_COLUMN, REVENUE_COLUMN, DATE_COLUMN + +def calculate_clv(df, years=3): + """Calculate customer lifetime value""" + customer_metrics = df.groupby(CUSTOMER_COLUMN).agg({ + REVENUE_COLUMN: 'sum', + DATE_COLUMN: ['min', 'max', 'count'] + }).reset_index() + + customer_metrics.columns = [CUSTOMER_COLUMN, 'Total_Revenue', 'First_Purchase', 'Last_Purchase', 'Order_Count'] + + # Calculate customer age (years) + customer_metrics['Customer_Age_Years'] = ( + (customer_metrics['Last_Purchase'] - customer_metrics['First_Purchase']).dt.days / 365.25 + ) + + # Annual revenue + customer_metrics['Annual_Revenue'] = customer_metrics['Total_Revenue'] / customer_metrics['Customer_Age_Years'].replace(0, 1) + + # Projected CLV + customer_metrics['CLV'] = customer_metrics['Annual_Revenue'] * years + + return customer_metrics +``` + +### 7. 
Market Basket Analysis + +**Pattern:** Find product associations and cross-sell opportunities + +```python +from mlxtend.frequent_patterns import apriori, association_rules +from mlxtend.preprocessing import TransactionEncoder + +# Prepare transaction data +transactions = df.groupby(INVOICE_NUMBER_COLUMN)[ITEM_COLUMN].apply(list).tolist() + +# Encode transactions +te = TransactionEncoder() +te_ary = te.fit(transactions).transform(transactions) +df_encoded = pd.DataFrame(te_ary, columns=te.columns_) + +# Find frequent itemsets +frequent_itemsets = apriori(df_encoded, min_support=0.01, use_colnames=True) + +# Generate association rules +rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5) +``` + +### 8. Segmentation with Machine Learning + +**Pattern:** Advanced customer segmentation using clustering + +```python +from sklearn.cluster import KMeans +from sklearn.preprocessing import StandardScaler + +# Prepare features +features = df.groupby(CUSTOMER_COLUMN).agg({ + REVENUE_COLUMN: ['sum', 'mean', 'count'], + DATE_COLUMN: lambda x: (x.max() - x.min()).days +}).reset_index() +features.columns = [CUSTOMER_COLUMN, 'Total_Revenue', 'Avg_Order', 'Order_Count', 'Customer_Tenure'] + +# Scale features +scaler = StandardScaler() +features_scaled = scaler.fit_transform(features[['Total_Revenue', 'Avg_Order', 'Order_Count', 'Customer_Tenure']]) + +# Cluster +kmeans = KMeans(n_clusters=5, random_state=42) +features['Segment'] = kmeans.fit_predict(features_scaled) +``` + +### 9. Anomaly Detection + +**Pattern:** Identify unusual patterns in data + +```python +from statistical_utils import calculate_z_score + +# Calculate z-scores for revenue +mean_revenue = df[REVENUE_COLUMN].mean() +std_revenue = df[REVENUE_COLUMN].std() + +df['Revenue_Z_Score'] = df[REVENUE_COLUMN].apply( + lambda x: calculate_z_score(x, mean_revenue, std_revenue) +) + +# Flag anomalies (|z| > 3) +df['Is_Anomaly'] = df['Revenue_Z_Score'].abs() > 3 +``` + +### 10. 
Competitive Analysis Framework + +**Pattern:** Compare performance across dimensions + +```python +from statistical_utils import calculate_yoy_growth, calculate_cagr + +def competitive_analysis(df, dimension_col): + """Compare performance across dimension (e.g., products, regions)""" + analysis = df.groupby(dimension_col).agg({ + REVENUE_COLUMN: ['sum', 'mean', 'count'] + }).reset_index() + analysis.columns = [dimension_col, 'Total_Revenue', 'Avg_Order', 'Order_Count'] + + # Calculate growth rates + for year in sorted(df['Year'].unique())[1:]: + prev_year = year - 1 + current = df[df['Year'] == year].groupby(dimension_col)[REVENUE_COLUMN].sum() + previous = df[df['Year'] == prev_year].groupby(dimension_col)[REVENUE_COLUMN].sum() + + growth = calculate_yoy_growth(current, previous) + analysis[f'Growth_{year}'] = growth + + return analysis +``` + +## Best Practices for Advanced Analyses + +1. **Always validate data quality first:** + ```python + from data_quality import generate_data_quality_report + report = generate_data_quality_report(df) + ``` + +2. **Use logging for complex analyses:** + ```python + from logger_config import get_logger + logger = get_logger('advanced_analysis') + logger.info("Starting complex analysis...") + ``` + +3. **Export intermediate results:** + ```python + from export_utils import export_to_excel + export_to_excel(intermediate_df, 'intermediate_results.xlsx') + ``` + +4. **Generate comprehensive reports:** + ```python + from report_generator import generate_pdf_report + generate_pdf_report(charts=['chart1.png', 'chart2.png'], summary_data=summary) + ``` + +5. 
**Test statistical significance:** + ```python + from statistical_utils import test_statistical_significance + # Always test before making conclusions + ``` + +## Cursor AI Prompts for Advanced Analyses + +When using Cursor, try these prompts: + +- **"Create a cohort retention analysis with heatmaps"** +- **"Build a price-volume-mix decomposition analysis"** +- **"Generate a customer lifetime value analysis with segmentation"** +- **"Create a forecasting model with confidence intervals"** +- **"Build a multi-dimensional analysis across product, customer, and geography"** +- **"Create an anomaly detection analysis for unusual transactions"** + +The AI will automatically use these patterns and the template utilities. + +--- + +**Last Updated:** January 2026 +**For:** Advanced users and AI-assisted development diff --git a/.cursor/rules/ai_assistant_guide.md b/.cursor/rules/ai_assistant_guide.md new file mode 100644 index 0000000..6b382ff --- /dev/null +++ b/.cursor/rules/ai_assistant_guide.md @@ -0,0 +1,316 @@ +# AI Assistant Guide for Sales Analysis Template + +This guide helps you effectively use Cursor's AI assistant to create sophisticated sales analyses. + +## šŸŽÆ Quick Start with AI + +### Basic Prompt Structure + +When asking the AI to create an analysis, use this structure: + +``` +Create a [ANALYSIS_TYPE] analysis that: +1. [Specific requirement 1] +2. [Specific requirement 2] +3. Uses the sales_analysis_template patterns +4. Includes [specific visualizations/metrics] +``` + +### Example Prompts + +**Simple Analysis:** +``` +Create an annual revenue trend analysis using the template patterns, +with LTM support and proper chart formatting. +``` + +**Advanced Analysis:** +``` +Create a customer cohort retention analysis that: +1. Groups customers by first purchase month +2. Calculates retention rates for 12 periods +3. Shows revenue retention metrics +4. Creates heatmap visualizations +5. 
Uses the template's cohort analysis patterns +``` + +**Multi-Dimensional Analysis:** +``` +Create a product performance analysis across regions that: +1. Analyzes top products by revenue +2. Shows regional distribution +3. Calculates growth rates by region +4. Creates multi-panel visualizations +5. Exports results to Excel +``` + +## šŸ“‹ Template-Aware Prompts + +The AI automatically knows about: +- `data_loader.py` - Always use this for loading data +- `analysis_utils.py` - Use utilities for formatting, LTM, etc. +- `config.py` - Use config values, never hardcode +- Template patterns - Follows best practices automatically + +### What the AI Knows + +When you mention the template, the AI will: +- āœ… Use `load_sales_data()` instead of `pd.read_csv()` +- āœ… Use `setup_revenue_chart()` for charts +- āœ… Divide revenue by 1e6 before plotting +- āœ… Use config values from `config.py` +- āœ… Apply exclusion filters if configured +- āœ… Validate data after loading +- āœ… Use LTM patterns correctly + +## šŸ”§ Common AI Tasks + +### 1. Create New Analysis Script + +**Prompt:** +``` +Create a new analysis script called [name].py that: +- Follows the template structure +- Analyzes [specific metric/dimension] +- Creates [type of visualization] +- Uses template utilities +``` + +**AI will:** +- Copy structure from `analysis_template.py` +- Use proper imports +- Follow template patterns +- Include validation + +### 2. Add Advanced Features + +**Prompt:** +``` +Add statistical significance testing to [analysis].py: +- Compare [group1] vs [group2] +- Show p-values and confidence intervals +- Use statistical_utils functions +``` + +### 3. Fix Common Issues + +**Prompt:** +``` +Fix the chart formatting in [analysis].py - it's showing scientific notation. +``` + +**AI will:** +- Add `data / 1e6` conversion +- Use `setup_revenue_chart()` +- Fix formatting issues + +### 4. 
Enhance Existing Analysis + +**Prompt:** +``` +Enhance [analysis].py to: +- Add export to Excel functionality +- Include data quality checks +- Add logging +- Generate PDF report +``` + +## šŸš€ Advanced AI Prompts + +### Multi-Step Analysis + +``` +Create a comprehensive customer analysis that: +1. Segments customers using RFM +2. Calculates CLV for each segment +3. Identifies at-risk customers +4. Creates cohort retention analysis +5. Generates PDF report with all charts +``` + +### Data Quality First + +``` +Before running the analysis, check data quality: +1. Run data quality report +2. Fix any critical issues +3. Validate configuration +4. Then proceed with analysis +``` + +### Statistical Analysis + +``` +Add statistical analysis to [analysis].py: +- Calculate year-over-year growth with significance testing +- Show confidence intervals for forecasts +- Test differences between segments +- Use statistical_utils functions +``` + +## šŸ’” Pro Tips + +### 1. Reference Existing Examples + +``` +Create an analysis similar to examples/customer_segmentation.py +but for product segmentation instead. +``` + +### 2. Use Template Utilities + +``` +Use the template's export_utils to save results to Excel, +and report_generator to create a PDF report. +``` + +### 3. Leverage Cursor Rules + +The AI automatically reads `.cursor/rules/` files, so you can say: +``` +Follow the advanced_analysis_patterns.md guide to create +a price-volume-mix decomposition analysis. +``` + +### 4. Iterative Development + +``` +Start with a basic version, then enhance it: +1. First version: Simple revenue trend +2. Add: Statistical significance +3. Add: Export functionality +4. Add: PDF report generation +``` + +## šŸŽØ Visualization Prompts + +### Create Specific Chart Types + +``` +Create a heatmap showing [metric] across [dimension1] and [dimension2], +using seaborn and following template chart formatting. 
+``` + +``` +Create an interactive Plotly chart for [analysis], +saving it as HTML using the template's interactive chart functions. +``` + +### Multi-Panel Visualizations + +``` +Create a 2x2 subplot showing: +- Top left: Revenue trend +- Top right: Customer count trend +- Bottom left: Average order value +- Bottom right: Growth rates +All using template chart formatting. +``` + +## šŸ“Š Data Analysis Prompts + +### Cohort Analysis + +``` +Create a cohort analysis that: +1. Groups customers by first purchase month +2. Tracks retention for 12 periods +3. Calculates revenue retention +4. Creates retention heatmap +5. Uses examples/cohort_analysis.py as reference +``` + +### Forecasting + +``` +Create a revenue forecasting analysis: +1. Prepare time series data +2. Fit trend model +3. Forecast next 12 months +4. Show confidence intervals +5. Use statistical_utils for calculations +``` + +### Segmentation + +``` +Create an advanced customer segmentation: +1. Calculate RFM scores +2. Apply clustering algorithm +3. Analyze segment characteristics +4. Create segment visualizations +5. Export segment data to Excel +``` + +## šŸ” Debugging with AI + +### Fix Errors + +``` +I'm getting [error message] in [file].py. +Fix it using template best practices. +``` + +### Optimize Performance + +``` +Optimize [analysis].py for large datasets: +- Use efficient pandas operations +- Add progress indicators +- Consider data sampling if needed +``` + +### Improve Code Quality + +``` +Refactor [analysis].py to: +- Use more template utilities +- Follow template patterns better +- Add proper error handling +- Include logging +``` + +## šŸ“ Documentation Prompts + +### Add Documentation + +``` +Add comprehensive docstrings to [analysis].py following +the template's documentation style. 
+``` + +### Create README + +``` +Create a README for [analysis].py explaining: +- What it does +- How to run it +- What outputs it generates +- Dependencies required +``` + +## šŸŽÆ Best Practices for AI Interaction + +1. **Be Specific:** Mention template files and utilities by name +2. **Reference Examples:** Point to existing examples when relevant +3. **Iterate:** Start simple, then add complexity +4. **Use Template Terms:** Mention "LTM", "config values", "template patterns" +5. **Ask for Validation:** Request data quality checks and validation + +## Example Full Workflow + +``` +1. "Check my configuration using config_validator.py" +2. "Run data quality report on my data" +3. "Create a revenue trend analysis using template patterns" +4. "Add statistical significance testing to the analysis" +5. "Export results to Excel and generate PDF report" +6. "Create a cohort analysis similar to the example" +``` + +The AI will guide you through each step using template best practices. + +--- + +**Last Updated:** January 2026 +**For:** Cursor AI users working with sales_analysis_template diff --git a/.cursor/rules/analysis_patterns.md b/.cursor/rules/analysis_patterns.md new file mode 100644 index 0000000..1d1cd67 --- /dev/null +++ b/.cursor/rules/analysis_patterns.md @@ -0,0 +1,161 @@ +# Common Analysis Patterns + +## ⭐ RECOMMENDED: Use Utilities + +**Always prefer `analysis_utils.py` and `config.py` over manual implementations:** +- Consistent formatting +- Fewer errors +- Easier maintenance +- Standardized output + +## Standard Script Structure (Using Utilities) + +**RECOMMENDED:** Use `analysis_utils.py` and `config.py` for consistency: + +```python +# 1. 
IMPORTS +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from data_loader import load_sales_data, validate_data_structure +from validate_revenue import validate_revenue +from analysis_utils import ( + get_ltm_period_config, get_annual_data, calculate_annual_metrics, + get_millions_formatter, setup_revenue_chart, save_chart, + format_currency, print_annual_summary, sort_mixed_years, + apply_exclusion_filters +) +from config import ( + DATA_FILE, OUTPUT_DIR, CHART_SIZES, ensure_directories, + get_data_path, REVENUE_COLUMN, COMPANY_NAME +) + +# 2. LOAD DATA (ALWAYS use data_loader) +df = load_sales_data(get_data_path()) + +# 3. VALIDATE DATA STRUCTURE +is_valid, msg = validate_data_structure(df) +if not is_valid: + print(f"ERROR: {msg}") + raise SystemExit(1) + +# 4. APPLY EXCLUSION FILTERS (if configured) +df = apply_exclusion_filters(df) + +# 5. SETUP LTM (if doing annual comparisons and LTM is enabled) +ltm_start, ltm_end = get_ltm_period_config() + +# 6. DATA PREPARATION +# Convert columns, filter data, create derived columns + +# 7. ANALYSIS LOGIC +# Use calculate_annual_metrics() for annual aggregations + +# 8. VISUALIZATIONS +# Use setup_revenue_chart() and save_chart() from analysis_utils + +# 9. VALIDATION +validate_revenue(df, "Analysis Name") +``` + +## Annual Aggregation Pattern + +**RECOMMENDED:** Use `calculate_annual_metrics()` from `analysis_utils.py`: + +```python +from analysis_utils import calculate_annual_metrics, get_ltm_period_config +from config import REVENUE_COLUMN + +ltm_start, ltm_end = get_ltm_period_config() + +def calculate_metrics(year_data): + """Calculate metrics for a single year""" + return { + 'Revenue': year_data[REVENUE_COLUMN].sum(), + # ... 
other metrics + } + +annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end) +``` + +## Chart Formatting Pattern + +**ALWAYS use this pattern for revenue charts:** + +```python +from analysis_utils import setup_revenue_chart, save_chart +from config import CHART_SIZES + +fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) + +# Divide data by 1e6 BEFORE plotting +ax.plot(data / 1e6, ...) +# OR +ax.bar(x, values / 1e6, ...) + +# Apply formatter automatically +setup_revenue_chart(ax) + +# Save chart +save_chart(fig, 'chart_name.png') +plt.close() +``` + +## Mixed Type Handling + +When dealing with year columns that may contain mixed int/str types (e.g., "2025 (LTM 9/2025)"): + +```python +from analysis_utils import sort_mixed_years + +# Sort DataFrame by year +df_sorted = sort_mixed_years(df, year_col='Year') + +# For chart labels +years = df_sorted['Year'].tolist() +x_pos = range(len(years)) +ax.set_xticks(x_pos) +ax.set_xticklabels(years, rotation=45, ha='right') +``` + +## Price Calculation Pattern + +```python +from analysis_utils import calculate_price_per_unit +from config import QUANTITY_COLUMN, REVENUE_COLUMN + +# Calculate average price per unit (excludes outliers automatically) +price_per_unit = calculate_price_per_unit(df, QUANTITY_COLUMN, REVENUE_COLUMN) +``` + +## Exclusion Filters Pattern + +If you need to exclude specific segments (e.g., test accounts, business units): + +```python +from analysis_utils import apply_exclusion_filters + +# Configure in config.py: +# EXCLUSION_FILTERS = { +# 'enabled': True, +# 'exclude_by_column': 'Country', +# 'exclude_values': ['KVT', 'Test'] +# } + +df = apply_exclusion_filters(df) +``` + +## Using Configuration Values + +**ALWAYS use config values instead of hardcoding:** + +```python +from config import ( + REVENUE_COLUMN, # Use this instead of 'USD' or 'Amount' + CUSTOMER_COLUMN, # Use this instead of 'Customer' + DATE_COLUMN, # Use this instead of 'InvoiceDate' + COMPANY_NAME, # Use this 
for titles + ANALYSIS_YEARS, # Use this for year filtering + CHART_SIZES, # Use this for figure sizes +) +``` diff --git a/.cursor/rules/chart_formatting.md b/.cursor/rules/chart_formatting.md new file mode 100644 index 0000000..a6c4905 --- /dev/null +++ b/.cursor/rules/chart_formatting.md @@ -0,0 +1,111 @@ +# Chart Formatting Rules + +## ⭐ RECOMMENDED: Use analysis_utils.py + +**Prefer utility functions:** +```python +from analysis_utils import setup_revenue_chart, save_chart, get_millions_formatter +from config import CHART_SIZES, OUTPUT_DIR + +fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) +ax.plot(data / 1e6, ...) +setup_revenue_chart(ax) # Applies formatter automatically +save_chart(fig, 'chart.png') # Saves to charts/ directory +``` + +## Revenue Charts: Millions Formatter + +**ALWAYS use this pattern for revenue charts:** + +```python +from analysis_utils import setup_revenue_chart + +# Divide data by 1e6 BEFORE plotting +ax.plot(data / 1e6, ...) +# OR +ax.bar(x, values / 1e6, ...) + +# Apply formatter automatically +setup_revenue_chart(ax) +``` + +**Manual approach (if not using utilities):** +```python +from matplotlib.ticker import FuncFormatter + +def millions_formatter(x, pos): + return f'${x:.1f}m' + +ax.plot(data / 1e6, ...) +ax.yaxis.set_major_formatter(FuncFormatter(millions_formatter)) +ax.set_ylabel('Revenue (Millions USD)') +``` + +## Thousands Formatter (for smaller values) + +```python +from analysis_utils import get_thousands_formatter + +ax.xaxis.set_major_formatter(get_thousands_formatter()) +ax.barh(x, values / 1e3, ...) 
+ax.set_xlabel('Value (Thousands USD)') +``` + +## Chart Labeling with LTM + +**If LTM is enabled, ALWAYS include LTM notation:** + +```python +from config import get_ltm_label, COMPANY_NAME + +title = f'Annual Revenue Trend - {COMPANY_NAME}' +ltm_label = get_ltm_label() +if ltm_label: + title += f'\n({ltm_label})' +ax.set_title(title) +``` + +## Chart Sizes + +**Use predefined sizes from config:** +```python +from config import CHART_SIZES + +fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) # (10, 6) +# Options: 'small' (6, 4), 'medium' (10, 6), 'large' (12, 8), 'wide' (14, 6) +``` + +## Common Mistakes + +āŒ **WRONG:** +```python +ax.plot(revenue, ...) # Shows scientific notation (1e8) +``` + +āœ… **CORRECT:** +```python +ax.plot(revenue / 1e6, ...) # Divide first +setup_revenue_chart(ax) # Then format +``` + +## Saving Charts + +**ALWAYS use save_chart() utility:** +```python +from analysis_utils import save_chart + +save_chart(fig, 'chart_name.png') # Saves to charts/ with proper settings +plt.close() # Don't forget to close! +``` + +## Chart Styling + +**Configure style in config.py:** +```python +# In config.py: +CHART_STYLE = 'seaborn-v0_8' # Options: 'default', 'ggplot', 'seaborn-v0_8' + +# In your script: +import matplotlib.pyplot as plt +plt.style.use(CHART_STYLE) # Apply before creating figures +``` diff --git a/.cursor/rules/code_quality.md b/.cursor/rules/code_quality.md new file mode 100644 index 0000000..b37f428 --- /dev/null +++ b/.cursor/rules/code_quality.md @@ -0,0 +1,389 @@ +# Code Quality & Best Practices + +**Comprehensive guide for writing Cursor-optimized code in the sales analysis template.** + +This document combines code quality standards and Cursor best practices to ensure AI assistants can effectively understand, modify, and extend the codebase. 
+ +## Type Hints + +### When to Use Type Hints + +Use type hints for: +- Function parameters +- Return values +- Class attributes +- Complex data structures + +### Example Pattern + +```python +from typing import Dict, List, Optional, Tuple +import pandas as pd + +def calculate_annual_metrics( + df: pd.DataFrame, + metrics_func: callable, + ltm_start: Optional[pd.Period] = None, + ltm_end: Optional[pd.Period] = None +) -> pd.DataFrame: + """ + Calculate annual metrics for all years + + Args: + df: DataFrame with 'Year' and 'YearMonth' columns + metrics_func: Function that takes a DataFrame and returns a dict of metrics + ltm_start: LTM start period (defaults to config if None) + ltm_end: LTM end period (defaults to config if None) + + Returns: + DataFrame with 'Year' index and metric columns + """ + # Implementation +``` + +## Docstrings + +### Docstring Format + +All functions should use Google-style docstrings: + +```python +def function_name(param1: type, param2: type) -> return_type: + """ + Brief description of what the function does. + + More detailed explanation if needed. Can span multiple lines. + Explain any complex logic or important considerations. 
+ + Args: + param1: Description of param1 + param2: Description of param2 + + Returns: + Description of return value + + Raises: + ValueError: When and why this exception is raised + + Example: + >>> result = function_name(value1, value2) + >>> print(result) + expected_output + """ +``` + +### Required Elements + +- Brief one-line summary +- Detailed description (if needed) +- Args section (all parameters) +- Returns section (return value) +- Raises section (if exceptions raised) +- Example section (for complex functions) + +## Variable Naming + +### Conventions + +- **Descriptive names:** `customer_revenue` not `cr` +- **Consistent prefixes:** `df_` for DataFrames, `annual_` for annual metrics +- **Clear abbreviations:** `ltm` for Last Twelve Months (well-known) +- **Avoid single letters:** Except for loop variables (`i`, `j`, `k`) + +### Good Examples + +```python +# Good +customer_revenue_by_year = df.groupby(['Customer', 'Year'])[REVENUE_COLUMN].sum() +annual_metrics_df = calculate_annual_metrics(df, metrics_func) +ltm_start_period, ltm_end_period = get_ltm_period_config() + +# Bad +cr = df.groupby(['C', 'Y'])['R'].sum() +am = calc(df, mf) +s, e = get_ltm() +``` + +## Error Messages + +### Structure + +Error messages should be: +1. **Specific:** What exactly went wrong +2. **Actionable:** How to fix it +3. **Contextual:** Where it occurred +4. **Helpful:** Reference to documentation + +### Good Error Messages + +```python +# Good +raise ValueError( + f"Required column '{REVENUE_COLUMN}' not found in data.\n" + f"Available columns: {list(df.columns)}\n" + f"Please update config.py REVENUE_COLUMN to match your data.\n" + f"See .cursor/rules/data_loading.md for more help." 
+) + +# Bad +raise ValueError("Column not found") +``` + +## Code Comments + +### When to Comment + +- Complex logic that isn't immediately obvious +- Business rules or domain-specific knowledge +- Workarounds or non-obvious solutions +- Performance considerations +- TODO items with context + +### Comment Style + +```python +# Good: Explains WHY, not WHAT +# Use LTM for most recent year to enable apples-to-apples comparison +# with full calendar years (avoids partial year bias) +if year == LTM_END_YEAR and LTM_ENABLED: + year_data = get_ltm_data(df, ltm_start, ltm_end) + +# Bad: States the obvious +# Check if year equals LTM_END_YEAR +if year == LTM_END_YEAR: +``` + +## Function Design + +### Single Responsibility + +Each function should do one thing well: + +```python +# Good: Single responsibility +def calculate_revenue(df: pd.DataFrame) -> float: + """Calculate total revenue from DataFrame""" + return df[REVENUE_COLUMN].sum() + +def calculate_customer_count(df: pd.DataFrame) -> int: + """Calculate unique customer count""" + return df[CUSTOMER_COLUMN].nunique() + +# Bad: Multiple responsibilities +def calculate_metrics(df): + """Calculate revenue and customer count""" + revenue = df[REVENUE_COLUMN].sum() + customers = df[CUSTOMER_COLUMN].nunique() + return revenue, customers +``` + +### Function Length + +- Keep functions under 50 lines when possible +- Break complex functions into smaller helper functions +- Use descriptive function names that explain purpose + +## Import Organization + +### Standard Order + +1. Standard library imports +2. Third-party imports (pandas, numpy, matplotlib) +3. 
Local/template imports (data_loader, analysis_utils, config) + +### Example + +```python +# Standard library +from pathlib import Path +from typing import Dict, Optional +from datetime import datetime + +# Third-party +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt + +# Template imports +from data_loader import load_sales_data, validate_data_structure +from analysis_utils import calculate_annual_metrics, setup_revenue_chart +from config import REVENUE_COLUMN, CHART_SIZES, COMPANY_NAME +``` + +## Constants and Configuration + +### Use Config Values + +```python +# Good: From config +from config import REVENUE_COLUMN, DATE_COLUMN +revenue = df[REVENUE_COLUMN].sum() + +# Bad: Hardcoded +revenue = df['USD'].sum() +``` + +### Magic Numbers + +Avoid magic numbers - use named constants or config: + +```python +# Good: Named constant +MILLIONS_DIVISOR = 1e6 +revenue_millions = revenue / MILLIONS_DIVISOR + +# Or from config +CHART_DPI = 300 # In config.py + +# Bad: Magic number +revenue_millions = revenue / 1000000 +``` + +## Testing Considerations + +### Testable Code + +Write code that's easy to test: +- Pure functions when possible (no side effects) +- Dependency injection for external dependencies +- Clear inputs and outputs + +### Example + +```python +# Good: Testable +def calculate_metrics(year_data: pd.DataFrame, revenue_col: str) -> Dict: + """Calculate metrics - easy to test with sample data""" + return { + 'Revenue': year_data[revenue_col].sum(), + 'Count': len(year_data) + } + +# Harder to test: Depends on global config +def calculate_metrics(year_data): + """Uses global REVENUE_COLUMN - harder to test""" + return {'Revenue': year_data[REVENUE_COLUMN].sum()} +``` + +## AI-Friendly Patterns + +### Clear Intent + +Code should clearly express intent: + +```python +# Good: Intent is clear +customers_with_revenue = df[df[REVENUE_COLUMN] > 0][CUSTOMER_COLUMN].unique() + +# Less clear: Requires understanding of pandas +customers_with_revenue = 
df.loc[df[REVENUE_COLUMN] > 0, CUSTOMER_COLUMN].unique() +``` + +### Explicit Over Implicit + +```python +# Good: Explicit +if LTM_ENABLED and ltm_start is not None and ltm_end is not None: + use_ltm = True +else: + use_ltm = False + +# Less clear: Implicit truthiness +use_ltm = LTM_ENABLED and ltm_start and ltm_end +``` + +## Documentation for AI + +### Help AI Understand Context + +Add comments that help AI understand business context: + +```python +# LTM (Last Twelve Months) is used for the most recent partial year +# to enable fair comparison with full calendar years. +# Example: If latest data is through Sep 2025, use Oct 2024 - Sep 2025 +if year == LTM_END_YEAR and LTM_ENABLED: + # Use 12-month rolling period instead of partial calendar year + year_data = get_ltm_data(df, ltm_start, ltm_end) +``` + +## Cursor-Specific Optimizations + +### AI-Friendly Code Structure + +Code should be structured so Cursor AI can: +1. **Understand intent** - Clear function names and comments +2. **Generate code** - Follow established patterns +3. **Fix errors** - Actionable error messages +4. **Extend functionality** - Modular, reusable functions + +### Example: AI-Generated Code Pattern + +When AI generates code, it should automatically: +```python +# AI recognizes this pattern and replicates it +def main(): + # 1. Load data (AI knows to use data_loader) + df = load_sales_data(get_data_path()) + + # 2. Validate (AI knows to check structure) + is_valid, msg = validate_data_structure(df) + if not is_valid: + print(f"ERROR: {msg}") + return + + # 3. Apply filters (AI knows exclusion filters) + df = apply_exclusion_filters(df) + + # 4. Analysis logic (AI follows template patterns) + # ... + + # 5. Create charts (AI knows formatting rules) + # ... + + # 6. 
Validate revenue (AI knows to validate) + validate_revenue(df, ANALYSIS_NAME) +``` + +### Help AI Generate Better Code + +Add context comments that help AI: +```python +# LTM (Last Twelve Months) is used for the most recent partial year +# to enable fair comparison with full calendar years. +# Example: If latest data is through Sep 2025, use Oct 2024 - Sep 2025 +# This avoids partial-year bias in year-over-year comparisons. +if year == LTM_END_YEAR and LTM_ENABLED: + # Use 12-month rolling period instead of partial calendar year + year_data = get_ltm_data(df, ltm_start, ltm_end) + year_label = get_ltm_label() # Returns "2025 (LTM 9/2025)" +``` + +## Summary Checklist + +For Cursor-optimized code: +- āœ… Comprehensive docstrings with examples +- āœ… Type hints on functions +- āœ… Descriptive variable names +- āœ… Clear comments for business logic +- āœ… Structured error messages +- āœ… Consistent code patterns +- āœ… Use config values (never hardcode) +- āœ… Follow template utilities +- āœ… Include validation steps +- āœ… Reference documentation + +## Summary + +Follow these standards to ensure: +1. AI can understand code structure +2. AI can modify code safely +3. AI can generate new code following patterns +4. Code is maintainable and readable +5. Errors are clear and actionable +6. Cursor AI can assist effectively + +--- + +**Last Updated:** January 2026 +**For:** Cursor AI optimization and human developers diff --git a/.cursor/rules/common_errors.md b/.cursor/rules/common_errors.md new file mode 100644 index 0000000..effe0d7 --- /dev/null +++ b/.cursor/rules/common_errors.md @@ -0,0 +1,109 @@ +# Common Errors and Troubleshooting + +**Quick reference for fixing common issues. For error handling patterns when writing code, see `error_handling.md`.** + +## Data Loading Errors + +### Error: "Data file not found" +**Cause:** DATA_FILE path in config.py is incorrect +**Fix:** +1. Check that your CSV file exists +2. 
Update `DATA_FILE` in config.py with correct filename +3. If file is in a subdirectory, set `DATA_DIR` in config.py + +### Error: "Required column 'USD' not found" +**Cause:** Column name in data doesn't match config +**Fix:** +1. Check your CSV column names +2. Update `REVENUE_COLUMN` in config.py to match your data +3. Update other column mappings (DATE_COLUMN, CUSTOMER_COLUMN, etc.) + +### Error: "All InvoiceDate values are NaN" +**Cause:** Date column parsing failed +**Fix:** +1. Check date format in your CSV +2. Add fallback date columns to `DATE_FALLBACK_COLUMNS` in config.py +3. Ensure at least one date column exists (Month, Year, etc.) + +## Analysis Errors + +### Error: "DataFrame is empty" after filtering +**Cause:** Date range or year filters too restrictive +**Fix:** +1. Check `MIN_YEAR` and `MAX_DATE` in config.py +2. Check `ANALYSIS_YEARS` includes years in your data +3. Verify date parsing worked (check data_loader output) + +### Error: Charts show scientific notation (1e8) +**Cause:** Forgot to divide by 1e6 before plotting +**Fix:** +```python +# WRONG: +ax.plot(revenue, ...) + +# CORRECT: +ax.plot(revenue / 1e6, ...) +setup_revenue_chart(ax) +``` + +### Error: "Year column has mixed types" +**Cause:** LTM year is string "2025 (LTM 9/2025)" while others are int +**Fix:** +```python +from analysis_utils import sort_mixed_years +df_sorted = sort_mixed_years(df, year_col='Year') +``` + +## Configuration Errors + +### Error: LTM not working correctly +**Cause:** LTM configuration incorrect +**Fix:** +1. Check `LTM_ENABLED = True` in config.py +2. Verify `LTM_START_MONTH`, `LTM_START_YEAR`, `LTM_END_MONTH`, `LTM_END_YEAR` +3. Ensure dates are within your data range + +### Error: Exclusion filters not working +**Cause:** Filter configuration incorrect +**Fix:** +1. Check `EXCLUSION_FILTERS['enabled'] = True` +2. Verify `exclude_by_column` matches a column in your data +3. 
Check `exclude_values` list is correct + +## Import Errors + +### Error: "No module named 'config'" +**Cause:** Running script from wrong directory +**Fix:** +1. Run scripts from template root directory +2. Or add template directory to Python path + +### Error: "No module named 'data_loader'" +**Cause:** Missing import or wrong directory +**Fix:** +1. Ensure all template files are in the same directory +2. Check import statements match file names + +## Best Practices to Avoid Errors + +1. **Always use utilities:** Use `analysis_utils.py` functions instead of manual code +2. **Validate data:** Run `validate_data_structure()` after loading +3. **Check config:** Verify all column names match your data (use `config_validator.py`) +4. **Test incrementally:** Test data loading before running full analysis +5. **Read error messages:** They usually tell you exactly what's wrong +6. **Use Cursor AI:** Ask AI to fix errors - it knows template patterns + +## Using Cursor AI to Fix Errors + +When you encounter an error, ask Cursor AI: +``` +"Fix this error: [paste error message]" +``` + +The AI will: +- āœ… Understand the error context +- āœ… Reference template patterns +- āœ… Suggest specific fixes +- āœ… Use template utilities correctly + +**See also:** `.cursor/rules/error_handling.md` for how to write error messages that help AI fix issues. diff --git a/.cursor/rules/data_loading.md b/.cursor/rules/data_loading.md new file mode 100644 index 0000000..b6acc5a --- /dev/null +++ b/.cursor/rules/data_loading.md @@ -0,0 +1,69 @@ +# Data Loading Rules + +## CRITICAL: Always Use data_loader.py + +**NEVER load data directly with `pd.read_csv()`. Always use:** + +```python +from data_loader import load_sales_data +from config import get_data_path +df = load_sales_data(get_data_path()) +``` + +## Why This Matters + +The `data_loader.py` implements intelligent fallback logic to ensure 100% date coverage: + +1. **Primary:** Parse primary date column (from config.DATE_COLUMN) +2. 
**Fallback 1:** Use fallback date columns if primary is missing (from config.DATE_FALLBACK_COLUMNS) +3. **Fallback 2:** Use Year column if both missing +4. **Result:** Maximum date coverage possible + +## What data_loader.py Provides + +- **Date Column:** Properly parsed datetime with fallback logic +- **Year:** Extracted year (100% coverage via fallback) +- **YearMonth:** Period format for monthly aggregations +- **Revenue Column:** Converted to numeric (from config.REVENUE_COLUMN) + +## Column Configuration + +Before using, configure column names in `config.py`: +- `REVENUE_COLUMN`: Your revenue/amount column name +- `DATE_COLUMN`: Primary date column name +- `DATE_FALLBACK_COLUMNS`: List of fallback date columns +- `CUSTOMER_COLUMN`: Customer/account column name +- Other columns as needed + +## Common Mistakes + +❌ **WRONG:** +```python +df = pd.read_csv('sales_data.csv') +df['Date'] = pd.to_datetime(df['Date'], errors='coerce') +df = df.dropna(subset=['Date']) # May drop significant data! +``` + +✅ **CORRECT:** +```python +from data_loader import load_sales_data +from config import get_data_path +df = load_sales_data(get_data_path()) # Uses fallback logic +``` + +## Data File Location + +The data file path is configured in `config.py`: +- `DATA_FILE`: Filename (e.g., 'sales_data.csv') +- `DATA_DIR`: Optional subdirectory (defaults to current directory) +- Use `get_data_path()` to get the full path + +## Validation + +After loading, validate data structure: +```python +from data_loader import validate_data_structure +is_valid, msg = validate_data_structure(df) +if not is_valid: + print(f"ERROR: {msg}") +``` diff --git a/.cursor/rules/error_handling.md b/.cursor/rules/error_handling.md new file mode 100644 index 0000000..38d3545 --- /dev/null +++ b/.cursor/rules/error_handling.md @@ -0,0 +1,276 @@ +# Error Handling Best Practices + +This guide defines how to handle errors in a way that's helpful for both users and AI assistants. 
+ +## Error Message Structure + +### Required Elements + +Every error message should include: +1. **What went wrong** - Specific error description +2. **Where it occurred** - File/function context +3. **Why it happened** - Root cause explanation +4. **How to fix** - Actionable steps +5. **Reference** - Link to relevant documentation + +### Template + +```python +raise ErrorType( + f"[What] - [Specific description]\n" + f"\n" + f"Context: [Where/When this occurred]\n" + f"Reason: [Why this happened]\n" + f"\n" + f"Solution:\n" + f"1. [Step 1]\n" + f"2. [Step 2]\n" + f"\n" + f"For more help, see: [Documentation reference]" +) +``` + +## Common Error Patterns + +### Data Loading Errors + +```python +# Good: Comprehensive error message +if REVENUE_COLUMN not in df.columns: + available_cols = list(df.columns)[:10] # Show first 10 + raise ValueError( + f"Required column '{REVENUE_COLUMN}' not found in data.\n" + f"\n" + f"Context: Loading data from {filepath}\n" + f"Available columns: {available_cols}\n" + f"\n" + f"Solution:\n" + f"1. Check your CSV file column names\n" + f"2. Update REVENUE_COLUMN in config.py to match your data\n" + f"3. Run: python config_validator.py to validate configuration\n" + f"\n" + f"For more help, see: .cursor/rules/data_loading.md" + ) + +# Bad: Vague error +if REVENUE_COLUMN not in df.columns: + raise ValueError("Column not found") +``` + +### Configuration Errors + +```python +# Good: Actionable error +if LTM_ENABLED and (LTM_START is None or LTM_END is None): + raise ValueError( + f"LTM configuration error: LTM_ENABLED is True but LTM period is not set.\n" + f"\n" + f"Context: Configuration in config.py\n" + f"Current values: LTM_ENABLED={LTM_ENABLED}, LTM_START={LTM_START}, LTM_END={LTM_END}\n" + f"\n" + f"Solution:\n" + f"1. Set LTM_START_MONTH, LTM_START_YEAR, LTM_END_MONTH, LTM_END_YEAR in config.py\n" + f"2. Or set LTM_ENABLED = False if you don't need LTM\n" + f"3. 
Run: python config_validator.py to check configuration\n" + f"\n" + f"For more help, see: .cursor/rules/ltm_methodology.md" + ) +``` + +### Data Quality Errors + +```python +# Good: Helpful data quality error +if date_coverage < 0.5: # Less than 50% coverage + raise ValueError( + f"Data quality issue: Only {date_coverage:.1%} of rows have valid dates.\n" + f"\n" + f"Context: Date parsing in data_loader.py\n" + f"Rows with dates: {date_count:,} / {total_rows:,}\n" + f"\n" + f"Solution:\n" + f"1. Check date format in your CSV file\n" + f"2. Add fallback date columns to DATE_FALLBACK_COLUMNS in config.py\n" + f"3. Ensure at least one date column (Month, Year) exists\n" + f"4. Run: python data_quality.py to analyze data quality\n" + f"\n" + f"For more help, see: .cursor/rules/data_loading.md" + ) +``` + +## Error Handling Patterns + +### Try-Except with Context + +```python +# Good: Provides context and recovery options +try: + df = load_sales_data(get_data_path()) +except FileNotFoundError as e: + error_msg = ( + f"Data file not found: {e}\n" + f"\n" + f"Context: Attempting to load data for analysis\n" + f"Expected file: {get_data_path()}\n" + f"\n" + f"Solution:\n" + f"1. Check that your CSV file exists at the expected location\n" + f"2. Update DATA_FILE in config.py with correct filename\n" + f"3. Or update DATA_DIR if file is in a subdirectory\n" + f"4. 
Run: python setup_wizard.py to reconfigure\n" + f"\n" + f"For more help, see: .cursor/rules/common_errors.md" + ) + raise FileNotFoundError(error_msg) from e +``` + +### Validation with Helpful Messages + +```python +# Good: Validates and provides specific guidance +def validate_data_structure(df: pd.DataFrame) -> Tuple[bool, str]: + """ + Validate DataFrame has required structure + + Returns: + Tuple[bool, str]: (is_valid, error_message) + If is_valid is False, error_message contains actionable guidance + """ + errors = [] + + if REVENUE_COLUMN not in df.columns: + errors.append( + f"Missing required column '{REVENUE_COLUMN}'. " + f"Update REVENUE_COLUMN in config.py to match your data." + ) + + if DATE_COLUMN not in df.columns: + errors.append( + f"Missing required column '{DATE_COLUMN}'. " + f"Update DATE_COLUMN in config.py or add fallback columns." + ) + + if len(df) == 0: + errors.append( + f"DataFrame is empty. Check date filters (MIN_YEAR, MAX_DATE) in config.py." + ) + + if errors: + error_msg = "Data validation failed:\n" + "\n".join(f" - {e}" for e in errors) + error_msg += "\n\nRun: python config_validator.py for detailed validation" + return False, error_msg + + return True, "OK" +``` + +## Warning Messages + +### When to Use Warnings + +Use warnings (not errors) for: +- Non-critical data quality issues +- Optional features that aren't configured +- Deprecated functionality +- Performance considerations + +### Warning Format + +```python +import warnings + +# Good: Informative warning +if date_coverage < 0.9: # Less than 90% but not critical + warnings.warn( + f"Date coverage is {date_coverage:.1%} ({missing_count:,} rows missing dates).\n" + f"Consider adding fallback date columns to improve coverage.\n" + f"See .cursor/rules/data_loading.md for details.", + UserWarning + ) +``` + +## Logging Errors + +### Use Structured Logging + +```python +from logger_config import get_logger + +logger = get_logger('analysis_name') + +try: + df = 
load_sales_data(get_data_path()) +except Exception as e: + logger.error( + f"Failed to load data: {e}", + exc_info=True, # Include stack trace + extra={ + 'file_path': str(get_data_path()), + 'config_file': 'config.py', + 'suggestion': 'Run config_validator.py to check configuration' + } + ) + raise +``` + +## AI-Friendly Error Messages + +### Help AI Understand and Fix + +Error messages should help AI assistants: +1. Understand what went wrong +2. Know where to look for fixes +3. Suggest specific solutions +4. Reference relevant documentation + +```python +# Good: AI can parse and act on this +if column not in df.columns: + raise ValueError( + f"Column '{column}' not found.\n" + f"Available: {list(df.columns)}\n" + f"Fix: Update {column}_COLUMN in config.py\n" + f"See: .cursor/rules/data_loading.md" + ) + +# Bad: AI has no context +if column not in df.columns: + raise ValueError("Not found") +``` + +## Error Recovery + +### Provide Recovery Options + +```python +# Good: Offers recovery path +def load_sales_data(filepath=None): + try: + df = pd.read_csv(filepath) + except FileNotFoundError: + # Suggest alternatives + suggestions = [ + f"1. Check file path: {filepath}", + f"2. Update DATA_FILE in config.py", + f"3. Run: python setup_wizard.py", + f"4. 
Generate sample data: python generate_sample_data.py" + ] + raise FileNotFoundError( + f"Data file not found: {filepath}\n" + f"\n" + f"Options:\n" + "\n".join(suggestions) + ) +``` + +## Summary + +Good error handling: +- ✅ Specific and actionable +- ✅ Provides context +- ✅ Suggests solutions +- ✅ References documentation +- ✅ Helps both users and AI assistants + +--- + +**Last Updated:** January 2026 +**For:** Error handling in sales_analysis_template diff --git a/.cursor/rules/ltm_methodology.md b/.cursor/rules/ltm_methodology.md new file mode 100644 index 0000000..51e4d0e --- /dev/null +++ b/.cursor/rules/ltm_methodology.md @@ -0,0 +1,89 @@ +# LTM (Last Twelve Months) Methodology Rules + +## ⭐ RECOMMENDED: Use analysis_utils.py + +**Prefer utility functions:** +```python +from analysis_utils import get_ltm_period_config, get_annual_data, calculate_annual_metrics +from config import get_ltm_period, get_ltm_label + +ltm_start, ltm_end = get_ltm_period_config() +year_data, year_label = get_annual_data(df, 2025, ltm_start, ltm_end) +``` + +## What is LTM? 
 + +**LTM (Last Twelve Months)** = Rolling 12-month period for the most recent partial year +- **Purpose:** Apples-to-apples comparison with full calendar years +- **Example:** If latest data is through September 2025, use Oct 2024 - Sep 2025 (12 months) + +## When to Use LTM + +- **Full calendar years (2021-2024):** Use complete year data +- **Most recent partial year (2025):** Use LTM if you only have partial year data +- **Complete years only:** Disable LTM in config if all years are complete + +## Configuration + +**Configure in config.py:** +```python +LTM_ENABLED = True # Set to False if all years are complete +LTM_START_MONTH = 10 # Month number (1-12) +LTM_START_YEAR = 2024 +LTM_END_MONTH = 9 +LTM_END_YEAR = 2025 +``` + +## Implementation Pattern + +```python +from analysis_utils import get_ltm_period_config, get_annual_data + +ltm_start, ltm_end = get_ltm_period_config() + +for year in sorted(df['Year'].unique()): + year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end) + # year_label will be "2025 (LTM 9/2025)" for LTM year, or "2025" for regular year +``` + +## Labeling Requirements + +**ALWAYS label LTM year with notation in:** +- Chart titles +- Chart x-axis labels +- Table headers +- Print statements +- Report text + +**Example:** +```python +from config import get_ltm_label + +ltm_label = get_ltm_label() # Returns "2025 (LTM 9/2025)" or None +if ltm_label: + title = f'Annual Revenue Trend\n({ltm_label})' +``` + +## Common Mistakes + +❌ **WRONG:** +```python +year_2025_data = df[df['Year'] == 2025] # Uses partial year (not comparable) +``` + +✅ **CORRECT:** +```python +from analysis_utils import get_annual_data +ltm_start, ltm_end = get_ltm_period_config() +year_2025_data, year_label = get_annual_data(df, 2025, ltm_start, ltm_end) +``` + +## Disabling LTM + +If all years in your analysis are complete calendar years: +```python +# In config.py: +LTM_ENABLED = False +``` + +Then all years will be treated as full calendar years. 
diff --git a/EXAMPLES.md b/EXAMPLES.md new file mode 100644 index 0000000..2d5ed1f --- /dev/null +++ b/EXAMPLES.md @@ -0,0 +1,203 @@ +# Example Analysis Scripts + +This directory contains working example analysis scripts that demonstrate how to use the sales analysis template framework. + +## Available Examples + +### 1. Annual Revenue Trend (`examples/annual_revenue_trend.py`) + +**Purpose:** Simple annual revenue analysis with LTM support + +**What it demonstrates:** +- Loading data using `data_loader` +- Calculating annual metrics with LTM +- Creating a revenue trend chart +- Following template best practices + +**Usage:** +```bash +python examples/annual_revenue_trend.py +``` + +**Output:** +- Chart: `charts/annual_revenue_trend.png` +- Console output with annual revenue summary + +--- + +### 2. Customer Segmentation (`examples/customer_segmentation.py`) + +**Purpose:** Customer segmentation using RFM (Recency, Frequency, Monetary) methodology + +**What it demonstrates:** +- Customer-level aggregation +- RFM scoring and segmentation +- Segment analysis and visualization +- Multiple chart generation + +**Usage:** +```bash +python examples/customer_segmentation.py +``` + +**Output:** +- Chart: `charts/customer_segmentation.png` +- Console output with segment summary + +**Segments:** +- **Champions:** High recency, frequency, and monetary value +- **Loyal Customers:** Regular customers with good value +- **At Risk:** Recent but declining frequency +- **Hibernating:** Low recency, may need reactivation +- **Potential Loyalists:** Good recency and frequency, lower value +- **Need Attention:** Mixed signals, need engagement + +--- + +### 3. 
Product Performance (`examples/product_performance.py`) + +**Purpose:** Product mix and performance analysis + +**What it demonstrates:** +- Product-level aggregation +- Product performance metrics +- Top products identification +- Product mix visualization + +**Usage:** +```bash +python examples/product_performance.py +``` + +**Output:** +- Chart: `charts/product_performance.png` +- Console output with top products summary + +--- + +## How to Use Examples + +### Step 1: Configure Template + +Before running examples, ensure your template is configured: + +```bash +python setup_wizard.py +``` + +Or manually update `config.py` with your data file and column mappings. + +### Step 2: Prepare Data + +Place your sales data CSV file in the template directory, or update `DATA_DIR` in `config.py`. + +Alternatively, generate sample data for testing: + +```bash +python generate_sample_data.py +``` + +### Step 3: Run Example + +```bash +python examples/annual_revenue_trend.py +``` + +### Step 4: Customize + +Copy an example script and modify it for your needs: + +```bash +cp examples/annual_revenue_trend.py my_analysis.py +# Edit my_analysis.py +python my_analysis.py +``` + +--- + +## Example Patterns + +### Pattern 1: Simple Annual Analysis + +```python +from data_loader import load_sales_data +from analysis_utils import calculate_annual_metrics, get_ltm_period_config +from config import REVENUE_COLUMN + +df = load_sales_data(get_data_path()) +ltm_start, ltm_end = get_ltm_period_config() + +def calculate_metrics(year_data): + return {'Revenue': year_data[REVENUE_COLUMN].sum()} + +annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end) +``` + +### Pattern 2: Customer-Level Analysis + +```python +from config import CUSTOMER_COLUMN, REVENUE_COLUMN + +customer_metrics = df.groupby(CUSTOMER_COLUMN).agg({ + REVENUE_COLUMN: 'sum', + DATE_COLUMN: 'count' +}).reset_index() +``` + +### Pattern 3: Product-Level Analysis + +```python +from config import 
ITEM_COLUMN, REVENUE_COLUMN + +product_metrics = df.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum().sort_values(ascending=False) +top_10 = product_metrics.head(10) +``` + +--- + +## Learning Path + +1. **Start with:** `annual_revenue_trend.py` - Simplest example +2. **Then try:** `product_performance.py` - More complex aggregation +3. **Advanced:** `customer_segmentation.py` - Multi-step analysis with custom logic + +--- + +## Troubleshooting + +**"Module not found" errors:** +- Ensure you're running from the template root directory +- Check that all template files are present + +**"Data file not found" errors:** +- Run `setup_wizard.py` to configure data file path +- Or update `DATA_FILE` in `config.py` + +**"Column not found" errors:** +- Update column mappings in `config.py` +- Run `python config_validator.py` to check configuration + +--- + +## Advanced Examples + +For more sophisticated analyses, see: +- `.cursor/rules/advanced_analysis_patterns.md` - Advanced analysis patterns +- `.cursor/rules/ai_assistant_guide.md` - How to use Cursor AI effectively + +## Next Steps + +After running examples: + +1. Review the generated charts +2. Examine the code to understand patterns +3. Copy an example and customize for your analysis +4. Check `.cursor/rules/analysis_patterns.md` for more patterns +5. Read `.cursor/rules/advanced_analysis_patterns.md` for advanced techniques +6. Use Cursor AI with prompts from `ai_assistant_guide.md` +7. Read `README.md` for comprehensive documentation + +--- + +**Last Updated:** January 2026 +**Template Version:** 1.0 diff --git a/QUICK_START.md b/QUICK_START.md new file mode 100644 index 0000000..58568b7 --- /dev/null +++ b/QUICK_START.md @@ -0,0 +1,175 @@ +# Quick Start Guide + +**For Cursor Users:** This template is optimized for Cursor AI. Just ask: *"Create a revenue analysis using the template"* and the AI will handle everything. 
+ +## šŸš€ Get Started in 5 Minutes + +### Step 1: Install Dependencies +```bash +pip install -r requirements.txt +``` + +### Step 2: Run Setup Wizard +```bash +python setup_wizard.py +``` + +The wizard will ask you: +- Company name +- Data file location +- Column names in your CSV +- Date range +- LTM configuration (if needed) + +### Step 3: Test Data Loading +```bash +python -c "from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'āœ“ Loaded {len(df):,} rows')" +``` + +### Step 4: Run Example Analysis (Recommended) +```bash +# Try an example first to see how it works +python examples/annual_revenue_trend.py +``` + +### Step 5: Create Your First Analysis +```bash +cp analysis_template.py my_analysis.py +# Or copy an example +cp examples/annual_revenue_trend.py my_analysis.py +# Edit my_analysis.py +python my_analysis.py +``` + +--- + +## šŸ“‹ Essential Configuration Checklist + +Before running analyses, verify in `config.py`: + +- [ ] `COMPANY_NAME` - Your company name +- [ ] `DATA_FILE` - Your CSV filename +- [ ] `REVENUE_COLUMN` - Your revenue column name +- [ ] `DATE_COLUMN` - Your date column name +- [ ] `CUSTOMER_COLUMN` - Your customer column name +- [ ] `ANALYSIS_YEARS` - Years to include +- [ ] `MIN_YEAR` and `MAX_DATE` - Date range +- [ ] `LTM_ENABLED` - Set to False if all years complete + +--- + +## šŸ’” Common Patterns + +### Load Data +```python +from data_loader import load_sales_data +from config import get_data_path + +df = load_sales_data(get_data_path()) +``` + +### Calculate Annual Metrics +```python +from analysis_utils import calculate_annual_metrics, get_ltm_period_config +from config import REVENUE_COLUMN + +ltm_start, ltm_end = get_ltm_period_config() + +def calculate_metrics(year_data): + return {'Revenue': year_data[REVENUE_COLUMN].sum()} + +annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end) +``` + +### Create Chart +```python +from 
analysis_utils import setup_revenue_chart, save_chart +from config import CHART_SIZES +import matplotlib.pyplot as plt + +fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) +ax.plot(data / 1e6, ...) # Divide by 1e6! +setup_revenue_chart(ax) +save_chart(fig, 'chart.png') +plt.close() +``` + +--- + +## āš ļø Critical Rules + +1. **ALWAYS use `data_loader.py`** - Never `pd.read_csv()` directly +2. **ALWAYS divide by 1e6** before plotting revenue +3. **ALWAYS use `setup_revenue_chart()`** for revenue charts +4. **ALWAYS use config values** - Never hardcode column names +5. **ALWAYS validate data** after loading + +## šŸ’” New Utilities + +### Data Quality Check +```bash +python -c "from data_quality import generate_data_quality_report, print_data_quality_report; from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); report = generate_data_quality_report(df); print_data_quality_report(report)" +``` + +### Configuration Validation +```bash +python config_validator.py +``` + +### Export Results +```python +from export_utils import export_to_excel +export_to_excel(df, 'results.xlsx') +``` + +### Generate Sample Data +```bash +python generate_sample_data.py +``` + +--- + +## šŸ› Quick Troubleshooting + +**"Data file not found"** +→ Check `DATA_FILE` in config.py + +**"Column not found"** +→ Update column mappings in config.py + +**Charts show 1e8 (scientific notation)** +→ Divide by 1e6 before plotting: `ax.plot(data / 1e6, ...)` + +**"DataFrame is empty"** +→ Check `MIN_YEAR`, `MAX_DATE`, and `ANALYSIS_YEARS` in config.py + +--- + +## šŸŽÆ Using Cursor AI (Recommended) + +This template is optimized for Cursor. 
Instead of manual setup, just ask: + +``` +"Create a revenue trend analysis using template patterns" +``` + +The AI will: +- āœ… Use all template utilities automatically +- āœ… Follow best practices +- āœ… Include proper validation +- āœ… Generate production-ready code + +**See:** `.cursor/rules/ai_assistant_guide.md` for complete prompt library + +## šŸ“š Next Steps + +- **Run examples:** Try `examples/annual_revenue_trend.py` to see it in action +- **Check data quality:** Run `python data_quality.py` to analyze your data +- **Validate config:** Run `python config_validator.py` to check configuration +- **Read documentation:** See `README.md` for comprehensive guide +- **Review patterns:** Check `.cursor/rules/` for detailed patterns +- **See examples:** Check `EXAMPLES.md` for example script guide + +--- + +**Need help?** Check `.cursor/rules/common_errors.md` for detailed troubleshooting. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0e13abd --- /dev/null +++ b/README.md @@ -0,0 +1,589 @@ +# Sales Analysis Template + +**A best-in-class, reusable template for sales invoice detail analysis** + +**Optimized for Cursor AI** - Just ask the AI to create analyses and it handles everything automatically. + +This template provides a complete framework for analyzing sales data from any company. It's designed to be: +- **Flexible:** Works with different column names, date formats, and data structures +- **Automated:** Interactive setup wizard configures everything for your company +- **AI-Optimized:** Fully optimized for Cursor - AI knows all patterns and generates code automatically +- **Production-Ready:** Includes error handling, validation, and best practices + +--- + +## šŸš€ Quick Start + +### 1. 
Setup (Automated) + +Run the interactive setup wizard: + +```bash +python setup_wizard.py +``` + +The wizard will ask you about: +- Company name and analysis date +- Data file location +- Column names in your CSV +- Date range and LTM configuration +- Exclusion filters (if needed) + +### 2. Manual Setup (Alternative) + +If you prefer to configure manually: + +1. **Update `config.py`** with your company-specific settings: + - `COMPANY_NAME`: Your company name + - `DATA_FILE`: Your CSV filename + - `REVENUE_COLUMN`: Your revenue/amount column name + - `DATE_COLUMN`: Your primary date column + - Column mappings for Customer, Item, etc. + - Date range and LTM settings + +2. **Place your data file** in the template directory (or update `DATA_DIR` in config.py) + +### 3. Test Data Loading + +Verify your configuration works: + +```bash +python -c "from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'Loaded {len(df):,} rows')" +``` + +### 4. 
Create Your First Analysis + +Copy the template and customize: + +```bash +cp analysis_template.py my_first_analysis.py +# Edit my_first_analysis.py with your analysis logic +python my_first_analysis.py +``` + +--- + +## šŸ“ Project Structure + +``` +sales_analysis_template/ +ā”œā”€ā”€ README.md # This file +ā”œā”€ā”€ QUICK_START.md # Quick start guide +ā”œā”€ā”€ TEMPLATE_OVERVIEW.md # High-level overview +ā”œā”€ā”€ TEMPLATE_SUMMARY.md # Comprehensive template summary +ā”œā”€ā”€ EXAMPLES.md # Example scripts guide +ā”œā”€ā”€ SETUP_CHECKLIST.md # Setup verification checklist +ā”œā”€ā”€ requirements.txt # Python dependencies +ā”œā”€ā”€ setup_wizard.py # Interactive setup wizard +│ +ā”œā”€ā”€ config.py # ⭐ Configuration (customize for your company) +ā”œā”€ā”€ config_validator.py # Configuration validation utility +│ +ā”œā”€ā”€ data_loader.py # ⭐ Data loading with fallback logic +ā”œā”€ā”€ data_quality.py # Data quality reporting +ā”œā”€ā”€ data_processing.py # Data transformation utilities +│ +ā”œā”€ā”€ analysis_utils.py # ⭐ Common utilities (formatters, LTM, helpers) +ā”œā”€ā”€ statistical_utils.py # Statistical analysis utilities +ā”œā”€ā”€ validate_revenue.py # Revenue validation utility +│ +ā”œā”€ā”€ export_utils.py # Export to CSV/Excel +ā”œā”€ā”€ report_generator.py # PDF report generation +ā”œā”€ā”€ logger_config.py # Logging configuration +│ +ā”œā”€ā”€ analysis_template.py # Template for creating new analyses +ā”œā”€ā”€ run_all_analyses.py # Batch runner for all scripts +ā”œā”€ā”€ generate_sample_data.py # Generate sample data for testing +│ +ā”œā”€ā”€ examples/ # Example analysis scripts +│ ā”œā”€ā”€ annual_revenue_trend.py # Simple annual revenue analysis +│ ā”œā”€ā”€ customer_segmentation.py # RFM customer segmentation +│ ā”œā”€ā”€ cohort_analysis.py # Customer cohort analysis +│ └── product_performance.py # Product performance analysis +│ +ā”œā”€ā”€ tests/ # Unit tests +│ ā”œā”€ā”€ test_data_loader.py # Data loader tests +│ ā”œā”€ā”€ 
test_analysis_utils.py # Analysis utils tests +│ └── test_config_validator.py # Config validator tests +│ +└── .cursor/ + └── rules/ # Cursor IDE rules (auto-loaded) + ā”œā”€ā”€ ai_assistant_guide.md # Complete AI assistant guide + ā”œā”€ā”€ advanced_analysis_patterns.md # Advanced techniques + ā”œā”€ā”€ analysis_patterns.md # Common analysis patterns + ā”œā”€ā”€ chart_formatting.md # Chart formatting rules + ā”œā”€ā”€ code_quality.md # Code quality standards + ā”œā”€ā”€ common_errors.md # Error troubleshooting + ā”œā”€ā”€ data_loading.md # Data loading patterns + ā”œā”€ā”€ error_handling.md # Error handling patterns + └── ltm_methodology.md # LTM methodology +``` + +--- + +## šŸ”§ Configuration Guide + +### Required Configuration + +**In `config.py`, you MUST configure:** + +1. **Company Information:** + ```python + COMPANY_NAME = "Your Company Name" + ``` + +2. **Data File:** + ```python + DATA_FILE = 'your_sales_data.csv' + ``` + +3. **Column Mappings:** + ```python + REVENUE_COLUMN = 'USD' # Your revenue column name + DATE_COLUMN = 'InvoiceDate' # Your date column name + CUSTOMER_COLUMN = 'Customer' # Your customer column name + ``` + +4. 
**Date Range:** + ```python + MIN_YEAR = 2021 + MAX_DATE = pd.Timestamp('2025-09-30') + ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025] + ``` + +### Optional Configuration + +**LTM (Last Twelve Months):** +```python +LTM_ENABLED = True # Set to False if all years are complete +LTM_START_MONTH = 10 +LTM_START_YEAR = 2024 +LTM_END_MONTH = 9 +LTM_END_YEAR = 2025 +``` + +**Exclusion Filters:** +```python +EXCLUSION_FILTERS = { + 'enabled': True, + 'exclude_by_column': 'Country', + 'exclude_values': ['Test', 'KVT'] +} +``` + +**See `config.py` for all available options and detailed comments.** + +--- + +## šŸ“Š Data Requirements + +### Required Columns + +Your CSV file must have: +- **Revenue column:** A numeric column with sales amounts (configured as `REVENUE_COLUMN`) +- **Date column:** At least one date column (configured as `DATE_COLUMN`) + +### Recommended Columns + +For full analysis capabilities, include: +- **Customer/Account:** For customer segmentation and analysis +- **Item/Product:** For product analysis +- **Quantity:** For price calculations +- **Geographic:** Region, Country for geographic analysis +- **Segments:** Technology, EndMarket, ProductGroup for segmentation + +### Date Column Fallback + +The data loader supports fallback logic: +1. **Primary:** Uses `DATE_COLUMN` (e.g., InvoiceDate) +2. **Fallback 1:** Uses columns in `DATE_FALLBACK_COLUMNS` (e.g., Month, Year) +3. **Fallback 2:** Constructs from Year column if available + +This ensures maximum date coverage even if some rows have missing dates. + +--- + +## šŸ’» Creating Analysis Scripts + +### Using the Template + +1. **Copy the template:** + ```bash + cp analysis_template.py my_analysis.py + ``` + +2. **Update configuration:** + ```python + ANALYSIS_NAME = "My Analysis" + DESCRIPTION = "Description of what this analysis does" + ``` + +3. 
**Implement your logic:** + - Use `calculate_annual_metrics()` for annual aggregations + - Use `setup_revenue_chart()` and `save_chart()` for visualizations + - Follow patterns from `.cursor/rules/analysis_patterns.md` + +4. **Run your analysis:** + ```bash + python my_analysis.py + ``` + +### Standard Pattern + +```python +from data_loader import load_sales_data, validate_data_structure +from analysis_utils import ( + get_ltm_period_config, calculate_annual_metrics, + setup_revenue_chart, save_chart, apply_exclusion_filters +) +from config import get_data_path, REVENUE_COLUMN, CHART_SIZES + +# Load and validate +df = load_sales_data(get_data_path()) +is_valid, msg = validate_data_structure(df) +if not is_valid: + raise SystemExit(f"ERROR: {msg}") + +# Apply filters +df = apply_exclusion_filters(df) + +# Calculate metrics +ltm_start, ltm_end = get_ltm_period_config() +annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end) + +# Create charts +fig, ax = plt.subplots(figsize=CHART_SIZES['medium']) +ax.plot(data / 1e6, ...) +setup_revenue_chart(ax) +save_chart(fig, 'chart.png') +``` + +--- + +## šŸŽÆ Key Features + +### 1. Flexible Data Loading + +- Handles different column names via configuration +- Fallback logic for date parsing (100% coverage) +- Automatic validation + +### 2. LTM (Last Twelve Months) Support + +- Automatic LTM calculation for partial years +- Apples-to-apples comparison with full calendar years +- Configurable LTM periods + +### 3. Standardized Chart Formatting + +- Automatic millions formatter for revenue charts +- Consistent styling and sizing +- Professional output ready for reports +- Optional interactive charts with Plotly + +### 4. Exclusion Filters + +- Easy configuration for excluding segments +- Useful for excluding test accounts, business units, etc. + +### 5. 
Revenue Validation + +- Automatic validation after each analysis +- Ensures data loading is working correctly +- Optional validation against expected values + +### 6. Example Scripts + +- Working examples for common analyses +- Demonstrates best practices +- Easy to customize and extend + +### 7. Data Export + +- Export results to CSV and Excel +- Formatted summary tables +- Multiple sheet support + +### 8. Data Quality Reporting + +- Comprehensive data quality checks +- Missing value analysis +- Outlier detection +- Data profiling + +### 9. Configuration Validation + +- Early error detection +- Validates column mappings +- Checks date ranges and LTM configuration + +### 10. Statistical Utilities + +- Year-over-year growth calculations +- CAGR (Compound Annual Growth Rate) +- Correlation analysis +- Statistical significance testing + +### 11. Report Generation + +- Combine multiple charts into PDF reports +- Professional formatting +- Summary tables and metadata + +### 12. Logging Infrastructure + +- Structured logging with file and console output +- Analysis execution tracking +- Configurable log levels + +--- + +## šŸ“š Documentation + +### For AI Agents (Cursor IDE) + +The `.cursor/rules/` directory contains comprehensive rules that are automatically loaded by Cursor: + +- **`ai_assistant_guide.md`:** Complete guide with ready-to-use prompts +- **`advanced_analysis_patterns.md`:** Advanced techniques (cohort, PVM, forecasting, etc.) 
+- **`analysis_patterns.md`:** Standard patterns for creating analyses +- **`data_loading.md`:** Always use `data_loader.py`, never `pd.read_csv()` directly +- **`chart_formatting.md`:** How to format charts correctly +- **`ltm_methodology.md`:** LTM implementation and usage +- **`common_errors.md`:** Troubleshooting guide +- **`code_quality.md`:** Code quality standards and Cursor best practices +- **`error_handling.md`:** How to write AI-friendly error messages + +### For Developers + +- **`config.py`:** Heavily commented with all configuration options +- **`analysis_template.py`:** Template with examples and comments +- **`analysis_utils.py`:** Well-documented utility functions + +--- + +## šŸ” Common Analysis Types + +This template supports all standard sales analyses: + +### Revenue Analyses +- Annual revenue trends +- Monthly revenue analysis +- Revenue by segment/product/geography + +### Customer Analyses +- Customer segmentation (RFM) +- Customer concentration +- Churn analysis +- Cohort analysis +- Customer lifetime value (CLV) + +### Product Analyses +- Product performance +- Product lifecycle +- BCG matrix +- Market basket analysis + +### Financial Analyses +- Price elasticity +- Contribution margin +- Price vs volume analysis + +### Advanced Analyses +- Seasonality analysis +- Time series forecasting +- Customer churn prediction + +**See `examples/` directory for working example scripts, or the original Dukane project for 24+ production analysis scripts.** + +--- + +## šŸ› ļø Dependencies + +Install required packages: + +```bash +pip install -r requirements.txt +``` + +**Core dependencies:** +- `pandas` - Data manipulation +- `numpy` - Numerical operations +- `matplotlib` - Charting +- `seaborn` - Enhanced visualizations + +**Optional dependencies** (uncomment in requirements.txt if needed): +- `openpyxl` - Excel export (export_utils.py) +- `plotly` - Interactive charts (analysis_utils.py) +- `reportlab` - PDF reports (report_generator.py) +- `scipy` 
- Statistical analysis (statistical_utils.py) +- `pytest` - Unit testing +- `pmdarima` - Time series forecasting +- `mlxtend` - Market basket analysis +- `scikit-learn` - Machine learning + +--- + +## āš ļø Important Notes + +### Always Use Utilities + +**āœ… DO:** +```python +from data_loader import load_sales_data +from analysis_utils import setup_revenue_chart, save_chart +from config import REVENUE_COLUMN, CHART_SIZES +``` + +**āŒ DON'T:** +```python +df = pd.read_csv('data.csv') # Use data_loader instead +ax.plot(revenue, ...) # Divide by 1e6 first, use setup_revenue_chart() +``` + +### Chart Formatting + +**ALWAYS divide revenue by 1e6 before plotting:** +```python +ax.plot(revenue / 1e6, ...) # Convert to millions +setup_revenue_chart(ax) # Apply formatter +``` + +### LTM Labeling + +**ALWAYS label LTM years correctly:** +```python +from config import get_ltm_label +ltm_label = get_ltm_label() # Returns "2025 (LTM 9/2025)" or None +if ltm_label: + title += f'\n({ltm_label})' +``` + +--- + +## šŸ› Troubleshooting + +### Data Loading Issues + +**Problem:** "Data file not found" +- **Solution:** Check `DATA_FILE` path in config.py +- **Solution:** Ensure file is in template directory or update `DATA_DIR` + +**Problem:** "Required column 'USD' not found" +- **Solution:** Update `REVENUE_COLUMN` in config.py to match your CSV +- **Solution:** Check all column mappings in config.py + +**Problem:** "All dates are NaN" +- **Solution:** Add fallback date columns to `DATE_FALLBACK_COLUMNS` +- **Solution:** Check date format in your CSV + +### Analysis Issues + +**Problem:** Charts show scientific notation (1e8) +- **Solution:** Divide by 1e6 before plotting: `ax.plot(data / 1e6, ...)` +- **Solution:** Use `setup_revenue_chart(ax)` to apply formatter + +**Problem:** "DataFrame is empty" after filtering +- **Solution:** Check `MIN_YEAR` and `MAX_DATE` in config.py +- **Solution:** Verify `ANALYSIS_YEARS` includes years in your data + +**See 
`.cursor/rules/common_errors.md` for more troubleshooting help.** + +--- + +## šŸ“ Example Workflow + +### Complete Analysis Workflow + +1. **Setup:** + ```bash + python setup_wizard.py + ``` + +2. **Test data loading:** + ```bash + python -c "from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'āœ“ Loaded {len(df):,} rows')" + ``` + +3. **Create analysis:** + ```bash + cp analysis_template.py revenue_analysis.py + # Edit revenue_analysis.py + ``` + +4. **Run analysis:** + ```bash + python revenue_analysis.py + ``` + +5. **Add to batch runner:** + ```python + # In run_all_analyses.py: + ANALYSIS_SCRIPTS = [ + 'revenue_analysis.py', + # ... other analyses + ] + ``` + +6. **Run all analyses:** + ```bash + python run_all_analyses.py + ``` + +--- + +## šŸ¤ Best Practices + +1. **Always validate data** after loading: + ```python + is_valid, msg = validate_data_structure(df) + ``` + +2. **Use configuration values** instead of hardcoding: + ```python + from config import REVENUE_COLUMN # āœ… + revenue = df['USD'].sum() # āŒ Hardcoded + ``` + +3. **Apply exclusion filters** if configured: + ```python + df = apply_exclusion_filters(df) + ``` + +4. **Validate revenue** at end of each analysis: + ```python + validate_revenue(df, "Analysis Name") + ``` + +5. **Use utility functions** for consistency: + ```python + from analysis_utils import calculate_annual_metrics, setup_revenue_chart + ``` + +--- + +## šŸ“„ License + +This template is provided as-is for use in sales analysis projects. + +--- + +## šŸ™ Acknowledgments + +This template is based on best practices developed during the Dukane Corporation sales analysis project, which included 24+ production-ready analysis scripts and comprehensive documentation. + +--- + +## šŸ“ž Support + +For questions or issues: +1. Check `.cursor/rules/` for detailed patterns and troubleshooting +2. Review `config.py` comments for configuration options +3. 
See example analyses in the original Dukane project + +--- + +**Last Updated:** January 2026 +**Template Version:** 1.0 +**Status:** Production Ready diff --git a/SETUP_CHECKLIST.md b/SETUP_CHECKLIST.md new file mode 100644 index 0000000..3d956aa --- /dev/null +++ b/SETUP_CHECKLIST.md @@ -0,0 +1,118 @@ +# Setup Checklist + +Use this checklist to ensure your template is properly configured before running analyses. + +## āœ… Initial Setup + +- [ ] **Install dependencies** + ```bash + pip install -r requirements.txt + ``` + +- [ ] **Run setup wizard** + ```bash + python setup_wizard.py + ``` + +- [ ] **Place data file** in template directory (or update `DATA_DIR` in config.py) + +## āœ… Configuration Verification + +Open `config.py` and verify: + +- [ ] **Company Information** + - [ ] `COMPANY_NAME` is set + - [ ] `ANALYSIS_DATE` is current + +- [ ] **Data File** + - [ ] `DATA_FILE` matches your CSV filename + - [ ] File exists in expected location + +- [ ] **Column Mappings** + - [ ] `REVENUE_COLUMN` matches your CSV + - [ ] `DATE_COLUMN` matches your CSV + - [ ] `CUSTOMER_COLUMN` matches your CSV (if applicable) + - [ ] `ITEM_COLUMN` matches your CSV (if applicable) + - [ ] `QUANTITY_COLUMN` matches your CSV (if applicable) + +- [ ] **Date Configuration** + - [ ] `MIN_YEAR` is correct + - [ ] `MAX_DATE` is correct + - [ ] `ANALYSIS_YEARS` includes all years you want to analyze + +- [ ] **LTM Configuration** (if needed) + - [ ] `LTM_ENABLED` is set correctly + - [ ] `LTM_START_MONTH`, `LTM_START_YEAR` are correct + - [ ] `LTM_END_MONTH`, `LTM_END_YEAR` are correct + +- [ ] **Exclusion Filters** (if needed) + - [ ] `EXCLUSION_FILTERS['enabled']` is set correctly + - [ ] `exclude_by_column` matches a column in your data + - [ ] `exclude_values` list is correct + +## āœ… Data Loading Test + +- [ ] **Test data loading** + ```bash + python -c "from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'āœ“ 
Loaded {len(df):,} rows')" + ``` + +- [ ] **Verify date coverage** + - Check output shows good date coverage (>95% recommended) + - Verify date range matches expectations + +- [ ] **Verify revenue column** + - Check that revenue values are numeric + - Verify no unexpected NaN values + +## āœ… First Analysis Test + +- [ ] **Copy template** + ```bash + cp analysis_template.py test_analysis.py + ``` + +- [ ] **Run test analysis** + ```bash + python test_analysis.py + ``` + +- [ ] **Verify outputs** + - [ ] Chart generated successfully + - [ ] Chart saved to `charts/` directory + - [ ] Revenue validation passed + - [ ] No errors in console output + +## āœ… Common Issues Check + +Before running full analyses, verify: + +- [ ] **Column names match** - All column mappings in config.py match your CSV +- [ ] **Date format works** - Dates are parsing correctly (check data_loader output) +- [ ] **Date range is correct** - MIN_YEAR and MAX_DATE include your data +- [ ] **LTM is configured** - If using LTM, dates are within your data range +- [ ] **Exclusions work** - If using exclusions, column and values are correct + +## āœ… Ready for Production + +Once all checks pass: + +- [ ] **Create your analyses** using `analysis_template.py` +- [ ] **Add to batch runner** in `run_all_analyses.py` +- [ ] **Run all analyses** to generate complete analysis suite + +--- + +## šŸ› Troubleshooting + +If any check fails: + +1. **Data loading issues:** See `.cursor/rules/data_loading.md` +2. **Configuration issues:** Review `config.py` comments +3. **Common errors:** See `.cursor/rules/common_errors.md` +4. 
**Pattern questions:** See `.cursor/rules/analysis_patterns.md` + +--- + +**Checklist Version:** 1.0 +**Last Updated:** January 2026 diff --git a/TEMPLATE_OVERVIEW.md b/TEMPLATE_OVERVIEW.md new file mode 100644 index 0000000..754fe92 --- /dev/null +++ b/TEMPLATE_OVERVIEW.md @@ -0,0 +1,150 @@ +# Sales Analysis Template - Overview + +**Start here for a high-level understanding of the template.** + +For detailed setup, see `QUICK_START.md`. For complete documentation, see `README.md`. + +## šŸŽÆ Purpose + +This template provides a **production-ready, reusable framework** for analyzing sales invoice detail data from any company. It's designed to be: + +- **Flexible:** Works with different column names, date formats, and data structures +- **Automated:** Interactive setup wizard configures everything +- **AI-Optimized:** Fully optimized for Cursor AI - just ask and the AI generates complete analyses +- **Best-in-Class:** Based on proven patterns from 24+ production analyses + +## šŸ“¦ What's Included + +### Core Framework +- **`config.py`** - Centralized configuration (customize for your company) +- **`data_loader.py`** - Intelligent data loading with fallback logic +- **`analysis_utils.py`** - Common utilities (formatters, LTM, helpers) +- **`validate_revenue.py`** - Revenue validation utility + +### Templates & Tools +- **`analysis_template.py`** - Template for creating new analyses +- **`run_all_analyses.py`** - Batch runner for all scripts +- **`setup_wizard.py`** - Interactive setup wizard + +### Documentation +- **`README.md`** - Comprehensive documentation +- **`QUICK_START.md`** - Quick reference guide +- **`.cursor/rules/`** - Cursor IDE rules for automation + +### Configuration +- **`requirements.txt`** - Python dependencies +- **`.gitignore`** - Git ignore patterns + +## šŸš€ Quick Start + +1. **Run setup wizard:** + ```bash + python setup_wizard.py + ``` + +2. 
**Test data loading:** + ```bash + python -c "from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'āœ“ Loaded {len(df):,} rows')" + ``` + +3. **Create your first analysis:** + ```bash + cp analysis_template.py my_analysis.py + # Edit my_analysis.py + python my_analysis.py + ``` + +## šŸŽØ Key Features + +### 1. Flexible Data Loading +- Handles different column names via configuration +- Fallback logic for date parsing (100% coverage) +- Automatic validation + +### 2. LTM Support +- Automatic Last Twelve Months calculation +- Apples-to-apples comparison with full years +- Configurable periods + +### 3. Standardized Formatting +- Automatic millions formatter for revenue +- Consistent chart styling +- Professional output + +### 4. Exclusion Filters +- Easy configuration for excluding segments +- Useful for test accounts, business units, etc. + +### 5. AI Automation +- Comprehensive Cursor rules +- Automated agent assistance +- Best practices enforcement + +## šŸ“Š Analysis Types Supported + +This template supports all standard sales analyses: + +- **Revenue:** Annual trends, monthly analysis, by segment +- **Customer:** Segmentation, concentration, churn, CLV +- **Product:** Performance, lifecycle, BCG matrix +- **Financial:** Price elasticity, margins +- **Advanced:** Seasonality, forecasting, predictions + +## šŸ”§ Customization Points + +All customization happens in `config.py`: + +1. **Company Info:** Name, analysis date +2. **Data File:** Location, filename +3. **Column Mappings:** Revenue, date, customer, product, etc. +4. **Date Range:** Years, LTM configuration +5. **Filters:** Exclusion rules +6. 
**Chart Settings:** Sizes, styles, DPI + +## šŸ“š Documentation Structure + +- **`README.md`** - Complete guide (start here) +- **`QUICK_START.md`** - Quick start (includes Cursor tips) +- **`EXAMPLES.md`** - Example scripts guide +- **`TEMPLATE_SUMMARY.md`** - Comprehensive template overview +- **`.cursor/rules/`** - Detailed patterns for AI agents (auto-loaded by Cursor) +- **`config.py`** - Heavily commented configuration + +## šŸŽ“ Learning Path + +1. **Read:** `QUICK_START.md` (5 minutes) +2. **Run:** `setup_wizard.py` (2 minutes) +3. **Test:** Data loading (1 minute) +4. **Create:** First analysis using `analysis_template.py` (15 minutes) +5. **Explore:** `.cursor/rules/` for patterns (as needed) + +## šŸ’” Best Practices + +1. **Always use utilities** - Don't reinvent the wheel +2. **Use config values** - Never hardcode column names +3. **Validate data** - After loading and after analysis +4. **Follow patterns** - See `.cursor/rules/analysis_patterns.md` +5. **Test incrementally** - Test data loading before full analysis + +## šŸ” What Makes This "Best-in-Class" + +1. **Proven Patterns:** Based on 24+ production analyses +2. **Flexibility:** Works with any data structure +3. **Automation:** Setup wizard + AI-friendly rules +4. **Documentation:** Comprehensive guides and examples +5. **Error Handling:** Validation and troubleshooting built-in +6. **Consistency:** Standardized formatting and patterns + +## šŸ“ˆ Next Steps + +1. Run `setup_wizard.py` to configure for your company +2. Review `config.py` to understand all options +3. Create your first analysis using `analysis_template.py` +4. Explore `.cursor/rules/` for detailed patterns +5. 
Build your analysis suite + +--- + +**Template Version:** 1.0 +**Last Updated:** January 2026 +**Status:** Production Ready diff --git a/TEMPLATE_SUMMARY.md b/TEMPLATE_SUMMARY.md new file mode 100644 index 0000000..4b5af0b --- /dev/null +++ b/TEMPLATE_SUMMARY.md @@ -0,0 +1,254 @@ +# Sales Analysis Template - Summary + +**This document provides a comprehensive overview of the template structure and capabilities.** + +For quick start, see `QUICK_START.md`. For detailed documentation, see `README.md`. + +## šŸ“‹ What This Template Provides + +This template was created based on the comprehensive Dukane Corporation sales analysis project, which included 24+ production-ready analysis scripts. All best practices, patterns, and lessons learned have been distilled into this reusable template. + +## šŸ“ Complete File Structure + +``` +sales_analysis_template/ +ā”œā”€ā”€ README.md # Comprehensive documentation +ā”œā”€ā”€ QUICK_START.md # Quick reference guide +ā”œā”€ā”€ TEMPLATE_OVERVIEW.md # Template overview and features +ā”œā”€ā”€ TEMPLATE_SUMMARY.md # This file +ā”œā”€ā”€ EXAMPLES.md # Example scripts guide +ā”œā”€ā”€ SETUP_CHECKLIST.md # Setup verification checklist +ā”œā”€ā”€ requirements.txt # Python dependencies +ā”œā”€ā”€ .gitignore # Git ignore patterns +│ +ā”œā”€ā”€ Core Framework Files: +│ ā”œā”€ā”€ config.py # ⭐ Centralized configuration +│ ā”œā”€ā”€ config_validator.py # Configuration validation utility +│ ā”œā”€ā”€ data_loader.py # ⭐ Intelligent data loading +│ ā”œā”€ā”€ data_quality.py # Data quality reporting +│ ā”œā”€ā”€ data_processing.py # Data transformation utilities +│ ā”œā”€ā”€ analysis_utils.py # ⭐ Common utilities +│ ā”œā”€ā”€ statistical_utils.py # Statistical analysis utilities +│ └── validate_revenue.py # Revenue validation +│ +ā”œā”€ā”€ Utility Files: +│ ā”œā”€ā”€ export_utils.py # Export to CSV/Excel +│ ā”œā”€ā”€ report_generator.py # PDF report generation +│ ā”œā”€ā”€ logger_config.py # Logging configuration +│ └── generate_sample_data.py # 
Generate sample data for testing +│ +ā”œā”€ā”€ Templates & Tools: +│ ā”œā”€ā”€ analysis_template.py # Template for new analyses +│ ā”œā”€ā”€ run_all_analyses.py # Batch runner +│ └── setup_wizard.py # Interactive setup wizard +│ +ā”œā”€ā”€ examples/ # Example analysis scripts +│ ā”œā”€ā”€ annual_revenue_trend.py # Simple annual revenue analysis +│ ā”œā”€ā”€ customer_segmentation.py # RFM customer segmentation +│ ā”œā”€ā”€ cohort_analysis.py # Customer cohort analysis +│ └── product_performance.py # Product performance analysis +│ +ā”œā”€ā”€ tests/ # Unit tests +│ ā”œā”€ā”€ test_data_loader.py # Data loader tests +│ ā”œā”€ā”€ test_analysis_utils.py # Analysis utils tests +│ └── test_config_validator.py # Config validator tests +│ +└── .cursor/ + └── rules/ # Cursor IDE rules (auto-loaded) + ā”œā”€ā”€ ai_assistant_guide.md # Complete AI assistant guide + ā”œā”€ā”€ advanced_analysis_patterns.md # Advanced techniques + ā”œā”€ā”€ analysis_patterns.md # Analysis patterns + ā”œā”€ā”€ chart_formatting.md # Chart formatting rules + ā”œā”€ā”€ code_quality.md # Code quality standards + ā”œā”€ā”€ common_errors.md # Error troubleshooting + ā”œā”€ā”€ data_loading.md # Data loading patterns + ā”œā”€ā”€ error_handling.md # Error handling patterns + └── ltm_methodology.md # LTM methodology +``` + +## šŸŽÆ Key Features Implemented + +### 1. Flexible Configuration System +- **`config.py`**: Centralized configuration with extensive comments +- All column names, date ranges, and settings configurable +- No hardcoded values - everything comes from config + +### 2. Intelligent Data Loading +- **`data_loader.py`**: Fallback logic for date parsing +- Handles missing dates gracefully +- 100% date coverage via fallback columns +- Automatic validation and error reporting + +### 3. 
Comprehensive Utilities +- **`analysis_utils.py`**: All common functions in one place +- Chart formatters (millions, thousands) +- LTM calculation helpers +- Mixed type handling for years +- Price calculation utilities +- Exclusion filter helpers + +### 4. Interactive Setup +- **`setup_wizard.py`**: Asks clarifying questions +- Automatically configures `config.py` +- Validates inputs +- Provides next steps + +### 5. AI-Friendly Rules +- **`.cursor/rules/`**: Comprehensive Cursor IDE rules +- Auto-loaded by Cursor +- Enforces best practices +- Provides patterns and troubleshooting + +### 6. Production-Ready Templates +- **`analysis_template.py`**: Complete template with examples +- **`run_all_analyses.py`**: Batch runner with error handling +- Follows all best practices + +## šŸ”‘ Design Principles + +### Flexibility +- Works with any column names (configured in config.py) +- Handles different date formats +- Supports various data structures +- Optional features (LTM, exclusions) can be disabled + +### Automation +- Setup wizard asks all necessary questions +- Cursor rules guide AI agents automatically +- Batch runner handles multiple analyses +- Validation catches errors early + +### Best Practices +- Always use utilities (never reinvent the wheel) +- Consistent formatting across all analyses +- Proper error handling and validation +- Comprehensive documentation + +### Reusability +- Generic enough for any company +- Specific enough to be immediately useful +- Well-documented for future agents +- Easy to extend with new analyses + +## šŸ“Š Analysis Types Supported + +The template supports all standard sales analyses: + +### Revenue Analyses +- Annual revenue trends +- Monthly revenue analysis +- Revenue by segment/product/geography + +### Customer Analyses +- Customer segmentation (RFM) +- Customer concentration +- Churn analysis +- Cohort analysis +- Customer lifetime value (CLV) + +### Product Analyses +- Product performance +- Product lifecycle +- BCG matrix +- 
Market basket analysis + +### Financial Analyses +- Price elasticity +- Contribution margin +- Price vs volume analysis + +### Advanced Analyses +- Seasonality analysis +- Time series forecasting +- Customer churn prediction + +## šŸš€ Usage Workflow + +1. **Setup** (5 minutes) + - Run `setup_wizard.py` + - Answer questions about your data + - Configuration automatically updated + +2. **Test** (2 minutes) + - Test data loading + - Verify configuration works + +3. **Create** (15 minutes) + - Copy `analysis_template.py` + - Customize for your analysis + - Run and verify + +4. **Scale** (ongoing) + - Create multiple analyses + - Add to batch runner + - Generate complete analysis suite + +## šŸ’” What Makes This "Best-in-Class" + +1. **Proven Patterns**: Based on 24+ production analyses +2. **Comprehensive**: Covers all common analysis types +3. **Flexible**: Works with any data structure +4. **Automated**: Setup wizard + AI-friendly rules +5. **Documented**: Extensive documentation at every level +6. **Production-Ready**: Error handling, validation, best practices + +## šŸ“š Documentation Hierarchy + +1. **`QUICK_START.md`** - Start here (5-minute overview, includes Cursor tips) +2. **`README.md`** - Complete guide (comprehensive) +3. **`EXAMPLES.md`** - Example scripts guide +4. **`TEMPLATE_OVERVIEW.md`** - High-level overview +5. **`SETUP_CHECKLIST.md`** - Verification checklist +6. **`.cursor/rules/`** - Detailed patterns for AI agents (auto-loaded by Cursor) +7. 
**`config.py`** - Inline comments for all options + +## šŸŽ“ Learning Resources + +- **Quick Start**: `QUICK_START.md` - Get running in 5 minutes +- **Full Guide**: `README.md` - Complete documentation +- **Patterns**: `.cursor/rules/analysis_patterns.md` - Code patterns +- **Troubleshooting**: `.cursor/rules/common_errors.md` - Fix issues +- **Examples**: `analysis_template.py` - Working example + +## āœ… Quality Assurance + +All components include: +- āœ… Error handling +- āœ… Input validation +- āœ… Comprehensive comments +- āœ… Type hints where helpful +- āœ… Documentation strings +- āœ… Best practices enforcement + +## šŸ”„ Future Enhancements + +Potential additions (not included in v1.0): +- Example analysis scripts (can be added from Dukane project) +- Unit tests +- CI/CD configuration +- Docker containerization +- Additional visualization libraries + +## šŸ“ Notes for Users + +1. **First Time**: Start with `QUICK_START.md` and `setup_wizard.py` +2. **Configuration**: All customization in `config.py` +3. **Creating Analyses**: Use `analysis_template.py` as starting point +4. **AI Assistance**: Cursor rules are auto-loaded, just ask for help +5. **Troubleshooting**: Check `.cursor/rules/common_errors.md` first + +## šŸŽ‰ Success Criteria + +The template is ready when: +- āœ… Setup wizard runs successfully +- āœ… Data loads without errors +- āœ… First analysis generates charts +- āœ… All validations pass +- āœ… Documentation is clear + +--- + +**Template Version:** 1.0 +**Created:** January 2026 +**Based On:** Dukane Corporation Sales Analysis Project +**Status:** Production Ready āœ… diff --git a/analysis_template.py b/analysis_template.py new file mode 100644 index 0000000..429c617 --- /dev/null +++ b/analysis_template.py @@ -0,0 +1,147 @@ +""" +Template for creating new analysis scripts +Copy this file and modify for your specific analysis + +Usage: +1. Copy this file: cp analysis_template.py my_new_analysis.py +2. 
Update the ANALYSIS_NAME and DESCRIPTION
3. Implement your analysis logic in the main() function
4. Update the chart generation section
5. Run: python my_new_analysis.py
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Import utilities
from data_loader import load_sales_data, validate_data_structure
from validate_revenue import validate_revenue
from analysis_utils import (
    get_ltm_period_config, get_annual_data, calculate_annual_metrics,
    get_millions_formatter, setup_revenue_chart, save_chart,
    format_currency, print_annual_summary, sort_mixed_years,
    apply_exclusion_filters
)
from config import (
    DATA_FILE, OUTPUT_DIR, ANALYSIS_YEARS, MAX_DATE,
    CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME
)

# ============================================================================
# CONFIGURATION
# ============================================================================

ANALYSIS_NAME = "Template Analysis"
DESCRIPTION = "Template for new analyses - customize this for your specific analysis"

# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================

def main():
    """Main analysis function.

    Orchestrates the standard analysis pipeline: load -> validate ->
    filter -> compute annual metrics (with LTM handling) -> chart ->
    revenue cross-check. Copy this file and customize steps 6, 7 and 9
    for a new analysis; the surrounding steps rarely need changes.
    """

    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")

    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        # Broad catch is deliberate here: any load failure aborts the script
        # with a readable message instead of a traceback.
        print(f"ERROR loading data: {e}")
        return

    # 2. Validate data structure
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    print("Data validation passed")

    # 3. Apply exclusion filters (if configured)
    df = apply_exclusion_filters(df)

    # 4. Filter by date range
    # NOTE(review): assumes load_sales_data() added a 'Year' column — confirm
    # against data_loader before relying on it in a new project.
    from config import MIN_YEAR, DATE_COLUMN
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]

    # 5. Setup LTM period (if enabled)
    ltm_start, ltm_end = get_ltm_period_config()
    if ltm_start and ltm_end:
        print(f"LTM period: {ltm_start} to {ltm_end}")

    # 6. Prepare data
    print("\nPreparing data...")
    # Add your data preparation logic here
    # Example: df['CustomColumn'] = df[REVENUE_COLUMN] * df[QUANTITY_COLUMN]

    # 7. Calculate annual metrics
    print("\nCalculating annual metrics...")

    def calculate_metrics(year_data):
        """Calculate metrics for a single year"""
        from config import REVENUE_COLUMN
        return {
            'Revenue': year_data[REVENUE_COLUMN].sum(),
            # Add your custom metrics here
            # 'CustomMetric': year_data['CustomColumn'].mean(),
        }

    annual_df = calculate_annual_metrics(df, calculate_metrics, ltm_start, ltm_end)

    # 8. Print summary
    print_annual_summary(annual_df, 'Revenue', 'Revenue')

    # 9. Create visualizations
    print("Generating charts...")
    ensure_directories()

    # Example chart: Annual revenue trend
    fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])

    # Prepare data for plotting (handle mixed types).
    # The 'Year' index can mix ints and the LTM label string, so sort via
    # the helper rather than relying on lexicographic order.
    annual_df_sorted = sort_mixed_years(annual_df.reset_index(), 'Year')
    years = annual_df_sorted['Year'].tolist()
    revenue = annual_df_sorted['Revenue'].values / 1e6  # Convert to millions

    # Create chart — plot against positional indices so mixed-type year
    # labels render cleanly on the x axis.
    ax.plot(range(len(years)), revenue, marker='o', linewidth=2, markersize=8)
    ax.set_xticks(range(len(years)))
    ax.set_xticklabels(years, rotation=45, ha='right')
    setup_revenue_chart(ax)

    # Add LTM notation to title if applicable
    title = f'Annual Revenue Trend - {COMPANY_NAME}'
    if ltm_start and ltm_end:
        from config import get_ltm_label
        ltm_label = get_ltm_label()
        if ltm_label:
            title += f'\n({ltm_label})'
    ax.set_title(title)

    plt.tight_layout()
    save_chart(fig, f'{ANALYSIS_NAME.lower().replace(" ", "_")}_trend.png')
    plt.close()

    # Add more charts as needed...

    # 10. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")

# ============================================================================
# RUN ANALYSIS
# ============================================================================

if __name__ == "__main__":
    main()

# diff --git a/analysis_utils.py b/analysis_utils.py (new file, index 12d7c83)
"""
Common utilities for analysis scripts
Provides formatters, LTM setup, and helper functions

This module is designed to work with any sales data structure
by using configuration from config.py
"""
import pandas as pd
import numpy as np
from matplotlib.ticker import FuncFormatter
from pathlib import Path
from config import (
    REVENUE_COLUMN, LTM_ENABLED, get_ltm_period, get_ltm_label,
    OUTPUT_DIR, CHART_DPI, CHART_BBOX
)

# ============================================================================
# CHART FORMATTERS
# ============================================================================

def millions_formatter(x: float, pos: int) -> str:
    """
    Format numbers in millions for chart display (e.g., $99.9m)

    This formatter is used with matplotlib FuncFormatter to display
    revenue values in millions on chart axes.

    Args:
        x: Numeric value (already in millions, e.g., 99.9 for $99.9m)
        pos: Position parameter (required by FuncFormatter, not used)

    Returns:
        str: Formatted string like "$99.9m"

    Example:
        >>> from matplotlib.ticker import FuncFormatter
        >>> formatter = FuncFormatter(millions_formatter)
        >>> ax.yaxis.set_major_formatter(formatter)
    """
    return f'${x:.1f}m'
def thousands_formatter(x: float, pos: int) -> str:
    """
    Format numbers in thousands for chart display (e.g., $99.9k)

    Args:
        x: Numeric value (already in thousands)
        pos: Position parameter (required by FuncFormatter, not used)

    Returns:
        str: Formatted string like "$99.9k"
    """
    return f'${x:.1f}k'

def get_millions_formatter() -> FuncFormatter:
    """
    Get FuncFormatter for millions

    Returns:
        FuncFormatter: Configured formatter for millions display
    """
    return FuncFormatter(millions_formatter)

def get_thousands_formatter() -> FuncFormatter:
    """
    Get FuncFormatter for thousands

    Returns:
        FuncFormatter: Configured formatter for thousands display
    """
    return FuncFormatter(thousands_formatter)

# ============================================================================
# LTM (Last Twelve Months) SETUP
# ============================================================================

def get_ltm_period_config():
    """
    Get LTM period boundaries from config

    Returns:
        tuple: (ltm_start, ltm_end) as pd.Period objects, or (None, None) if disabled
    """
    if LTM_ENABLED:
        return get_ltm_period()
    return None, None

def get_annual_data(df, year, ltm_start=None, ltm_end=None):
    """
    Get data for a specific year, using LTM for the most recent partial year

    Args:
        df: DataFrame with 'Year' and 'YearMonth' columns
        year: Year to extract (int)
        ltm_start: LTM start period (defaults to config if None)
        ltm_end: LTM end period (defaults to config if None)

    Returns:
        tuple: (year_data DataFrame, year_label string)
    """
    from config import LTM_END_YEAR

    # Get LTM period from config if not provided
    if ltm_start is None or ltm_end is None:
        ltm_start, ltm_end = get_ltm_period_config()

    # Use LTM for the most recent year if enabled
    if LTM_ENABLED and ltm_start and ltm_end and year == LTM_END_YEAR:
        if 'YearMonth' in df.columns:
            year_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)]
            year_label = get_ltm_label() or str(year)
        else:
            # Fallback if YearMonth not available
            year_data = df[df['Year'] == year]
            year_label = str(year)
    else:
        # Use full calendar year
        year_data = df[df['Year'] == year]
        year_label = str(year)

    return year_data, year_label

def calculate_annual_metrics(df, metrics_func, ltm_start=None, ltm_end=None):
    """
    Calculate annual metrics for all years, using LTM for most recent year

    Args:
        df: DataFrame with 'Year' and 'YearMonth' columns
        metrics_func: Function that takes a DataFrame and returns a dict of metrics
        ltm_start: LTM start period (defaults to config if None)
        ltm_end: LTM end period (defaults to config if None)

    Returns:
        DataFrame with 'Year' index and metric columns
    """
    from config import ANALYSIS_YEARS

    if ltm_start is None or ltm_end is None:
        ltm_start, ltm_end = get_ltm_period_config()

    annual_data = []
    for year in sorted(ANALYSIS_YEARS):
        if year in df['Year'].unique():
            year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end)

            if len(year_data) > 0:
                metrics = metrics_func(year_data)
                metrics['Year'] = year_label
                annual_data.append(metrics)

    if not annual_data:
        return pd.DataFrame()

    return pd.DataFrame(annual_data).set_index('Year')

# ============================================================================
# MIXED TYPE HANDLING
# ============================================================================

def create_year_sort_column(df, year_col='Year'):
    """
    Create a numeric sort column for mixed int/str year columns

    The LTM label (a string containing the LTM end year) sorts just after
    the numeric LTM end year; unrecognized strings sort last (9999).

    Args:
        df: DataFrame
        year_col: Name of year column

    Returns:
        Series with numeric sort values
    """
    from config import LTM_END_YEAR

    def sort_value(x):
        if isinstance(x, str) and str(LTM_END_YEAR) in x:
            return float(LTM_END_YEAR) + 0.5
        elif isinstance(x, (int, float)):
            return float(x)
        else:
            return 9999

    return df[year_col].apply(sort_value)

def sort_mixed_years(df, year_col='Year'):
    """
    Sort DataFrame by year column that may contain mixed int/str types

    Args:
        df: DataFrame
        year_col: Name of year column

    Returns:
        Sorted DataFrame
    """
    df = df.copy()
    df['_Year_Sort'] = create_year_sort_column(df, year_col)
    df = df.sort_values('_Year_Sort').drop(columns=['_Year_Sort'])
    return df

def safe_year_labels(years):
    """
    Convert year values to safe string labels for chart axes

    Args:
        years: Iterable of year values (int or str)

    Returns:
        List of string labels
    """
    return [str(year) for year in years]

# ============================================================================
# CHART HELPERS
# ============================================================================

def setup_revenue_chart(ax, ylabel: str = 'Revenue (Millions USD)') -> None:
    """
    Setup a chart axis for revenue display (millions)

    CRITICAL: Always use this function for revenue charts. It applies
    the millions formatter and standard styling.

    IMPORTANT: Data must be divided by 1e6 BEFORE plotting:
        ax.plot(revenue / 1e6, ...)  # correct
        ax.plot(revenue, ...)        # wrong - will show scientific notation

    Args:
        ax: Matplotlib axis object to configure
        ylabel: Y-axis label (default: 'Revenue (Millions USD)')

    Returns:
        None: Modifies ax in place

    See Also:
        - .cursor/rules/chart_formatting.md for detailed patterns
        - save_chart() for saving charts
    """
    ax.yaxis.set_major_formatter(get_millions_formatter())
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)

def save_chart(fig, filename, output_dir=None):
    """
    Save chart to file with organized directory structure

    Args:
        fig: Matplotlib figure object
        filename: Output filename (e.g., 'revenue_trend.png')
        output_dir: Output directory (defaults to config.OUTPUT_DIR)
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR
    else:
        output_dir = Path(output_dir)

    # parents=True so nested output directories (e.g. charts/2025) work too.
    output_dir.mkdir(parents=True, exist_ok=True)

    filepath = output_dir / filename
    # NOTE(review): format is hardcoded to 'png' even though config defines
    # CHART_FORMAT — changing it would require importing CHART_FORMAT here.
    fig.savefig(filepath, dpi=CHART_DPI, bbox_inches=CHART_BBOX, format='png')
    print(f"Chart saved: {filepath}")

# ============================================================================
# DATA VALIDATION
# ============================================================================

def validate_dataframe(df, required_columns=None):
    """
    Validate DataFrame has required columns and basic data quality

    Args:
        df: DataFrame to validate
        required_columns: List of required column names (defaults to
            [REVENUE_COLUMN, 'Year'])

    Returns:
        tuple: (is_valid bool, error_message str)
    """
    if required_columns is None:
        # 'YearMonth' is optional and is never treated as required: the old
        # code appended it only when already present, which was a no-op.
        required_columns = [REVENUE_COLUMN, 'Year']

    missing_cols = [col for col in required_columns if col not in df.columns]
    if missing_cols:
        return False, f"Missing required columns: {missing_cols}"

    if len(df) == 0:
        return False, "DataFrame is empty"

    if REVENUE_COLUMN in df.columns:
        if df[REVENUE_COLUMN].isna().all():
            return False, f"All {REVENUE_COLUMN} values are NaN"

    return True, "OK"

# ============================================================================
# PRICE CALCULATION
# ============================================================================

def calculate_price_per_unit(df, quantity_col=None, revenue_col=None):
    """
    Calculate average price per unit, excluding invalid quantities

    Rows with quantity <= MIN_QUANTITY or > MAX_QUANTITY are excluded
    before the revenue / quantity division.

    Args:
        df: DataFrame with quantity and revenue columns
        quantity_col: Name of quantity column (defaults to config)
        revenue_col: Name of revenue column (defaults to config)

    Returns:
        float: Average price per unit, or NaN if it cannot be computed
    """
    from config import QUANTITY_COLUMN, REVENUE_COLUMN, MIN_QUANTITY, MAX_QUANTITY

    if quantity_col is None:
        quantity_col = QUANTITY_COLUMN
    if revenue_col is None:
        revenue_col = REVENUE_COLUMN

    # Check if quantity column exists
    if quantity_col not in df.columns:
        return np.nan

    # Filter for valid quantity transactions
    df_valid = df[(df[quantity_col] > MIN_QUANTITY) & (df[quantity_col] <= MAX_QUANTITY)].copy()

    if len(df_valid) == 0:
        return np.nan

    total_revenue = df_valid[revenue_col].sum()
    total_quantity = df_valid[quantity_col].sum()

    if total_quantity == 0:
        return np.nan

    return total_revenue / total_quantity

# ============================================================================
# OUTPUT FORMATTING
# ============================================================================

def format_currency(value: float, millions: bool = True) -> str:
    """
    Format currency value for console output

    Args:
        value: Numeric value to format
        millions: If True, format as millions ($X.Xm), else thousands ($X.Xk)

    Returns:
        str: Formatted string like "$99.9m" or "$99.9k" or "N/A" if NaN

    Example:
        >>> format_currency(1000000)
        '$1.00m'
        >>> format_currency(1000, millions=False)
        '$1.00k'
    """
    if pd.isna(value):
        return "N/A"

    if millions:
        return f"${value / 1e6:.2f}m"
    else:
        return f"${value / 1e3:.2f}k"

def print_annual_summary(annual_df, metric_col='Revenue', label='Revenue'):
    """
    Print formatted annual summary to console

    Args:
        annual_df: DataFrame with annual metrics (indexed by Year)
        metric_col: Column name to print
        label: Label for the metric
    """
    print(f"\n{label} by Year:")
    print("-" * 40)
    for year in annual_df.index:
        value = annual_df.loc[year, metric_col]
        formatted = format_currency(value)
        print(f"  {year}: {formatted}")
    print()

# ============================================================================
# DATA FILTERING HELPERS
# ============================================================================

def apply_exclusion_filters(df):
    """
    Apply exclusion filters from config

    Args:
        df: DataFrame to filter

    Returns:
        Filtered DataFrame (unchanged if filters are disabled or not matched)
    """
    from config import EXCLUSION_FILTERS

    if not EXCLUSION_FILTERS.get('enabled', False):
        return df

    exclude_col = EXCLUSION_FILTERS.get('exclude_by_column')
    exclude_values = EXCLUSION_FILTERS.get('exclude_values', [])

    if exclude_col and exclude_col in df.columns and exclude_values:
        original_count = len(df)
        df_filtered = df[~df[exclude_col].isin(exclude_values)]
        excluded_count = original_count - len(df_filtered)
        if excluded_count > 0:
            print(f"Excluded {excluded_count:,} rows based on {exclude_col} filter")
        return df_filtered

    return df

# ============================================================================
# INTERACTIVE VISUALIZATIONS (OPTIONAL - PLOTLY)
# ============================================================================

def create_interactive_chart(data, chart_type='line', title=None, xlabel=None, ylabel=None):
    """
    Create interactive chart using Plotly (optional dependency)

    Args:
        data: dict with 'x' and 'y' keys holding the chart data
        chart_type: Type of chart ('line', 'bar', 'scatter')
        title: Chart title
        xlabel: X-axis label
        ylabel: Y-axis label

    Returns:
        plotly.graph_objects.Figure: Plotly figure object

    Raises:
        ImportError: If plotly is not installed

    Example:
        fig = create_interactive_chart(
            {'x': [1, 2, 3], 'y': [10, 20, 30]},
            chart_type='line',
            title='Revenue Trend'
        )
        fig.show()
    """
    try:
        import plotly.graph_objects as go
    except ImportError:
        raise ImportError(
            "plotly is required for interactive charts. Install with: pip install plotly"
        )

    fig = go.Figure()

    if isinstance(data, dict) and 'x' in data and 'y' in data:
        if chart_type == 'line':
            fig.add_trace(go.Scatter(
                x=data['x'],
                y=data['y'],
                mode='lines+markers',
                name='Data'
            ))
        elif chart_type == 'bar':
            fig.add_trace(go.Bar(
                x=data['x'],
                y=data['y'],
                name='Data'
            ))
        elif chart_type == 'scatter':
            # Previously documented but unimplemented: markers-only trace.
            fig.add_trace(go.Scatter(
                x=data['x'],
                y=data['y'],
                mode='markers',
                name='Data'
            ))

    if title:
        fig.update_layout(title=title)
    if xlabel:
        fig.update_xaxes(title_text=xlabel)
    if ylabel:
        fig.update_yaxes(title_text=ylabel)

    fig.update_layout(
        template='plotly_white',
        hovermode='x unified'
    )

    return fig

def save_interactive_chart(fig, filename, output_dir=None):
    """
    Save interactive Plotly chart to HTML file

    Args:
        fig: Plotly figure object
        filename: Output filename (e.g., 'chart.html')
        output_dir: Output directory (defaults to config.OUTPUT_DIR)

    Returns:
        Path: Full path of the written HTML file
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR
    else:
        output_dir = Path(output_dir)

    # parents=True so nested output directories work (matches save_chart).
    output_dir.mkdir(parents=True, exist_ok=True)
    filepath = output_dir / filename

    fig.write_html(str(filepath))
    print(f"Interactive chart saved: {filepath}")

    return filepath
# Usage:
#     from config import REVENUE_COLUMN, DATE_COLUMN, get_data_path
#     revenue = df[REVENUE_COLUMN].sum()  # correct
#     revenue = df['USD'].sum()           # wrong - hardcoded
#
# Quick Setup:
#     1. Run: python setup_wizard.py (interactive configuration)
#     2. Or manually edit this file following the TODO comments
#     3. Validate: python config_validator.py
#
# See Also:
#     - .cursor/rules/analysis_patterns.md - How to use config values
#     - setup_wizard.py - Interactive configuration tool
#     - config_validator.py - Configuration validation
from pathlib import Path
from typing import Optional, Tuple
import pandas as pd

# ============================================================================
# COMPANY INFORMATION
# ============================================================================
# TODO: Update these values for your company
COMPANY_NAME = "Your Company Name"  # Update this
ANALYSIS_DATE = "2026-01-12"  # Update this to current date

# ============================================================================
# DATA FILES
# ============================================================================
# TODO: Update with your actual data file name
DATA_FILE = 'sales_data.csv'  # Update this to your CSV file name
OUTPUT_DIR = Path('charts')
REPORTS_DIR = Path('reports')
DATA_DIR = Path('data')  # Optional: if data is in a subdirectory

# ============================================================================
# DATA COLUMN MAPPINGS
# ============================================================================
# TODO: Map these to your actual column names
# These are the expected column names - update if your CSV uses different names

# Revenue column (REQUIRED)
REVENUE_COLUMN = 'USD'  # Common alternatives: 'Amount', 'Revenue', 'Total', 'Sales'

# Date columns (at least one required)
DATE_COLUMN = 'InvoiceDate'  # Primary date column
DATE_FALLBACK_COLUMNS = ['Month', 'Year']  # Fallback columns if primary is missing

# Customer/Account columns
CUSTOMER_COLUMN = 'Customer'  # Common alternatives: 'Account', 'CustomerName', 'Client'

# Product/Item columns
ITEM_COLUMN = 'Item'  # Common alternatives: 'Product', 'SKU', 'ItemCode'
PRODUCT_GROUP_COLUMN = 'ProductGroup'  # Optional: for product categorization
QUANTITY_COLUMN = 'Quantity'  # Optional: for price calculations

# Geographic columns (optional)
REGION_COLUMN = 'Region'  # Optional: for geographic analysis
COUNTRY_COLUMN = 'Country'  # Optional: for country-level analysis

# Segment/Category columns (optional - customize based on your data)
SEGMENT_COLUMNS = {
    'Technology': 'Technology',      # Optional: technology/product type
    'EndMarket': 'EndMarket',        # Optional: end market/industry
    'ProductGroup': 'ProductGroup',  # Optional: product category
}

# Invoice/Transaction columns
INVOICE_NUMBER_COLUMN = 'Invoice #'  # Optional: for transaction-level analysis

# ============================================================================
# DATE RANGE CONFIGURATION
# ============================================================================
# TODO: Update these based on your data and analysis needs

# Analysis years (years to include in analysis)
ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025]  # Update based on your data

# LTM (Last Twelve Months) Configuration
# For the most recent partial year, use LTM for apples-to-apples comparison
# Example: If latest data is through September 2025, use Oct 2024 - Sep 2025
LTM_ENABLED = True  # Set to False if you have complete calendar years only
LTM_START_MONTH = 10  # Month number (1-12) for LTM start
LTM_START_YEAR = 2024  # Year for LTM start
LTM_END_MONTH = 9  # Month number (1-12) for LTM end
LTM_END_YEAR = 2025  # Year for LTM end

# Generate LTM period objects
if LTM_ENABLED:
    LTM_START = pd.Period(f'{LTM_START_YEAR}-{LTM_START_MONTH:02d}', freq='M')
    LTM_END = pd.Period(f'{LTM_END_YEAR}-{LTM_END_MONTH:02d}', freq='M')
    LTM_LABEL = f'{LTM_END_YEAR} (LTM {LTM_END_MONTH}/{LTM_END_YEAR})'
else:
    LTM_START = None
    LTM_END = None
    LTM_LABEL = None

# Data date range (filter data to this range)
MIN_YEAR = 2021  # Minimum year to include
MAX_DATE = pd.Timestamp('2025-09-30')  # Maximum date to include (update based on your data)

# ============================================================================
# CHART SETTINGS
# ============================================================================
CHART_DPI = 300
CHART_FORMAT = 'png'
CHART_BBOX = 'tight'
CHART_STYLE = 'seaborn-v0_8'  # Options: 'default', 'ggplot', 'seaborn-v0_8', etc.

# Chart size presets
CHART_SIZES = {
    'small': (6, 4),
    'medium': (10, 6),
    'large': (12, 8),
    'wide': (14, 6)
}

# ============================================================================
# DATA FILTERING
# ============================================================================
# Quantity filtering for price calculations (exclude outliers)
MIN_QUANTITY = 0  # Minimum valid quantity
MAX_QUANTITY = 1000  # Maximum valid quantity (adjust based on your data)

# Revenue filtering (optional - exclude negative values, returns, etc.)
EXCLUDE_NEGATIVE_REVENUE = False  # Set to True to exclude negative revenue (returns/credits)
MIN_REVENUE = None  # Optional: minimum revenue threshold

# ============================================================================
# EXCLUSION FILTERS (Optional)
# ============================================================================
# Use this section to exclude specific segments, customers, or products
# Example: Exclude a business unit, test accounts, etc.

EXCLUSION_FILTERS = {
    'enabled': False,           # Set to True to enable exclusions
    'exclude_by_column': None,  # Column name to filter on (e.g., 'Country', 'Segment')
    'exclude_values': [],       # List of values to exclude (e.g., ['KVT', 'Test'])
}

# ============================================================================
# VALIDATION THRESHOLDS (Optional)
# ============================================================================
# Expected revenue ranges for validation (update based on your company)
# These are used to validate that data loading is working correctly
VALIDATION_ENABLED = False  # Set to True to enable validation
EXPECTED_REVENUE = {}  # Example: {2021: 99_880_000, 2024: 89_990_000}
REVENUE_TOLERANCE_PCT = 0.01  # 1% tolerance for validation

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def ensure_directories() -> None:
    """
    Create output directories if they don't exist

    Creates charts/ and reports/ directories for saving analysis outputs.
    Called automatically by get_chart_path() and get_report_path().

    DATA_DIR is deliberately NOT created here: get_data_path() treats the
    *existence* of DATA_DIR as the signal that data lives in a subdirectory,
    so auto-creating it would silently redirect data loading.
    (The previous `if DATA_DIR.exists(): DATA_DIR.mkdir(...)` was a no-op.)

    Returns:
        None: Creates directories in place
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)

def get_chart_path(filename: str) -> Path:
    """
    Get full path for chart file

    Args:
        filename: Chart filename (e.g., 'revenue_trend.png')

    Returns:
        Path: Full path to chart file in OUTPUT_DIR
    """
    ensure_directories()
    return OUTPUT_DIR / filename

def get_report_path(filename: str) -> Path:
    """
    Get full path for report file

    Args:
        filename: Report filename (e.g., 'analysis_report.pdf')

    Returns:
        Path: Full path to report file in REPORTS_DIR
    """
    ensure_directories()
    return REPORTS_DIR / filename

def get_data_path(filename: Optional[str] = None) -> Path:
    """
    Get full path for data file

    This function handles data file location logic:
    - If DATA_DIR exists, looks there first
    - Otherwise uses current directory
    - Defaults to DATA_FILE from config if filename not provided

    Args:
        filename: Optional filename override (defaults to config.DATA_FILE)

    Returns:
        Path: Full path to data file

    Example:
        >>> from config import get_data_path
        >>> data_path = get_data_path()
        >>> print(f"Loading from: {data_path}")
    """
    if filename is None:
        filename = DATA_FILE
    if DATA_DIR.exists():
        return DATA_DIR / filename
    return Path(filename)

def get_ltm_period() -> Tuple[Optional[pd.Period], Optional[pd.Period]]:
    """
    Get LTM (Last Twelve Months) period boundaries from config

    Returns LTM start and end periods if LTM is enabled and configured,
    otherwise returns (None, None).

    Returns:
        Tuple[Optional[pd.Period], Optional[pd.Period]]:
            (ltm_start, ltm_end) or (None, None) if disabled

    See Also:
        - get_ltm_label() - Get formatted LTM label string
        - .cursor/rules/ltm_methodology.md - LTM explanation
    """
    if LTM_ENABLED and LTM_START and LTM_END:
        return LTM_START, LTM_END
    return None, None
print(f"LTM: {ltm_start} to {ltm_end}") + + See Also: + - get_ltm_label() - Get formatted LTM label string + - .cursor/rules/ltm_methodology.md - LTM explanation + """ + if LTM_ENABLED and LTM_START and LTM_END: + return LTM_START, LTM_END + return None, None + +def get_ltm_label() -> Optional[str]: + """ + Get LTM label string for display + + Returns formatted label like "2025 (LTM 9/2025)" if LTM is enabled, + otherwise None. Use this in chart titles and labels. + + Returns: + Optional[str]: LTM label string or None if LTM disabled + + Example: + >>> from config import get_ltm_label + >>> ltm_label = get_ltm_label() + >>> if ltm_label: + ... title = f'Revenue Trend\n({ltm_label})' + + See Also: + - get_ltm_period() - Get LTM period objects + - .cursor/rules/ltm_methodology.md - LTM usage guide + """ + return LTM_LABEL if LTM_ENABLED else None diff --git a/config_validator.py b/config_validator.py new file mode 100644 index 0000000..87597fd --- /dev/null +++ b/config_validator.py @@ -0,0 +1,214 @@ +""" +Configuration validation utility +Validates configuration settings against data to catch errors early + +Usage: + from config_validator import validate_config + + # Validate configuration + errors, warnings = validate_config(df) + if errors: + print("Configuration errors found:", errors) +""" +import pandas as pd +from pathlib import Path +from config import ( + DATA_FILE, REVENUE_COLUMN, DATE_COLUMN, DATE_FALLBACK_COLUMNS, + CUSTOMER_COLUMN, ITEM_COLUMN, QUANTITY_COLUMN, + MIN_YEAR, MAX_DATE, ANALYSIS_YEARS, + LTM_ENABLED, LTM_START, LTM_END, LTM_START_YEAR, LTM_END_YEAR, + EXCLUSION_FILTERS, get_data_path +) + +def validate_config(df=None): + """ + Validate configuration against data + + Args: + df: Optional DataFrame to validate against. If None, attempts to load data. 
+ + Returns: + tuple: (errors list, warnings list) + + Example: + errors, warnings = validate_config(df) + if errors: + for error in errors: + print(f"ERROR: {error}") + if warnings: + for warning in warnings: + print(f"WARNING: {warning}") + """ + errors = [] + warnings = [] + + # Load data if not provided + if df is None: + try: + from data_loader import load_sales_data + data_path = get_data_path() + if not data_path.exists(): + errors.append(f"Data file not found: {data_path}") + return errors, warnings + df = load_sales_data(data_path) + except Exception as e: + errors.append(f"Could not load data for validation: {e}") + return errors, warnings + + # 1. Validate required columns exist + required_columns = [REVENUE_COLUMN, DATE_COLUMN] + for col in required_columns: + if col not in df.columns: + errors.append(f"Required column '{col}' not found in data. Available columns: {list(df.columns)[:10]}") + + # 2. Validate date column has valid dates + if DATE_COLUMN in df.columns: + date_coverage = df[DATE_COLUMN].notna().sum() / len(df) * 100 + if date_coverage < 50: + errors.append(f"Date coverage is very low ({date_coverage:.1f}%). Check date column configuration.") + elif date_coverage < 90: + warnings.append(f"Date coverage is {date_coverage:.1f}%. Consider adding fallback date columns.") + + # 3. Validate fallback date columns + if DATE_FALLBACK_COLUMNS: + missing_fallbacks = [col for col in DATE_FALLBACK_COLUMNS if col not in df.columns] + if missing_fallbacks: + warnings.append(f"Fallback date columns not found: {missing_fallbacks}") + + # 4. 
Validate revenue column is numeric + if REVENUE_COLUMN in df.columns: + try: + pd.to_numeric(df[REVENUE_COLUMN], errors='coerce') + valid_revenue = df[REVENUE_COLUMN].notna().sum() + if valid_revenue == 0: + errors.append(f"Revenue column '{REVENUE_COLUMN}' has no valid numeric values") + elif valid_revenue < len(df) * 0.9: + warnings.append(f"Revenue column has {len(df) - valid_revenue} invalid values") + except Exception: + errors.append(f"Revenue column '{REVENUE_COLUMN}' cannot be converted to numeric") + + # 5. Validate date range + if DATE_COLUMN in df.columns and df[DATE_COLUMN].notna().any(): + min_date_in_data = df[DATE_COLUMN].min() + max_date_in_data = df[DATE_COLUMN].max() + + if MIN_YEAR and min_date_in_data.year > MIN_YEAR: + warnings.append(f"MIN_YEAR ({MIN_YEAR}) is earlier than earliest data ({min_date_in_data.year})") + + if MAX_DATE and max_date_in_data > MAX_DATE: + warnings.append(f"MAX_DATE ({MAX_DATE.date()}) is earlier than latest data ({max_date_in_data.date()})") + + # 6. Validate analysis years + if 'Year' in df.columns: + available_years = sorted(df['Year'].unique()) + missing_years = [year for year in ANALYSIS_YEARS if year not in available_years] + if missing_years: + warnings.append(f"ANALYSIS_YEARS includes years not in data: {missing_years}") + + # 7. Validate LTM configuration + if LTM_ENABLED: + if LTM_START is None or LTM_END is None: + errors.append("LTM_ENABLED is True but LTM_START or LTM_END is None") + else: + if LTM_START > LTM_END: + errors.append(f"LTM_START ({LTM_START}) is after LTM_END ({LTM_END})") + + if 'YearMonth' in df.columns: + available_periods = df['YearMonth'].unique() + if LTM_START not in available_periods: + warnings.append(f"LTM_START ({LTM_START}) not found in data") + if LTM_END not in available_periods: + warnings.append(f"LTM_END ({LTM_END}) not found in data") + + # 8. 
Validate exclusion filters + if EXCLUSION_FILTERS.get('enabled', False): + exclude_col = EXCLUSION_FILTERS.get('exclude_by_column') + if exclude_col: + if exclude_col not in df.columns: + errors.append(f"Exclusion filter column '{exclude_col}' not found in data") + else: + exclude_values = EXCLUSION_FILTERS.get('exclude_values', []) + if exclude_values: + available_values = df[exclude_col].unique() + invalid_values = [v for v in exclude_values if v not in available_values] + if invalid_values: + warnings.append(f"Exclusion filter values not found in data: {invalid_values}") + + # 9. Validate optional columns (warnings only) + optional_columns = { + 'Customer': CUSTOMER_COLUMN, + 'Item': ITEM_COLUMN, + 'Quantity': QUANTITY_COLUMN + } + + for col_type, col_name in optional_columns.items(): + if col_name and col_name not in df.columns: + warnings.append(f"Optional {col_type} column '{col_name}' not found. Some analyses may not work.") + + # 10. Validate data file exists + data_path = get_data_path() + if not data_path.exists(): + errors.append(f"Data file not found: {data_path}") + + return errors, warnings + +def print_validation_report(errors, warnings): + """ + Print a formatted validation report + + Args: + errors: List of error messages + warnings: List of warning messages + """ + print("\n" + "="*60) + print("Configuration Validation Report") + print("="*60) + + if errors: + print(f"\nāŒ ERRORS ({len(errors)}):") + for i, error in enumerate(errors, 1): + print(f" {i}. {error}") + else: + print("\nāœ… No configuration errors found") + + if warnings: + print(f"\nāš ļø WARNINGS ({len(warnings)}):") + for i, warning in enumerate(warnings, 1): + print(f" {i}. 
{warning}") + else: + print("\nāœ… No warnings") + + print("\n" + "="*60) + + if errors: + return False + return True + +def validate_and_report(df=None): + """ + Validate configuration and print report + + Args: + df: Optional DataFrame to validate against + + Returns: + bool: True if no errors, False otherwise + """ + errors, warnings = validate_config(df) + return print_validation_report(errors, warnings) + +# ============================================================================ +# STANDALONE VALIDATION SCRIPT +# ============================================================================ + +if __name__ == "__main__": + """Run configuration validation""" + print("Validating configuration...") + is_valid = validate_and_report() + + if is_valid: + print("\nāœ… Configuration is valid!") + exit(0) + else: + print("\nāŒ Configuration has errors. Please fix them before running analyses.") + exit(1) diff --git a/data_loader.py b/data_loader.py new file mode 100644 index 0000000..255bad5 --- /dev/null +++ b/data_loader.py @@ -0,0 +1,224 @@ +""" +Generic data loading utility with flexible date handling +Handles various date column formats and fallback logic + +This loader is designed to work with different CSV structures by: +1. Trying primary date column first +2. Falling back to alternative date columns if needed +3. Ensuring 100% date coverage +""" +import pandas as pd +import numpy as np +from pathlib import Path +from config import ( + REVENUE_COLUMN, DATE_COLUMN, DATE_FALLBACK_COLUMNS, + get_data_path +) + +def load_sales_data(filepath=None): + """ + Load sales data with flexible date handling + + This function provides intelligent data loading with fallback logic: + 1. Loads the CSV file + 2. Converts revenue column to numeric + 3. Attempts to parse dates using primary date column + 4. Falls back to alternative date columns if needed (100% coverage) + 5. 
Creates Year and YearMonth columns for analysis
+
+    CRITICAL: Always use this function instead of pd.read_csv() directly.
+    This ensures proper date parsing with fallback logic.
+
+    Args:
+        filepath: Path to the CSV file (defaults to config.DATA_FILE).
+                  Can be str, Path, or None (uses config.get_data_path())
+
+    Returns:
+        pd.DataFrame: DataFrame with properly parsed dates and revenue.
+                      Includes 'Year' and 'YearMonth' columns.
+
+    Raises:
+        FileNotFoundError: If data file doesn't exist.
+                           Error message includes file path and suggests checking config.py
+        ValueError: If required columns (REVENUE_COLUMN) are missing.
+                    Error message lists available columns and suggests updating config.py
+
+    Example:
+        >>> from data_loader import load_sales_data
+        >>> from config import get_data_path
+        >>> df = load_sales_data(get_data_path())
+        >>> print(f"Loaded {len(df):,} rows, {df['Year'].notna().sum():,} of which have dates")
+
+    See Also:
+        - .cursor/rules/data_loading.md for detailed patterns
+        - config.py for column name configuration
+    """
+    # Get data file path
+    if filepath is None:
+        filepath = get_data_path()
+    else:
+        filepath = Path(filepath)
+
+    # Check if file exists
+    if not filepath.exists():
+        raise FileNotFoundError(
+            f"Data file not found: {filepath}\n"
+            f"Please update config.py with the correct DATA_FILE path."
+        )
+
+    # Load CSV
+    print(f"Loading data from: {filepath}")
+    df = pd.read_csv(filepath, low_memory=False)
+    print(f"Loaded {len(df):,} rows")
+
+    # Validate required columns
+    if REVENUE_COLUMN not in df.columns:
+        raise ValueError(
+            f"Required column '{REVENUE_COLUMN}' not found in data.\n"
+            f"Available columns: {list(df.columns)}\n"
+            f"Please update config.py REVENUE_COLUMN to match your data."
+ ) + + # Convert revenue column to numeric + df[REVENUE_COLUMN] = pd.to_numeric(df[REVENUE_COLUMN], errors='coerce') + + # Count missing revenue values + missing_revenue = df[REVENUE_COLUMN].isna().sum() + if missing_revenue > 0: + print(f"Warning: {missing_revenue:,} rows have missing/invalid revenue values") + + # Create working date column + df['WorkingDate'] = pd.NaT + + # Try primary date column first + if DATE_COLUMN in df.columns: + print(f"Attempting to parse {DATE_COLUMN}...") + df['Date_Parsed'] = pd.to_datetime(df[DATE_COLUMN], errors='coerce', format='mixed') + parsed_count = df['Date_Parsed'].notna().sum() + df.loc[df['Date_Parsed'].notna(), 'WorkingDate'] = df.loc[df['Date_Parsed'].notna(), 'Date_Parsed'] + print(f" Parsed {parsed_count:,} dates from {DATE_COLUMN}") + else: + print(f"Warning: Primary date column '{DATE_COLUMN}' not found") + + # Use fallback date columns + if DATE_FALLBACK_COLUMNS: + for fallback_col in DATE_FALLBACK_COLUMNS: + if fallback_col in df.columns: + missing_dates = df['WorkingDate'].isna() + if missing_dates.sum() > 0: + print(f"Using fallback column: {fallback_col}...") + fallback_parsed = pd.to_datetime( + df.loc[missing_dates, fallback_col], + errors='coerce', + format='mixed' + ) + newly_parsed = missing_dates & fallback_parsed.notna() + if newly_parsed.sum() > 0: + df.loc[newly_parsed, 'WorkingDate'] = fallback_parsed[newly_parsed] + print(f" Parsed {newly_parsed.sum():,} additional dates from {fallback_col}") + + # Final fallback: try to construct from Year column if available + if 'Year' in df.columns and df['WorkingDate'].isna().sum() > 0: + missing_dates = df['WorkingDate'].isna() + year_values = pd.to_numeric(df.loc[missing_dates, 'Year'], errors='coerce') + valid_years = missing_dates & year_values.notna() + if valid_years.sum() > 0: + print(f"Using Year column for remaining {valid_years.sum():,} rows...") + df.loc[valid_years, 'WorkingDate'] = pd.to_datetime( + df.loc[valid_years, 
'Year'].astype(int).astype(str) + '-01-01', + errors='coerce' + ) + + # Set WorkingDate as the primary date column + df[DATE_COLUMN] = df['WorkingDate'] + + # Clean up temporary columns + df = df.drop(columns=['Date_Parsed', 'WorkingDate'], errors='ignore') + + # Extract Year from date column + df['Year'] = df[DATE_COLUMN].dt.year + + # Fill missing Year from Year column if it exists and date is missing + if 'Year' in df.columns: + year_orig = pd.to_numeric(df['Year'], errors='coerce') + missing_year = df['Year'].isna() + if missing_year.sum() > 0 and 'Year' in df.columns: + year_fallback = pd.to_numeric(df.loc[missing_year, 'Year'], errors='coerce') + df.loc[missing_year & year_fallback.notna(), 'Year'] = year_fallback[missing_year & year_fallback.notna()] + + # Create YearMonth for monthly analysis + if DATE_COLUMN in df.columns: + df['YearMonth'] = df[DATE_COLUMN].dt.to_period('M') + + # Report date coverage + total_rows = len(df) + date_coverage = df[DATE_COLUMN].notna().sum() + coverage_pct = (date_coverage / total_rows * 100) if total_rows > 0 else 0 + print(f"Date coverage: {date_coverage:,} / {total_rows:,} rows ({coverage_pct:.1f}%)") + + if coverage_pct < 100: + print(f"Warning: {total_rows - date_coverage:,} rows have missing dates") + + # Report date range + if df[DATE_COLUMN].notna().any(): + min_date = df[DATE_COLUMN].min() + max_date = df[DATE_COLUMN].max() + print(f"Date range: {min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}") + + return df + +def validate_data_structure(df: pd.DataFrame) -> tuple[bool, str]: + """ + Validate that loaded data has expected structure. + + Checks for required columns, data quality, and basic validity. + Returns actionable error messages if validation fails. 
+ + Args: + df: DataFrame to validate (should be result of load_sales_data()) + + Returns: + tuple[bool, str]: (is_valid, error_message) + - is_valid: True if data structure is valid, False otherwise + - error_message: "OK" if valid, otherwise descriptive error message + + Example: + >>> df = load_sales_data(get_data_path()) + >>> is_valid, msg = validate_data_structure(df) + >>> if not is_valid: + ... print(f"ERROR: {msg}") + + See Also: + - load_sales_data() - Load data before validating + - config_validator.py - Comprehensive configuration validation + """ + from config import REVENUE_COLUMN, DATE_COLUMN + + errors = [] + + # Check required columns + if REVENUE_COLUMN not in df.columns: + errors.append(f"Missing required column: {REVENUE_COLUMN}") + + if DATE_COLUMN not in df.columns: + errors.append(f"Missing required column: {DATE_COLUMN}") + + # Check data quality + if len(df) == 0: + errors.append("DataFrame is empty") + + if REVENUE_COLUMN in df.columns: + if df[REVENUE_COLUMN].isna().all(): + errors.append(f"All {REVENUE_COLUMN} values are NaN") + + if df[REVENUE_COLUMN].notna().sum() == 0: + errors.append(f"No valid {REVENUE_COLUMN} values") + + if DATE_COLUMN in df.columns: + if df[DATE_COLUMN].isna().all(): + errors.append(f"All {DATE_COLUMN} values are NaN") + + if errors: + return False, "; ".join(errors) + + return True, "OK" diff --git a/data_processing.py b/data_processing.py new file mode 100644 index 0000000..f1bbe9b --- /dev/null +++ b/data_processing.py @@ -0,0 +1,285 @@ +""" +Data processing utilities +Common data cleaning and transformation helpers + +Usage: + from data_processing import clean_data, create_pivot_table, prepare_time_series + + # Clean data + df_clean = clean_data(df) + + # Create pivot table + pivot = create_pivot_table(df, index='Year', columns='Product', values='Revenue') +""" +import pandas as pd +import numpy as np +from config import REVENUE_COLUMN, DATE_COLUMN, MIN_QUANTITY, MAX_QUANTITY + +def clean_data(df, 
remove_duplicates=True, handle_missing_dates=True): + """ + Clean data with common operations + + Args: + df: DataFrame to clean + remove_duplicates: Whether to remove duplicate rows + handle_missing_dates: Whether to handle missing dates + + Returns: + DataFrame: Cleaned DataFrame + """ + df_clean = df.copy() + + # Remove duplicates + if remove_duplicates: + initial_count = len(df_clean) + df_clean = df_clean.drop_duplicates() + removed = initial_count - len(df_clean) + if removed > 0: + print(f"Removed {removed:,} duplicate rows") + + # Handle missing dates + if handle_missing_dates and DATE_COLUMN in df_clean.columns: + missing_dates = df_clean[DATE_COLUMN].isna().sum() + if missing_dates > 0: + print(f"Warning: {missing_dates:,} rows have missing dates") + + # Remove rows with negative revenue (if configured) + if REVENUE_COLUMN in df_clean.columns: + negative_revenue = (df_clean[REVENUE_COLUMN] < 0).sum() + if negative_revenue > 0: + print(f"Found {negative_revenue:,} rows with negative revenue") + # Optionally remove: df_clean = df_clean[df_clean[REVENUE_COLUMN] >= 0] + + return df_clean + +def create_pivot_table(df, index, columns=None, values=None, aggfunc='sum', fill_value=0): + """ + Create pivot table with common defaults + + Args: + df: DataFrame + index: Column(s) to use as index + columns: Column(s) to use as columns + values: Column(s) to aggregate + aggfunc: Aggregation function (default: 'sum') + fill_value: Value to fill missing cells (default: 0) + + Returns: + DataFrame: Pivot table + """ + if values is None and REVENUE_COLUMN in df.columns: + values = REVENUE_COLUMN + + pivot = pd.pivot_table( + df, + index=index, + columns=columns, + values=values, + aggfunc=aggfunc, + fill_value=fill_value + ) + + return pivot + +def prepare_time_series(df, date_column=None, value_column=None, freq='M'): + """ + Prepare time series data + + Args: + df: DataFrame + date_column: Date column name (defaults to config.DATE_COLUMN) + value_column: Value column to 
aggregate (defaults to config.REVENUE_COLUMN) + freq: Frequency for resampling ('D', 'W', 'M', 'Q', 'Y') + + Returns: + Series: Time series data + """ + if date_column is None: + date_column = DATE_COLUMN + + if value_column is None: + value_column = REVENUE_COLUMN + + if date_column not in df.columns: + raise ValueError(f"Date column '{date_column}' not found") + + if value_column not in df.columns: + raise ValueError(f"Value column '{value_column}' not found") + + # Ensure date column is datetime + df = df.copy() + df[date_column] = pd.to_datetime(df[date_column], errors='coerce') + + # Set date as index + df_indexed = df.set_index(date_column) + + # Resample and aggregate + time_series = df_indexed[value_column].resample(freq).sum() + + return time_series + +def aggregate_by_period(df, period='year', date_column=None, value_column=None): + """ + Aggregate data by time period + + Args: + df: DataFrame + period: Period type ('year', 'month', 'quarter') + date_column: Date column name + value_column: Value column to aggregate + + Returns: + DataFrame: Aggregated data + """ + if date_column is None: + date_column = DATE_COLUMN + + if value_column is None: + value_column = REVENUE_COLUMN + + df = df.copy() + df[date_column] = pd.to_datetime(df[date_column], errors='coerce') + + # Extract period + if period == 'year': + df['Period'] = df[date_column].dt.year + elif period == 'month': + df['Period'] = df[date_column].dt.to_period('M') + elif period == 'quarter': + df['Period'] = df[date_column].dt.to_period('Q') + else: + raise ValueError(f"Unknown period: {period}") + + # Aggregate + aggregated = df.groupby('Period')[value_column].agg(['sum', 'count', 'mean']).reset_index() + aggregated.columns = ['Period', 'Total', 'Count', 'Average'] + + return aggregated + +def filter_outliers(df, column, method='iqr', lower_bound=None, upper_bound=None): + """ + Filter outliers from DataFrame + + Args: + df: DataFrame + column: Column name to filter on + method: Method ('iqr' for 
interquartile range, 'zscore' for z-score) + lower_bound: Manual lower bound + upper_bound: Manual upper bound + + Returns: + DataFrame: Filtered DataFrame + """ + df_filtered = df.copy() + + if method == 'iqr': + q1 = df[column].quantile(0.25) + q3 = df[column].quantile(0.75) + iqr = q3 - q1 + lower = lower_bound if lower_bound is not None else q1 - 1.5 * iqr + upper = upper_bound if upper_bound is not None else q3 + 1.5 * iqr + elif method == 'zscore': + mean = df[column].mean() + std = df[column].std() + lower = lower_bound if lower_bound is not None else mean - 3 * std + upper = upper_bound if upper_bound is not None else mean + 3 * std + else: + raise ValueError(f"Unknown method: {method}") + + initial_count = len(df_filtered) + df_filtered = df_filtered[(df_filtered[column] >= lower) & (df_filtered[column] <= upper)] + removed = initial_count - len(df_filtered) + + if removed > 0: + print(f"Removed {removed:,} outliers from {column} ({removed/initial_count*100:.1f}%)") + + return df_filtered + +def normalize_column(df, column, method='min_max'): + """ + Normalize a column + + Args: + df: DataFrame + column: Column name to normalize + method: Normalization method ('min_max', 'zscore') + + Returns: + Series: Normalized values + """ + if method == 'min_max': + min_val = df[column].min() + max_val = df[column].max() + if max_val - min_val == 0: + return pd.Series([0] * len(df), index=df.index) + return (df[column] - min_val) / (max_val - min_val) + elif method == 'zscore': + mean = df[column].mean() + std = df[column].std() + if std == 0: + return pd.Series([0] * len(df), index=df.index) + return (df[column] - mean) / std + else: + raise ValueError(f"Unknown method: {method}") + +def create_derived_columns(df): + """ + Create common derived columns + + Args: + df: DataFrame + + Returns: + DataFrame: DataFrame with derived columns + """ + df_derived = df.copy() + + # Extract year, month, quarter if date column exists + if DATE_COLUMN in df_derived.columns: + 
df_derived[DATE_COLUMN] = pd.to_datetime(df_derived[DATE_COLUMN], errors='coerce') + + if 'Year' not in df_derived.columns: + df_derived['Year'] = df_derived[DATE_COLUMN].dt.year + + if 'Month' not in df_derived.columns: + df_derived['Month'] = df_derived[DATE_COLUMN].dt.month + + if 'Quarter' not in df_derived.columns: + df_derived['Quarter'] = df_derived[DATE_COLUMN].dt.quarter + + if 'YearMonth' not in df_derived.columns: + df_derived['YearMonth'] = df_derived[DATE_COLUMN].dt.to_period('M') + + # Calculate price per unit if quantity and revenue exist + from config import QUANTITY_COLUMN + if QUANTITY_COLUMN in df_derived.columns and REVENUE_COLUMN in df_derived.columns: + df_derived['Price_Per_Unit'] = df_derived[REVENUE_COLUMN] / df_derived[QUANTITY_COLUMN].replace(0, np.nan) + + return df_derived + +# ============================================================================ +# EXAMPLE USAGE +# ============================================================================ + +if __name__ == "__main__": + """Example usage""" + # Create sample data + df = pd.DataFrame({ + 'InvoiceDate': pd.date_range('2023-01-01', periods=100, freq='D'), + 'USD': np.random.normal(1000, 200, 100), + 'Quantity': np.random.randint(1, 100, 100) + }) + + # Clean data + df_clean = clean_data(df) + print(f"Cleaned data: {len(df_clean)} rows") + + # Create pivot table + df_clean['Year'] = df_clean['InvoiceDate'].dt.year + pivot = create_pivot_table(df_clean, index='Year', values='USD') + print("\nPivot table:") + print(pivot) + + # Prepare time series + ts = prepare_time_series(df_clean, freq='M') + print(f"\nTime series: {len(ts)} periods") diff --git a/data_quality.py b/data_quality.py new file mode 100644 index 0000000..e41d01f --- /dev/null +++ b/data_quality.py @@ -0,0 +1,344 @@ +""" +Data quality reporting utility +Generates comprehensive data quality reports + +Usage: + from data_quality import generate_data_quality_report, print_data_quality_report + + # Generate and print report 
+ report = generate_data_quality_report(df) + print_data_quality_report(report) +""" +import pandas as pd +import numpy as np +from config import ( + REVENUE_COLUMN, DATE_COLUMN, CUSTOMER_COLUMN, ITEM_COLUMN, + QUANTITY_COLUMN, MIN_QUANTITY, MAX_QUANTITY +) + +def generate_data_quality_report(df): + """ + Generate comprehensive data quality report + + Args: + df: DataFrame to analyze + + Returns: + dict: Dictionary containing data quality metrics + """ + report = { + 'overview': {}, + 'missing_values': {}, + 'duplicates': {}, + 'outliers': {}, + 'data_types': {}, + 'date_coverage': {}, + 'revenue_summary': {}, + 'issues': [] + } + + # Overview + report['overview'] = { + 'total_rows': len(df), + 'total_columns': len(df.columns), + 'memory_usage_mb': df.memory_usage(deep=True).sum() / 1024**2 + } + + # Missing values + missing = df.isnull().sum() + missing_pct = (missing / len(df)) * 100 + report['missing_values'] = { + 'by_column': missing[missing > 0].to_dict(), + 'percentages': missing_pct[missing > 0].to_dict(), + 'total_missing': missing.sum(), + 'columns_with_missing': len(missing[missing > 0]) + } + + # Duplicates + duplicate_rows = df.duplicated().sum() + report['duplicates'] = { + 'duplicate_rows': int(duplicate_rows), + 'duplicate_percentage': (duplicate_rows / len(df)) * 100 if len(df) > 0 else 0 + } + + # Outliers (revenue and quantity) + outliers = {} + + if REVENUE_COLUMN in df.columns: + revenue = pd.to_numeric(df[REVENUE_COLUMN], errors='coerce') + q1 = revenue.quantile(0.25) + q3 = revenue.quantile(0.75) + iqr = q3 - q1 + lower_bound = q1 - 1.5 * iqr + upper_bound = q3 + 1.5 * iqr + + revenue_outliers = ((revenue < lower_bound) | (revenue > upper_bound)).sum() + outliers['revenue'] = { + 'count': int(revenue_outliers), + 'percentage': (revenue_outliers / len(df)) * 100 if len(df) > 0 else 0, + 'lower_bound': float(lower_bound), + 'upper_bound': float(upper_bound), + 'negative_values': int((revenue < 0).sum()) + } + + if QUANTITY_COLUMN in df.columns: 
+ quantity = pd.to_numeric(df[QUANTITY_COLUMN], errors='coerce') + # Use config thresholds if available + if MIN_QUANTITY is not None and MAX_QUANTITY is not None: + quantity_outliers = ((quantity < MIN_QUANTITY) | (quantity > MAX_QUANTITY)).sum() + outliers['quantity'] = { + 'count': int(quantity_outliers), + 'percentage': (quantity_outliers / len(df)) * 100 if len(df) > 0 else 0, + 'below_min': int((quantity < MIN_QUANTITY).sum()), + 'above_max': int((quantity > MAX_QUANTITY).sum()) + } + else: + q1 = quantity.quantile(0.25) + q3 = quantity.quantile(0.75) + iqr = q3 - q1 + lower_bound = q1 - 1.5 * iqr + upper_bound = q3 + 1.5 * iqr + + quantity_outliers = ((quantity < lower_bound) | (quantity > upper_bound)).sum() + outliers['quantity'] = { + 'count': int(quantity_outliers), + 'percentage': (quantity_outliers / len(df)) * 100 if len(df) > 0 else 0, + 'lower_bound': float(lower_bound), + 'upper_bound': float(upper_bound) + } + + report['outliers'] = outliers + + # Data types + report['data_types'] = { + 'numeric_columns': list(df.select_dtypes(include=[np.number]).columns), + 'datetime_columns': list(df.select_dtypes(include=['datetime64']).columns), + 'object_columns': list(df.select_dtypes(include=['object']).columns), + 'type_summary': df.dtypes.value_counts().to_dict() + } + + # Date coverage + if DATE_COLUMN in df.columns: + date_coverage = df[DATE_COLUMN].notna().sum() + report['date_coverage'] = { + 'total_rows': len(df), + 'rows_with_dates': int(date_coverage), + 'coverage_percentage': (date_coverage / len(df)) * 100 if len(df) > 0 else 0, + 'min_date': str(df[DATE_COLUMN].min()) if date_coverage > 0 else None, + 'max_date': str(df[DATE_COLUMN].max()) if date_coverage > 0 else None + } + + # Revenue summary + if REVENUE_COLUMN in df.columns: + revenue = pd.to_numeric(df[REVENUE_COLUMN], errors='coerce') + valid_revenue = revenue.dropna() + + if len(valid_revenue) > 0: + report['revenue_summary'] = { + 'total_revenue': float(valid_revenue.sum()), + 
'mean_revenue': float(valid_revenue.mean()), + 'median_revenue': float(valid_revenue.median()), + 'min_revenue': float(valid_revenue.min()), + 'max_revenue': float(valid_revenue.max()), + 'std_revenue': float(valid_revenue.std()), + 'valid_rows': int(len(valid_revenue)), + 'invalid_rows': int(len(df) - len(valid_revenue)) + } + + # Identify issues + issues = [] + + # Critical issues + if report['missing_values']['columns_with_missing'] > 0: + high_missing = {k: v for k, v in report['missing_values']['percentages'].items() if v > 50} + if high_missing: + issues.append({ + 'severity': 'critical', + 'issue': f"Columns with >50% missing values: {list(high_missing.keys())}", + 'impact': 'High' + }) + + if DATE_COLUMN in df.columns: + if report['date_coverage']['coverage_percentage'] < 50: + issues.append({ + 'severity': 'critical', + 'issue': f"Date coverage is only {report['date_coverage']['coverage_percentage']:.1f}%", + 'impact': 'High - analyses may fail' + }) + + if REVENUE_COLUMN in df.columns: + if report['revenue_summary'].get('invalid_rows', 0) > len(df) * 0.1: + issues.append({ + 'severity': 'critical', + 'issue': f"{report['revenue_summary']['invalid_rows']} rows have invalid revenue values", + 'impact': 'High' + }) + + # Warnings + if report['duplicates']['duplicate_percentage'] > 5: + issues.append({ + 'severity': 'warning', + 'issue': f"{report['duplicates']['duplicate_rows']} duplicate rows ({report['duplicates']['duplicate_percentage']:.1f}%)", + 'impact': 'Medium' + }) + + if 'revenue' in outliers: + if outliers['revenue']['percentage'] > 10: + issues.append({ + 'severity': 'warning', + 'issue': f"{outliers['revenue']['count']} revenue outliers ({outliers['revenue']['percentage']:.1f}%)", + 'impact': 'Medium' + }) + + report['issues'] = issues + + return report + +def print_data_quality_report(report): + """ + Print formatted data quality report + + Args: + report: Dictionary from generate_data_quality_report() + """ + print("\n" + "="*70) + print("DATA 
QUALITY REPORT") + print("="*70) + + # Overview + print("\nšŸ“Š OVERVIEW") + print("-" * 70) + print(f"Total Rows: {report['overview']['total_rows']:,}") + print(f"Total Columns: {report['overview']['total_columns']}") + print(f"Memory Usage: {report['overview']['memory_usage_mb']:.2f} MB") + + # Missing values + print("\nšŸ” MISSING VALUES") + print("-" * 70) + if report['missing_values']['columns_with_missing'] > 0: + print(f"Columns with missing values: {report['missing_values']['columns_with_missing']}") + print(f"Total missing values: {report['missing_values']['total_missing']:,}") + print("\nTop columns by missing values:") + missing_sorted = sorted( + report['missing_values']['percentages'].items(), + key=lambda x: x[1], + reverse=True + )[:10] + for col, pct in missing_sorted: + count = report['missing_values']['by_column'][col] + print(f" {col:30s}: {count:8,} ({pct:5.1f}%)") + else: + print("āœ… No missing values found") + + # Duplicates + print("\nšŸ”„ DUPLICATES") + print("-" * 70) + if report['duplicates']['duplicate_rows'] > 0: + print(f"Duplicate Rows: {report['duplicates']['duplicate_rows']:,} ({report['duplicates']['duplicate_percentage']:.2f}%)") + else: + print("āœ… No duplicate rows found") + + # Outliers + print("\nšŸ“ˆ OUTLIERS") + print("-" * 70) + if 'revenue' in report['outliers']: + rev_out = report['outliers']['revenue'] + print(f"Revenue Outliers: {rev_out['count']:,} ({rev_out['percentage']:.2f}%)") + if 'negative_values' in rev_out and rev_out['negative_values'] > 0: + print(f" Negative Revenue Values: {rev_out['negative_values']:,}") + + if 'quantity' in report['outliers']: + qty_out = report['outliers']['quantity'] + print(f"Quantity Outliers: {qty_out['count']:,} ({qty_out['percentage']:.2f}%)") + + if not report['outliers']: + print("āœ… No significant outliers detected") + + # Date coverage + if report['date_coverage']: + print("\nšŸ“… DATE COVERAGE") + print("-" * 70) + dc = report['date_coverage'] + print(f"Rows with Dates: 
{dc['rows_with_dates']:,} / {dc['total_rows']:,} ({dc['coverage_percentage']:.1f}%)") + if dc['min_date']: + print(f"Date Range: {dc['min_date']} to {dc['max_date']}") + + # Revenue summary + if report['revenue_summary']: + print("\nšŸ’° REVENUE SUMMARY") + print("-" * 70) + rs = report['revenue_summary'] + print(f"Total Revenue: ${rs['total_revenue'] / 1e6:.2f}m") + print(f"Valid Rows: {rs['valid_rows']:,} / {rs['valid_rows'] + rs['invalid_rows']:,}") + if rs['invalid_rows'] > 0: + print(f"Invalid Rows: {rs['invalid_rows']:,}") + print(f"Mean: ${rs['mean_revenue']:,.2f}") + print(f"Median: ${rs['median_revenue']:,.2f}") + print(f"Min: ${rs['min_revenue']:,.2f}") + print(f"Max: ${rs['max_revenue']:,.2f}") + + # Issues + if report['issues']: + print("\nāš ļø ISSUES DETECTED") + print("-" * 70) + critical = [i for i in report['issues'] if i['severity'] == 'critical'] + warnings = [i for i in report['issues'] if i['severity'] == 'warning'] + + if critical: + print("āŒ CRITICAL ISSUES:") + for issue in critical: + print(f" • {issue['issue']}") + print(f" Impact: {issue['impact']}") + + if warnings: + print("\nāš ļø WARNINGS:") + for issue in warnings: + print(f" • {issue['issue']}") + print(f" Impact: {issue['impact']}") + else: + print("\nāœ… NO ISSUES DETECTED") + + print("\n" + "="*70) + +def generate_data_quality_report_simple(df): + """ + Generate a simple data quality summary (quick check) + + Args: + df: DataFrame to analyze + + Returns: + str: Simple summary string + """ + summary_parts = [] + + summary_parts.append(f"Rows: {len(df):,}") + summary_parts.append(f"Columns: {len(df.columns)}") + + if REVENUE_COLUMN in df.columns: + revenue = pd.to_numeric(df[REVENUE_COLUMN], errors='coerce') + valid = revenue.notna().sum() + summary_parts.append(f"Valid Revenue: {valid:,} ({valid/len(df)*100:.1f}%)") + + if DATE_COLUMN in df.columns: + date_coverage = df[DATE_COLUMN].notna().sum() + summary_parts.append(f"Date Coverage: {date_coverage:,} 
({date_coverage/len(df)*100:.1f}%)") + + return " | ".join(summary_parts) + +# ============================================================================ +# STANDALONE DATA QUALITY CHECK +# ============================================================================ + +if __name__ == "__main__": + """Run data quality check""" + from data_loader import load_sales_data + from config import get_data_path + + print("Loading data for quality check...") + try: + df = load_sales_data(get_data_path()) + report = generate_data_quality_report(df) + print_data_quality_report(report) + except Exception as e: + print(f"ERROR: {e}") diff --git a/examples/annual_revenue_trend.py b/examples/annual_revenue_trend.py new file mode 100644 index 0000000..2b355b9 --- /dev/null +++ b/examples/annual_revenue_trend.py @@ -0,0 +1,134 @@ +""" +Example: Annual Revenue Trend Analysis +Simple example showing annual revenue with LTM support + +This is a working example that demonstrates: +- Loading data using data_loader +- Calculating annual metrics with LTM +- Creating a revenue trend chart +- Following template best practices +""" +import pandas as pd +import matplotlib.pyplot as plt +from pathlib import Path + +# Import utilities +from data_loader import load_sales_data, validate_data_structure +from validate_revenue import validate_revenue +from analysis_utils import ( + get_ltm_period_config, calculate_annual_metrics, + setup_revenue_chart, save_chart, + format_currency, print_annual_summary, sort_mixed_years, + apply_exclusion_filters +) +from config import ( + OUTPUT_DIR, ANALYSIS_YEARS, MAX_DATE, + CHART_SIZES, ensure_directories, get_data_path, COMPANY_NAME, + REVENUE_COLUMN, MIN_YEAR, DATE_COLUMN +) + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +ANALYSIS_NAME = "Annual Revenue Trend" +DESCRIPTION = "Simple annual revenue trend analysis with LTM 
def main():
    """Run the annual revenue trend analysis end to end.

    Pipeline: load -> validate -> filter -> annual metrics (with optional
    LTM period) -> chart -> revenue validation.
    """

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"{ANALYSIS_NAME}")
    print(f"{banner}\n")

    # Load the transaction data; abort on any failure.
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        print(f"ERROR loading data: {e}")
        return

    # Structural validation (required columns etc.).
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return
    print("Data validation passed")

    # Configured exclusions, then the analysis date window.
    df = apply_exclusion_filters(df)
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]

    # Optional last-twelve-months window.
    ltm_start, ltm_end = get_ltm_period_config()
    if ltm_start and ltm_end:
        print(f"LTM period: {ltm_start} to {ltm_end}")

    print("\nCalculating annual metrics...")

    def yearly_revenue(year_data):
        # Per-year metric: total revenue only.
        return {'Revenue': year_data[REVENUE_COLUMN].sum()}

    annual_df = calculate_annual_metrics(df, yearly_revenue, ltm_start, ltm_end)

    print_annual_summary(annual_df, 'Revenue', 'Revenue')

    print("Generating chart...")
    ensure_directories()

    fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])

    # Year labels may mix ints and strings (e.g. an "LTM" bucket), so sort
    # with the shared helper and plot against integer positions.
    ordered = sort_mixed_years(annual_df.reset_index(), 'Year')
    year_labels = ordered['Year'].tolist()
    revenue_musd = ordered['Revenue'].values / 1e6  # Convert to millions
    positions = range(len(year_labels))

    ax.plot(positions, revenue_musd, marker='o', linewidth=2, markersize=8, color='#2E86AB')
    ax.set_xticks(positions)
    ax.set_xticklabels(year_labels, rotation=45, ha='right')
    setup_revenue_chart(ax)

    # Note the LTM window in the title when one is configured.
    title = f'Annual Revenue Trend - {COMPANY_NAME}'
    if ltm_start and ltm_end:
        from config import get_ltm_label
        ltm_label = get_ltm_label()
        if ltm_label:
            title += f'\n({ltm_label})'
    ax.set_title(title, fontsize=14, fontweight='bold')

    plt.tight_layout()
    save_chart(fig, 'annual_revenue_trend.png')
    plt.close()

    # Cross-check the plotted totals against the raw data.
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Chart saved to: {OUTPUT_DIR}")

# ============================================================================
# RUN ANALYSIS
# ============================================================================

if __name__ == "__main__":
    main()
def calculate_cohort_metrics(df_with_cohort, customer_col=None, revenue_col=None):
    """
    Calculate cohort retention metrics

    Args:
        df_with_cohort: DataFrame with Cohort and CohortPeriod columns
            (as produced by create_cohorts)
        customer_col: Customer identifier column; defaults to
            config.CUSTOMER_COLUMN (backward compatible)
        revenue_col: Revenue column; defaults to config.REVENUE_COLUMN

    Returns:
        DataFrame: Cohort metrics by period, one row per (Cohort, Period)
        with Customers, Revenue, Retention_Rate and Revenue_Retention
        (both in percent, relative to Period 0)
    """
    # Resolve column names from config only when not supplied, so the
    # function can also be called (and tested) without the config module.
    if customer_col is None or revenue_col is None:
        from config import REVENUE_COLUMN, CUSTOMER_COLUMN
        customer_col = customer_col or CUSTOMER_COLUMN
        revenue_col = revenue_col or REVENUE_COLUMN

    # Unique customers and total revenue per cohort/period.
    cohort_revenue = df_with_cohort.groupby(['Cohort', 'CohortPeriod']).agg({
        customer_col: 'nunique',
        revenue_col: 'sum'
    }).reset_index()
    cohort_revenue.columns = ['Cohort', 'Period', 'Customers', 'Revenue']

    # Calculate retention rates relative to each cohort's Period-0 baseline.
    cohort_frames = []
    for _, cohort_data in cohort_revenue.groupby('Cohort', sort=False):
        cohort_data = cohort_data.copy()
        baseline = cohort_data[cohort_data['Period'] == 0]
        if baseline.empty:
            # Upstream filtering can drop a cohort's first month; without a
            # Period-0 baseline retention is undefined, so skip the cohort
            # instead of crashing on values[0] (old behavior: IndexError).
            continue
        initial_customers = baseline['Customers'].iloc[0]
        initial_revenue = baseline['Revenue'].iloc[0]
        cohort_data['Retention_Rate'] = (cohort_data['Customers'] / initial_customers) * 100
        cohort_data['Revenue_Retention'] = cohort_data['Revenue'] / initial_revenue * 100
        cohort_frames.append(cohort_data)

    # pd.concat on an empty list raises; return an empty, well-typed frame.
    if not cohort_frames:
        return pd.DataFrame(columns=['Cohort', 'Period', 'Customers', 'Revenue',
                                     'Retention_Rate', 'Revenue_Retention'])

    return pd.concat(cohort_frames, ignore_index=True)
Print summary + print("\nCohort Summary:") + print("-" * 60) + for cohort in sorted(cohort_metrics['Cohort'].unique())[:5]: # Show top 5 cohorts + cohort_data = cohort_metrics[cohort_metrics['Cohort'] == cohort] + period_0 = cohort_data[cohort_data['Period'] == 0] + if len(period_0) > 0: + initial_customers = period_0['Customers'].values[0] + initial_revenue = period_0['Revenue'].values[0] + print(f"\n{cohort}:") + print(f" Initial: {initial_customers:,} customers, {format_currency(initial_revenue)}") + + # Show retention at period 12 + period_12 = cohort_data[cohort_data['Period'] == 12] + if len(period_12) > 0: + retention = period_12['Retention_Rate'].values[0] + revenue_ret = period_12['Revenue_Retention'].values[0] + print(f" Period 12: {retention:.1f}% customer retention, {revenue_ret:.1f}% revenue retention") + + # 7. Create visualizations + print("\nGenerating charts...") + ensure_directories() + + # Heatmap: Customer retention + pivot_retention = cohort_metrics.pivot_table( + index='Cohort', + columns='Period', + values='Retention_Rate', + aggfunc='mean' + ) + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide']) + + # Retention heatmap + sns.heatmap(pivot_retention, annot=True, fmt='.0f', cmap='YlOrRd', ax=ax1, cbar_kws={'label': 'Retention %'}) + ax1.set_title('Customer Retention by Cohort\n(Period 0 = 100%)', fontsize=12, fontweight='bold') + ax1.set_xlabel('Months Since First Purchase') + ax1.set_ylabel('Cohort') + + # Revenue retention heatmap + pivot_revenue = cohort_metrics.pivot_table( + index='Cohort', + columns='Period', + values='Revenue_Retention', + aggfunc='mean' + ) + + sns.heatmap(pivot_revenue, annot=True, fmt='.0f', cmap='YlGnBu', ax=ax2, cbar_kws={'label': 'Revenue Retention %'}) + ax2.set_title('Revenue Retention by Cohort\n(Period 0 = 100%)', fontsize=12, fontweight='bold') + ax2.set_xlabel('Months Since First Purchase') + ax2.set_ylabel('Cohort') + + plt.suptitle(f'Cohort Analysis - {COMPANY_NAME}', fontsize=14, 
def _quantile_score(series, labels):
    """Score a metric into quintiles on a 1-5 scale.

    rank(method='first') makes every value unique, so pd.qcut always finds
    five distinct bin edges and the explicit labels remain valid (qcut
    raises if labels are supplied while duplicate edges get dropped).
    """
    return pd.qcut(
        series.rank(method='first'), q=5, labels=labels, duplicates='drop'
    ).astype(int)


def calculate_rfm_scores(df, analysis_date=None, date_col=None,
                         customer_col=None, revenue_col=None):
    """
    Calculate RFM scores for each customer

    Args:
        df: DataFrame with customer, date, and revenue columns
        analysis_date: Reference date for recency calculation (defaults to max date)
        date_col / customer_col / revenue_col: Column names; default to the
            module-level DATE_COLUMN / CUSTOMER_COLUMN / REVENUE_COLUMN
            (backward compatible with the original signature)

    Returns:
        DataFrame with RFM scores and segment assignment
    """
    # Fall back to the configured column names only when not supplied.
    date_col = date_col or DATE_COLUMN
    customer_col = customer_col or CUSTOMER_COLUMN
    revenue_col = revenue_col or REVENUE_COLUMN

    if analysis_date is None:
        analysis_date = df[date_col].max()

    # Customer-level metrics: last purchase, purchase count, total revenue.
    customer_metrics = df.groupby(customer_col).agg({
        date_col: ['max', 'count'],
        revenue_col: 'sum'
    }).reset_index()
    customer_metrics.columns = [customer_col, 'LastPurchaseDate', 'Frequency', 'Monetary']

    # Recency = days since last purchase relative to the analysis date.
    customer_metrics['Recency'] = (analysis_date - customer_metrics['LastPurchaseDate']).dt.days

    # Score each dimension (1-5, 5 = best). Recency is inverted: a LOW
    # recency (recent purchase) earns the HIGH score.
    customer_metrics['R_Score'] = _quantile_score(customer_metrics['Recency'], [5, 4, 3, 2, 1])
    customer_metrics['F_Score'] = _quantile_score(customer_metrics['Frequency'], [1, 2, 3, 4, 5])
    customer_metrics['M_Score'] = _quantile_score(customer_metrics['Monetary'], [1, 2, 3, 4, 5])

    # Combined RFM score (3..15).
    customer_metrics['RFM_Score'] = (
        customer_metrics['R_Score'] +
        customer_metrics['F_Score'] +
        customer_metrics['M_Score']
    )

    def assign_segment(row):
        # Segment rules evaluated in priority order; first match wins.
        r, f, m = row['R_Score'], row['F_Score'], row['M_Score']
        if r >= 4 and f >= 4 and m >= 4:
            return 'Champions'
        elif r >= 3 and f >= 3 and m >= 4:
            return 'Loyal Customers'
        elif r >= 4 and f <= 2:
            return 'At Risk'
        elif r <= 2:
            return 'Hibernating'
        elif r >= 3 and f >= 3 and m <= 2:
            return 'Potential Loyalists'
        else:
            return 'Need Attention'

    customer_metrics['Segment'] = customer_metrics.apply(assign_segment, axis=1)

    return customer_metrics
Create visualizations + print("\nGenerating charts...") + ensure_directories() + + # Chart 1: Revenue by Segment + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide']) + + segment_summary_sorted = segment_summary.sort_values('Total Revenue', ascending=True) + revenue_millions = segment_summary_sorted['Total Revenue'].values / 1e6 + + ax1.barh(range(len(segment_summary_sorted)), revenue_millions, color='#2E86AB') + ax1.set_yticks(range(len(segment_summary_sorted))) + ax1.set_yticklabels(segment_summary_sorted['Segment'].values) + ax1.set_xlabel('Revenue (Millions USD)') + ax1.set_title('Revenue by Customer Segment', fontsize=12, fontweight='bold') + setup_revenue_chart(ax1) + ax1.set_ylabel('') + + # Chart 2: Customer Count by Segment + customer_counts = segment_summary_sorted['Customer Count'].values + ax2.barh(range(len(segment_summary_sorted)), customer_counts, color='#A23B72') + ax2.set_yticks(range(len(segment_summary_sorted))) + ax2.set_yticklabels(segment_summary_sorted['Segment'].values) + ax2.set_xlabel('Number of Customers') + ax2.set_title('Customer Count by Segment', fontsize=12, fontweight='bold') + ax2.set_ylabel('') + ax2.grid(True, alpha=0.3) + + plt.suptitle(f'Customer Segmentation Analysis - {COMPANY_NAME}', + fontsize=14, fontweight='bold', y=1.02) + plt.tight_layout() + save_chart(fig, 'customer_segmentation.png') + plt.close() + + # 8. 
def main():
    """Main analysis function

    Loads sales data, computes product-level performance for the most
    recent period (LTM if configured, otherwise the latest year), prints a
    top-10 summary, and saves a product performance chart.
    """

    print(f"\n{'='*60}")
    print(f"{ANALYSIS_NAME}")
    print(f"{'='*60}\n")

    # 1. Load data
    print("Loading data...")
    try:
        df = load_sales_data(get_data_path())
        print(f"Loaded {len(df):,} transactions")
    except Exception as e:
        print(f"ERROR loading data: {e}")
        return

    # 2. Validate data structure
    is_valid, msg = validate_data_structure(df)
    if not is_valid:
        print(f"ERROR: {msg}")
        return

    if ITEM_COLUMN not in df.columns:
        print(f"WARNING: Item column '{ITEM_COLUMN}' not found. Using transaction-level analysis.")
        # Create a dummy item column for demonstration
        df[ITEM_COLUMN] = 'All Products'

    print("Data validation passed")

    # 3. Apply exclusion filters
    df = apply_exclusion_filters(df)

    # 4. Filter by date range
    df = df[df['Year'] >= MIN_YEAR]
    if DATE_COLUMN in df.columns:
        df = df[df[DATE_COLUMN] <= MAX_DATE]

    # 5. Setup LTM period (if enabled)
    ltm_start, ltm_end = get_ltm_period_config()

    # 6. Product performance summary
    print("\nCalculating product performance...")

    # Use the LTM window when configured, otherwise the latest full year.
    if ltm_start and ltm_end and 'YearMonth' in df.columns:
        recent_data = df[(df['YearMonth'] >= ltm_start) & (df['YearMonth'] <= ltm_end)]
        period_label = f"LTM {ltm_end}"
    else:
        recent_year = df['Year'].max()
        recent_data = df[df['Year'] == recent_year]
        period_label = str(recent_year)

    # Product-level metrics.
    # BUGFIX: the previous code always put QUANTITY_COLUMN in the agg dict
    # (only the aggregation *function* was conditional), so a missing
    # quantity column raised KeyError. Build the agg spec conditionally and
    # name the result columns to match.
    has_quantity = QUANTITY_COLUMN in recent_data.columns
    agg_spec = {REVENUE_COLUMN: ['sum', 'count']}
    if has_quantity:
        agg_spec[QUANTITY_COLUMN] = 'sum'

    product_metrics = recent_data.groupby(ITEM_COLUMN).agg(agg_spec).reset_index()

    if has_quantity:
        product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count', 'Quantity']
    else:
        product_metrics.columns = [ITEM_COLUMN, 'Revenue', 'Transaction_Count']
        # Without a quantity column, treat each transaction as one unit.
        product_metrics['Quantity'] = product_metrics['Transaction_Count']

    # Calculate average price per unit if quantity available
    if has_quantity:
        # Guard against divide-by-zero quantities.
        product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Quantity'].replace(0, np.nan)
    else:
        product_metrics['Avg_Price'] = product_metrics['Revenue'] / product_metrics['Transaction_Count']

    # Sort by revenue
    product_metrics = product_metrics.sort_values('Revenue', ascending=False)

    # Top products summary
    print(f"\nTop 10 Products by Revenue ({period_label}):")
    print("-" * 80)
    top_10 = product_metrics.head(10)
    total_revenue = product_metrics['Revenue'].sum()

    for _, row in top_10.iterrows():
        pct = (row['Revenue'] / total_revenue) * 100
        print(f"{row[ITEM_COLUMN]:30s}: {format_currency(row['Revenue']):>12s} ({pct:5.1f}%)")

    # 7. Annual product trends (if multiple years available)
    if len(df['Year'].unique()) > 1:
        print("\nCalculating annual product trends...")

        def calculate_product_metrics(year_data):
            """Calculate product metrics for a year (top-5 product revenue)."""
            product_revenue = year_data.groupby(ITEM_COLUMN)[REVENUE_COLUMN].sum()
            top_5 = product_revenue.nlargest(5)
            return dict(top_5)

        # NOTE(review): this result is computed but not charted below —
        # kept for parity with the original behavior; consider plotting it.
        annual_product_df = calculate_annual_metrics(df, calculate_product_metrics, ltm_start, ltm_end)

        # 8. Create visualizations
        print("\nGenerating charts...")
        ensure_directories()

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=CHART_SIZES['wide'])

        top_10_revenue = top_10['Revenue'].values / 1e6
        top_10_names = top_10[ITEM_COLUMN].values

        # Chart 1: Top Products Revenue (horizontal bars, long names truncated)
        ax1.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
        ax1.set_yticks(range(len(top_10)))
        ax1.set_yticklabels([name[:30] + '...' if len(name) > 30 else name for name in top_10_names])
        ax1.set_xlabel('Revenue (Millions USD)')
        ax1.set_title(f'Top 10 Products by Revenue\n({period_label})', fontsize=12, fontweight='bold')
        setup_revenue_chart(ax1)
        ax1.set_ylabel('')

        # Chart 2: Revenue Distribution (pie of top 10 + "Other" bucket)
        if len(product_metrics) > 10:
            other_revenue = product_metrics.iloc[10:]['Revenue'].sum()
            pie_data = list(top_10['Revenue'].values) + [other_revenue]
            pie_labels = list(top_10[ITEM_COLUMN].values) + ['Other']
        else:
            pie_data = product_metrics['Revenue'].values
            pie_labels = product_metrics[ITEM_COLUMN].values

        pie_data_millions = [x / 1e6 for x in pie_data]
        ax2.pie(pie_data_millions, labels=pie_labels, autopct='%1.1f%%', startangle=90)
        ax2.set_title('Revenue Distribution\n(Top Products)', fontsize=12, fontweight='bold')

        plt.suptitle(f'Product Performance Analysis - {COMPANY_NAME}',
                     fontsize=14, fontweight='bold', y=1.02)
        plt.tight_layout()
        save_chart(fig, 'product_performance.png')
        plt.close()
    else:
        # Single chart if only one year
        print("\nGenerating chart...")
        ensure_directories()

        fig, ax = plt.subplots(figsize=CHART_SIZES['medium'])

        top_10_revenue = top_10['Revenue'].values / 1e6
        top_10_names = top_10[ITEM_COLUMN].values

        ax.barh(range(len(top_10)), top_10_revenue, color='#2E86AB')
        ax.set_yticks(range(len(top_10)))
        ax.set_yticklabels([name[:40] + '...' if len(name) > 40 else name for name in top_10_names])
        ax.set_xlabel('Revenue (Millions USD)')
        ax.set_title(f'Top 10 Products by Revenue - {COMPANY_NAME}\n({period_label})',
                     fontsize=14, fontweight='bold')
        setup_revenue_chart(ax)
        ax.set_ylabel('')

        plt.tight_layout()
        save_chart(fig, 'product_performance.png')
        plt.close()

    # 9. Validate revenue
    print("\nValidating revenue...")
    validate_revenue(df, ANALYSIS_NAME)

    print(f"\n{ANALYSIS_NAME} complete!")
    print(f"Charts saved to: {OUTPUT_DIR}")

# ============================================================================
# RUN ANALYSIS
# ============================================================================

if __name__ == "__main__":
    main()
def export_summary_table(data_dict, filename, output_dir=None, title=None):
    """
    Export summary statistics to formatted table (Excel)

    Args:
        data_dict: Dictionary of {metric_name: value} pairs
        filename: Output filename (e.g., 'summary.xlsx')
        output_dir: Output directory (defaults to config.REPORTS_DIR)
        title: Optional title for the summary table

    Returns:
        Path to exported file

    Raises:
        ImportError: If openpyxl is not installed

    Example:
        export_summary_table({
            'Total Revenue': 1000000,
            'Customer Count': 500,
            'Average Order Value': 2000
        }, 'summary.xlsx')
    """
    # Excel support is optional; fail with an actionable message.
    try:
        import openpyxl
    except ImportError:
        raise ImportError(
            "openpyxl is required for Excel export. Install with: pip install openpyxl"
        )

    target_dir = Path(output_dir) if output_dir is not None else REPORTS_DIR
    ensure_directories()
    target_dir.mkdir(exist_ok=True)
    filepath = target_dir / filename

    def _fmt(val):
        # Pretty-print numeric values as $x.xx / $x.xxk / $x.xxm.
        if not isinstance(val, (int, float)):
            return str(val)
        magnitude = abs(val)
        if magnitude >= 1e6:
            return f"${val / 1e6:.2f}m"
        if magnitude >= 1e3:
            return f"${val / 1e3:.2f}k"
        return f"${val:.2f}"

    summary_df = pd.DataFrame({
        'Metric': list(data_dict.keys()),
        'Value': list(data_dict.values())
    })
    summary_df['Formatted_Value'] = summary_df['Value'].apply(_fmt)

    with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
        summary_df.to_excel(writer, sheet_name='Summary', index=False)

        worksheet = writer.sheets['Summary']

        # Fixed, readable column widths.
        for col_letter, width in (('A', 30), ('B', 20), ('C', 20)):
            worksheet.column_dimensions[col_letter].width = width

        # Optional bold, centered title row above the table.
        if title:
            worksheet.insert_rows(1)
            worksheet.merge_cells('A1:C1')
            worksheet['A1'] = title
            worksheet['A1'].font = openpyxl.styles.Font(bold=True, size=14)
            worksheet['A1'].alignment = openpyxl.styles.Alignment(horizontal='center')

    print(f"Exported summary table to Excel: {filepath}")

    return filepath
def generate_sample_sales_data(
    num_customers=100,
    num_products=50,
    years=None,
    transactions_per_month=500,
    output_file='sample_sales_data.csv'
):
    """
    Generate realistic sample sales data

    Args:
        num_customers: Number of unique customers
        num_products: Number of unique products
        years: List of years to generate data for (defaults to 2021-2025)
        transactions_per_month: Average transactions per month
        output_file: Output CSV filename

    Returns:
        DataFrame: Generated sales data
    """
    # BUGFIX: the previous signature used a mutable list ([2021, ...]) as
    # the default for `years`, which is shared across calls; use a None
    # sentinel and materialize the default per call instead.
    if years is None:
        years = [2021, 2022, 2023, 2024, 2025]

    print("Generating sample sales data...")
    print(f" Customers: {num_customers}")
    print(f" Products: {num_products}")
    print(f" Years: {years}")

    # Synthetic customer and product identifiers.
    customer_names = [f"Customer_{i:04d}" for i in range(1, num_customers + 1)]
    product_names = [f"Product_{i:04d}" for i in range(1, num_products + 1)]

    # Capture "now" once so the future-month cutoff is consistent for the
    # whole run (previously re-evaluated every month inside the loop).
    current_date = datetime.now()

    transactions = []

    for year in years:
        for month in range(1, 13):
            # Skip future months
            if year > current_date.year or (year == current_date.year and month > current_date.month):
                continue

            # Transactions this month: normally distributed around the
            # mean, floored at 10.
            num_transactions = int(np.random.normal(transactions_per_month, transactions_per_month * 0.2))
            num_transactions = max(10, num_transactions)  # At least 10 transactions

            for _ in range(num_transactions):
                # Random day within the month (February treated as 28 days).
                if month == 2:
                    max_day = 28
                elif month in [4, 6, 9, 11]:
                    max_day = 30
                else:
                    max_day = 31

                day = random.randint(1, max_day)
                invoice_date = datetime(year, month, day)

                # Random customer and product
                customer = random.choice(customer_names)
                product = random.choice(product_names)

                # Quantity: log-normal so most orders are small; capped at 100.
                quantity = int(np.random.lognormal(mean=1.5, sigma=1.0))
                quantity = max(1, min(quantity, 100))

                # Revenue correlates with quantity, with +/-20% noise.
                base_price = np.random.lognormal(mean=5, sigma=1.5)
                revenue = base_price * quantity
                revenue *= np.random.uniform(0.8, 1.2)
                revenue = round(revenue, 2)

                transactions.append({
                    'InvoiceDate': invoice_date,
                    'Customer': customer,
                    'Item': product,
                    'Quantity': quantity,
                    'USD': revenue,
                    'Year': year,
                    'Month': month
                })

    # Create DataFrame
    df = pd.DataFrame(transactions)

    # Sort by date
    df = df.sort_values('InvoiceDate').reset_index(drop=True)

    # Blank out ~5% of dates to mimic a realistic data-quality issue.
    missing_date_pct = 0.05  # 5% missing dates
    num_missing = int(len(df) * missing_date_pct)
    missing_indices = np.random.choice(df.index, size=num_missing, replace=False)
    df.loc[missing_indices, 'InvoiceDate'] = pd.NaT

    # Save to CSV
    output_path = Path(output_file)
    df.to_csv(output_path, index=False)
    print(f"\nāœ… Sample data generated: {output_path}")
    print(f" Rows: {len(df):,}")
    print(f" Date range: {df['InvoiceDate'].min()} to {df['InvoiceDate'].max()}")
    print(f" Total revenue: ${df['USD'].sum() / 1e6:.2f}m")

    return df
"""
Logging configuration for analysis scripts
Provides structured logging with file and console output

Usage:
    from logger_config import get_logger

    logger = get_logger('my_analysis')
    logger.info("Analysis started")
    logger.warning("Low data quality detected")
    logger.error("Failed to load data")
"""
import logging
import sys
from pathlib import Path
from datetime import datetime
from config import COMPANY_NAME, OUTPUT_DIR

# Lazily-created module-level singleton returned by get_logger().
_logger = None


def setup_logging(log_level=logging.INFO, log_file=None, analysis_name=None):
    """
    Set up logging with a detailed file handler and a compact console handler.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        log_file: Path to log file (defaults to logs/analysis_YYYYMMDD_HHMMSS.log)
        analysis_name: Name of analysis for logger and log-file naming

    Returns:
        logging.Logger: Configured logger instance
    """
    global _logger

    # Logs always live under ./logs relative to the working directory.
    logs_dir = Path('logs')
    logs_dir.mkdir(exist_ok=True)

    # Derive a timestamped default log file name when none is supplied.
    if log_file is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        if analysis_name:
            safe_name = analysis_name.lower().replace(' ', '_').replace('/', '_')
            log_file = logs_dir / f"{safe_name}_{timestamp}.log"
        else:
            log_file = logs_dir / f"analysis_{timestamp}.log"
    else:
        log_file = Path(log_file)
        log_file.parent.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger(analysis_name or 'analysis')
    logger.setLevel(log_level)

    # Clear existing handlers so repeated setup calls don't duplicate output.
    logger.handlers = []

    detailed_formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    console_formatter = logging.Formatter('%(levelname)s - %(message)s')

    # File handler: full detail.
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(log_level)
    file_handler.setFormatter(detailed_formatter)
    logger.addHandler(file_handler)

    # Console handler: compact.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(log_level)
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # Startup banner.
    logger.info("=" * 60)
    logger.info(f"Analysis: {analysis_name or 'Unknown'}")
    logger.info(f"Company: {COMPANY_NAME}")
    logger.info(f"Log File: {log_file}")
    logger.info("=" * 60)

    _logger = logger
    return logger


def get_logger(analysis_name=None, log_level=logging.INFO):
    """
    Get the shared logger, creating it on first use.

    Args:
        analysis_name: Name of analysis (only honored on first creation)
        log_level: Logging level (default: INFO)

    Returns:
        logging.Logger: Logger instance
    """
    global _logger
    if _logger is None:
        _logger = setup_logging(log_level=log_level, analysis_name=analysis_name)
    return _logger


def log_analysis_start(analysis_name, logger=None):
    """
    Log analysis start.

    Args:
        analysis_name: Name of analysis
        logger: Logger instance (creates one if None)
    """
    if logger is None:
        logger = get_logger(analysis_name)
    logger.info(f"Starting analysis: {analysis_name}")
    logger.info(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


def log_analysis_end(analysis_name, success=True, logger=None):
    """
    Log analysis completion.

    Args:
        analysis_name: Name of analysis
        success: Whether analysis completed successfully
        logger: Logger instance (creates one if None)
    """
    if logger is None:
        logger = get_logger(analysis_name)
    if success:
        logger.info(f"Analysis completed successfully: {analysis_name}")
    else:
        logger.error(f"Analysis failed: {analysis_name}")
    logger.info(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info("=" * 60)


def log_data_loading(df, logger=None):
    """
    Log a short data-loading summary (row/column counts, revenue, date coverage).

    Args:
        df: Loaded DataFrame
        logger: Logger instance (creates one if None)
    """
    if logger is None:
        logger = get_logger()

    logger.info(f"Data loaded: {len(df):,} rows, {len(df.columns)} columns")

    # Local import keeps config a soft dependency for these summaries.
    from config import REVENUE_COLUMN, DATE_COLUMN
    if REVENUE_COLUMN in df.columns:
        revenue = df[REVENUE_COLUMN].sum()
        logger.info(f"Total revenue: ${revenue / 1e6:.2f}m")
    if DATE_COLUMN in df.columns:
        date_coverage = df[DATE_COLUMN].notna().sum() / len(df) * 100
        logger.info(f"Date coverage: {date_coverage:.1f}%")


def log_error(error, logger=None, context=None):
    """
    Log an error with optional context and full traceback.

    Args:
        error: Exception or error message
        logger: Logger instance (creates one if None)
        context: Additional context string prefixed to the message
    """
    if logger is None:
        logger = get_logger()
    error_msg = str(error)
    if context:
        error_msg = f"{context}: {error_msg}"
    logger.error(error_msg, exc_info=True)


# ============================================================================
# EXAMPLE USAGE
# ============================================================================

if __name__ == "__main__":
    """Example usage"""
    logger = setup_logging(log_level=logging.DEBUG, analysis_name="Example Analysis")

    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")

    log_analysis_start("Example Analysis", logger)
    # BUG FIX: the original called
    #   log_analysis_end("Example Analysis", success=True, logger)
    # -- a positional argument after a keyword argument, which is a
    # SyntaxError and prevented this module from importing at all.
    log_analysis_end("Example Analysis", success=True, logger=logger)
"""
Report generation utility
Combines multiple charts and data into a PDF report

Usage:
    from report_generator import generate_pdf_report

    # Generate PDF report
    generate_pdf_report(
        charts=['chart1.png', 'chart2.png'],
        title='Sales Analysis Report',
        summary_data={'Total Revenue': 1000000}
    )
"""
from pathlib import Path
from datetime import datetime
from config import COMPANY_NAME, OUTPUT_DIR, REPORTS_DIR, ensure_directories


def _format_summary_value(value):
    """Format a summary metric: $x.xxm / $x.xxk / $x.xx for numbers, str otherwise."""
    if isinstance(value, (int, float)):
        if abs(value) >= 1e6:
            return f"${value / 1e6:.2f}m"
        if abs(value) >= 1e3:
            return f"${value / 1e3:.2f}k"
        return f"${value:.2f}"
    return str(value)


def generate_pdf_report(
    charts,
    title=None,
    summary_data=None,
    output_filename=None,
    output_dir=None
):
    """
    Generate a PDF report from chart images and summary metrics.

    Args:
        charts: List of chart file paths (PNG files)
        title: Report title (defaults to "<company> Sales Analysis Report")
        summary_data: Dictionary of summary metrics (name -> value)
        output_filename: Output PDF filename (defaults to report_YYYYMMDD_HHMMSS.pdf)
        output_dir: Output directory (defaults to config.REPORTS_DIR)

    Returns:
        Path: Path to generated PDF file

    Raises:
        ImportError: If reportlab is not installed
    """
    # reportlab is an optional dependency -- import lazily so the rest of
    # the template works without it. (The original also imported A4 and
    # TA_LEFT, neither of which was used.)
    try:
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.units import inch
        from reportlab.lib import colors
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle,
            PageBreak,
        )
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.enums import TA_CENTER
    except ImportError:
        raise ImportError(
            "reportlab is required for PDF generation. Install with: pip install reportlab"
        )

    if output_dir is None:
        output_dir = REPORTS_DIR
    else:
        output_dir = Path(output_dir)

    ensure_directories()
    # parents=True so a nested reports directory is created as needed
    # (the original's bare mkdir failed if the parent was missing).
    output_dir.mkdir(parents=True, exist_ok=True)

    if output_filename is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_filename = f"report_{timestamp}.pdf"

    output_path = output_dir / output_filename

    doc = SimpleDocTemplate(
        str(output_path),
        pagesize=letter,
        rightMargin=0.75*inch,
        leftMargin=0.75*inch,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch
    )

    # `story` collects flowables in render order.
    story = []

    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=20,
        textColor=colors.HexColor('#2E86AB'),
        spaceAfter=30,
        alignment=TA_CENTER
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        textColor=colors.HexColor('#2E86AB'),
        spaceAfter=12
    )

    if title is None:
        title = f"{COMPANY_NAME} Sales Analysis Report"

    story.append(Paragraph(title, title_style))
    story.append(Spacer(1, 0.2*inch))

    metadata_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    story.append(Paragraph(metadata_text, styles['Normal']))
    story.append(Spacer(1, 0.3*inch))

    # Summary metrics rendered as a two-column table.
    if summary_data:
        story.append(Paragraph("Summary", heading_style))

        table_data = [['Metric', 'Value']]
        for key, value in summary_data.items():
            table_data.append([key, _format_summary_value(value)])

        table = Table(table_data, colWidths=[3*inch, 2*inch])
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2E86AB')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey])
        ]))

        story.append(table)
        story.append(Spacer(1, 0.3*inch))

    # One chart per page (page break between charts, none after the last).
    if charts:
        story.append(Paragraph("Charts", heading_style))

        for i, chart_path in enumerate(charts, 1):
            chart_path = Path(chart_path)

            if not chart_path.exists():
                print(f"Warning: Chart not found: {chart_path}")
                continue

            chart_title = f"Chart {i}: {chart_path.stem.replace('_', ' ').title()}"
            story.append(Paragraph(chart_title, styles['Heading3']))
            story.append(Spacer(1, 0.1*inch))

            try:
                img = Image(str(chart_path), width=6*inch, height=4*inch)
                story.append(img)
            except Exception as e:
                # A corrupt/unreadable image should not abort the report.
                error_msg = f"Error loading chart: {e}"
                story.append(Paragraph(error_msg, styles['Normal']))

            if i < len(charts):
                story.append(PageBreak())

    doc.build(story)

    print(f"PDF report generated: {output_path}")

    return output_path


def generate_simple_report(charts, title=None, output_filename=None):
    """
    Generate a simple PDF report (wrapper with defaults).

    Args:
        charts: List of chart file paths
        title: Report title
        output_filename: Output filename

    Returns:
        Path: Path to generated PDF
    """
    return generate_pdf_report(
        charts=charts,
        title=title,
        output_filename=output_filename
    )


# ============================================================================
# EXAMPLE USAGE
# ============================================================================

if __name__ == "__main__":
    """Example usage"""
    from config import OUTPUT_DIR

    # Find charts in output directory.
    chart_files = list(OUTPUT_DIR.glob('*.png'))

    if chart_files:
        print(f"Found {len(chart_files)} charts")

        report_path = generate_pdf_report(
            charts=[str(f) for f in chart_files[:5]],  # Limit to 5 charts
            title="Sales Analysis Report",
            summary_data={
                'Total Charts': len(chart_files),
                'Report Date': datetime.now().strftime('%Y-%m-%d')
            }
        )

        print(f"Report saved to: {report_path}")
    else:
        print("No charts found in output directory")
"""
Batch runner for all analysis scripts
Runs all analyses in sequence and generates a summary report

To use:
1. Add your analysis scripts to the ANALYSIS_SCRIPTS list below
2. Run: python run_all_analyses.py
"""
import subprocess
import sys
from pathlib import Path
from datetime import datetime
import time

# ============================================================================
# CONFIGURATION
# ============================================================================

# List of analysis scripts to run
# TODO: Add your analysis scripts here
ANALYSIS_SCRIPTS = [
    # Example structure - customize for your analyses:
    # 'check_annual_revenue.py',
    # 'revenue_analysis.py',
    # 'geographic_analysis.py',
    # 'customer_segmentation.py',
    # 'product_analysis.py',
    # Add your analysis scripts here...
]

# Timeout per script (in seconds)
SCRIPT_TIMEOUT = 600  # 10 minutes

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def run_script(script_path):
    """
    Run a single analysis script in a subprocess.

    Args:
        script_path: Path to the Python script to run

    Returns:
        tuple: (success: bool, elapsed_seconds: float, error: str or None)
    """
    script_name = Path(script_path).name
    print(f"\n{'='*60}")
    print(f"Running: {script_name}")
    print(f"{'='*60}")

    start_time = time.time()

    try:
        # BUG FIX: force UTF-8 decoding of child output. With text=True alone
        # the locale encoding is used; the analysis scripts print emoji, so a
        # non-UTF-8 locale (e.g. Windows cp1252) would raise
        # UnicodeDecodeError here. errors='replace' keeps the runner alive
        # even for malformed output.
        result = subprocess.run(
            [sys.executable, script_path],
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            timeout=SCRIPT_TIMEOUT
        )

        elapsed = time.time() - start_time

        if result.returncode == 0:
            print(f"✅ {script_name} completed successfully ({elapsed:.1f}s)")
            if result.stdout:
                # Print at most the last 10 lines of output.
                lines = result.stdout.strip().split('\n')
                if len(lines) > 10:
                    print("   ... (output truncated)")
                    for line in lines[-10:]:
                        print(f"   {line}")
                else:
                    for line in lines:
                        print(f"   {line}")
            return True, elapsed, None
        else:
            print(f"āŒ {script_name} failed ({elapsed:.1f}s)")
            if result.stderr:
                print(f"   Error: {result.stderr[:500]}")
            return False, elapsed, result.stderr

    except subprocess.TimeoutExpired:
        elapsed = time.time() - start_time
        print(f"ā±ļø  {script_name} timed out after {elapsed:.1f}s")
        return False, elapsed, "Timeout"
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"āŒ {script_name} error: {str(e)}")
        return False, elapsed, str(e)

# ============================================================================
# MAIN FUNCTION
# ============================================================================

def main():
    """Run all configured analysis scripts and write a summary file."""
    from config import COMPANY_NAME

    print(f"\n{'='*60}")
    print(f"{COMPANY_NAME} Sales Analysis - Batch Runner")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}\n")

    # Partition the configured scripts into present vs missing on disk.
    existing_scripts = []
    missing_scripts = []
    for script in ANALYSIS_SCRIPTS:
        if Path(script).exists():
            existing_scripts.append(script)
        else:
            missing_scripts.append(script)

    if missing_scripts:
        print(f"āš ļø  Warning: {len(missing_scripts)} scripts not found:")
        for script in missing_scripts:
            print(f"   - {script}")
        print()

    if not existing_scripts:
        print("āŒ No analysis scripts found!")
        print("   Please add analysis scripts to ANALYSIS_SCRIPTS list in run_all_analyses.py")
        return

    print(f"Found {len(existing_scripts)} analysis scripts to run\n")

    # Run each script, collecting per-script outcomes.
    results = []
    total_start = time.time()

    for script in existing_scripts:
        success, elapsed, error = run_script(script)
        results.append({
            'script': script,
            'success': success,
            'elapsed': elapsed,
            'error': error
        })

    total_elapsed = time.time() - total_start

    # Print summary.
    print(f"\n{'='*60}")
    print("Batch Run Summary")
    print(f"{'='*60}\n")

    successful = [r for r in results if r['success']]
    failed = [r for r in results if not r['success']]

    print(f"Total scripts: {len(results)}")
    print(f"✅ Successful: {len(successful)}")
    print(f"āŒ Failed: {len(failed)}")
    print(f"ā±ļø  Total time: {total_elapsed/60:.1f} minutes\n")

    if failed:
        print("Failed scripts:")
        for r in failed:
            print(f"  āŒ {r['script']} ({r['elapsed']:.1f}s)")
            if r['error']:
                print(f"     Error: {r['error'][:100]}")
        print()

    # Save summary to file. BUG FIX: explicit UTF-8 encoding -- the summary
    # contains emoji, and the platform default encoding may not handle them.
    summary_file = Path('analysis_run_summary.txt')
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write(f"{COMPANY_NAME} Sales Analysis - Batch Run Summary\n")
        f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"{'='*60}\n\n")
        f.write(f"Total scripts: {len(results)}\n")
        f.write(f"Successful: {len(successful)}\n")
        f.write(f"Failed: {len(failed)}\n")
        f.write(f"Total time: {total_elapsed/60:.1f} minutes\n\n")

        if successful:
            f.write("Successful scripts:\n")
            for r in successful:
                f.write(f"  ✅ {r['script']} ({r['elapsed']:.1f}s)\n")
            f.write("\n")

        if failed:
            f.write("Failed scripts:\n")
            for r in failed:
                f.write(f"  āŒ {r['script']} ({r['elapsed']:.1f}s)\n")
                if r['error']:
                    f.write(f"     Error: {r['error']}\n")

    print(f"Summary saved to: {summary_file}")

if __name__ == "__main__":
    main()
"""
Interactive setup wizard for configuring the sales analysis template
Asks clarifying questions to configure config.py for your specific company and data
"""
import os
import re
import sys
from pathlib import Path

def print_header(text):
    """Print a formatted header"""
    print("\n" + "="*70)
    print(f"  {text}")
    print("="*70 + "\n")

def ask_question(prompt, default=None, validator=None):
    """
    Ask a question on stdin and return the (validated) answer.

    Args:
        prompt: Question to ask
        default: Default value if user just presses Enter
        validator: Optional function to validate/convert input

    Returns:
        User's answer, passed through `validator` when one is given.
    """
    if default:
        full_prompt = f"{prompt} [{default}]: "
    else:
        full_prompt = f"{prompt}: "

    while True:
        answer = input(full_prompt).strip()

        if not answer:
            if not default:
                print("  Please provide an answer.")
                continue
            # BUG FIX: the original returned the default *without* running the
            # validator, so e.g. accepting the default "yes" for a yes/no
            # question stored the string "yes" instead of True, which later
            # produced invalid Python (LTM_ENABLED = yes) in config.py.
            answer = default

        if validator:
            try:
                return validator(answer)
            except Exception as e:
                print(f"  Invalid input: {e}")
                continue

        return answer

def validate_yes_no(answer):
    """Validate yes/no answer; returns a bool."""
    answer_lower = answer.lower()
    if answer_lower in ['y', 'yes', 'true', '1']:
        return True
    elif answer_lower in ['n', 'no', 'false', '0']:
        return False
    else:
        raise ValueError("Please answer 'yes' or 'no'")

def validate_int(answer):
    """Validate integer answer; returns an int."""
    return int(answer)

def validate_file_exists(answer):
    """Validate that a file exists; returns the path string."""
    if not Path(answer).exists():
        raise ValueError(f"File not found: {answer}")
    return answer

def _gather_responses():
    """Interactively collect all configuration answers; returns a dict."""
    responses = {}

    # Company Information
    print_header("Company Information")
    responses['company_name'] = ask_question("Company Name", default="Your Company Name")
    responses['analysis_date'] = ask_question("Analysis Date (YYYY-MM-DD)", default="2026-01-12")

    # Data File
    print_header("Data File Configuration")
    print("Where is your sales data CSV file located?")
    data_file = ask_question("Data file name (e.g., sales_data.csv)", default="sales_data.csv")

    if Path(data_file).exists():
        print(f"  ✓ Found: {data_file}")
    else:
        print(f"  ⚠ Warning: {data_file} not found. Make sure to place it in the template directory.")

    responses['data_file'] = data_file

    # Column Mapping
    print_header("Column Mapping")
    print("What are the column names in your CSV file?")
    print("(Press Enter to accept defaults if your columns match common names)\n")

    responses['revenue_column'] = ask_question("Revenue/Amount column name", default="USD")
    responses['date_column'] = ask_question("Primary date column name", default="InvoiceDate")

    has_fallback = ask_question("Do you have fallback date columns (Month, Year)?", default="yes", validator=validate_yes_no)
    if has_fallback:
        fallback_str = ask_question("Fallback date columns (comma-separated)", default="Month, Year")
        responses['date_fallback'] = [col.strip() for col in fallback_str.split(',')]
    else:
        responses['date_fallback'] = []

    responses['customer_column'] = ask_question("Customer/Account column name", default="Customer")
    responses['item_column'] = ask_question("Item/Product column name", default="Item")

    has_quantity = ask_question("Do you have a Quantity column?", default="yes", validator=validate_yes_no)
    if has_quantity:
        responses['quantity_column'] = ask_question("Quantity column name", default="Quantity")
    else:
        responses['quantity_column'] = None

    # Date Range
    print_header("Date Range Configuration")
    responses['min_year'] = ask_question("Minimum year to include in analysis", default="2021", validator=validate_int)
    responses['max_date'] = ask_question("Maximum date (YYYY-MM-DD)", default="2025-09-30")

    years_str = ask_question("Analysis years (comma-separated, e.g., 2021,2022,2023,2024,2025)", default="2021,2022,2023,2024,2025")
    responses['analysis_years'] = [int(y.strip()) for y in years_str.split(',')]

    # LTM Configuration
    print_header("LTM (Last Twelve Months) Configuration")
    print("LTM is used for the most recent partial year to enable apples-to-apples comparison.")
    print("Example: If your latest data is through September 2025, use Oct 2024 - Sep 2025.\n")

    use_ltm = ask_question("Do you need LTM for the most recent year?", default="yes", validator=validate_yes_no)
    responses['ltm_enabled'] = use_ltm

    if use_ltm:
        responses['ltm_start_month'] = ask_question("LTM start month (1-12)", default="10", validator=validate_int)
        responses['ltm_start_year'] = ask_question("LTM start year", default="2024", validator=validate_int)
        responses['ltm_end_month'] = ask_question("LTM end month (1-12)", default="9", validator=validate_int)
        responses['ltm_end_year'] = ask_question("LTM end year", default="2025", validator=validate_int)
    else:
        # Keep sensible placeholders so config.py stays internally consistent.
        responses['ltm_start_month'] = 10
        responses['ltm_start_year'] = 2024
        responses['ltm_end_month'] = 9
        responses['ltm_end_year'] = 2025

    # Exclusion Filters
    print_header("Exclusion Filters (Optional)")
    use_exclusions = ask_question("Do you need to exclude specific segments (e.g., test accounts, business units)?", default="no", validator=validate_yes_no)
    responses['exclusions_enabled'] = use_exclusions

    if use_exclusions:
        responses['exclude_column'] = ask_question("Column name to filter on", default="Country")
        exclude_values_str = ask_question("Values to exclude (comma-separated)", default="")
        responses['exclude_values'] = [v.strip() for v in exclude_values_str.split(',') if v.strip()]
    else:
        responses['exclude_column'] = None
        responses['exclude_values'] = []

    return responses

def _apply_config(responses):
    """Rewrite config.py in place using the collected responses."""
    print_header("Generating Configuration")
    print("Updating config.py with your settings...")

    config_path = Path('config.py')
    if not config_path.exists():
        print("ERROR: config.py not found!")
        return

    with open(config_path, 'r', encoding='utf-8') as f:
        config_content = f.read()

    # Literal template values -> user-configured values. These must match
    # the shipped config.py byte-for-byte to be replaced.
    replacements = {
        "COMPANY_NAME = \"Your Company Name\"": f"COMPANY_NAME = \"{responses['company_name']}\"",
        "ANALYSIS_DATE = \"2026-01-12\"": f"ANALYSIS_DATE = \"{responses['analysis_date']}\"",
        "DATA_FILE = 'sales_data.csv'": f"DATA_FILE = '{responses['data_file']}'",
        "REVENUE_COLUMN = 'USD'": f"REVENUE_COLUMN = '{responses['revenue_column']}'",
        "DATE_COLUMN = 'InvoiceDate'": f"DATE_COLUMN = '{responses['date_column']}'",
        "DATE_FALLBACK_COLUMNS = ['Month', 'Year']": f"DATE_FALLBACK_COLUMNS = {responses['date_fallback']}",
        "CUSTOMER_COLUMN = 'Customer'": f"CUSTOMER_COLUMN = '{responses['customer_column']}'",
        "ITEM_COLUMN = 'Item'": f"ITEM_COLUMN = '{responses['item_column']}'",
        "QUANTITY_COLUMN = 'Quantity'": f"QUANTITY_COLUMN = '{responses['quantity_column']}'" if responses['quantity_column'] else "QUANTITY_COLUMN = None",
        "MIN_YEAR = 2021": f"MIN_YEAR = {responses['min_year']}",
        "MAX_DATE = pd.Timestamp('2025-09-30')": f"MAX_DATE = pd.Timestamp('{responses['max_date']}')",
        "ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025]": f"ANALYSIS_YEARS = {responses['analysis_years']}",
        "LTM_ENABLED = True": f"LTM_ENABLED = {responses['ltm_enabled']}",
        "LTM_START_MONTH = 10": f"LTM_START_MONTH = {responses['ltm_start_month']}",
        "LTM_START_YEAR = 2024": f"LTM_START_YEAR = {responses['ltm_start_year']}",
        "LTM_END_MONTH = 9": f"LTM_END_MONTH = {responses['ltm_end_month']}",
        "LTM_END_YEAR = 2025": f"LTM_END_YEAR = {responses['ltm_end_year']}",
    }

    # Replace the whole EXCLUSION_FILTERS dict (non-greedy, DOTALL so the
    # multi-line literal is matched up to its closing brace).
    if responses['exclusions_enabled']:
        exclusions_config = f"""EXCLUSION_FILTERS = {{
    'enabled': True,
    'exclude_by_column': '{responses['exclude_column']}',
    'exclude_values': {responses['exclude_values']}
}}"""
    else:
        exclusions_config = """EXCLUSION_FILTERS = {
    'enabled': False,
    'exclude_by_column': None,
    'exclude_values': []
}"""
    pattern = r"EXCLUSION_FILTERS = \{.*?\}"
    config_content = re.sub(pattern, exclusions_config, config_content, flags=re.DOTALL)

    for old, new in replacements.items():
        if old in config_content:
            config_content = config_content.replace(old, new)

    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(config_content)

    print("  ✓ Configuration updated successfully!")

def main():
    """Run the setup wizard"""
    print_header("Sales Analysis Template - Setup Wizard")
    print("This wizard will help you configure the template for your company's data.")
    print("You can press Enter to accept defaults (shown in brackets).\n")

    # Collect answers, then write them into config.py.
    responses = _gather_responses()
    _apply_config(responses)

    # Summary
    print_header("Setup Complete")
    print("Your configuration has been saved to config.py")
    print("\nNext steps:")
    print("1. Place your data file in the template directory (if not already there)")
    print("2. Test data loading: python -c \"from data_loader import load_sales_data; from config import get_data_path; df = load_sales_data(get_data_path()); print(f'Loaded {len(df):,} rows')\"")
    print("3. Review config.py and adjust any settings as needed")
    print("4. Start creating your analysis scripts using analysis_template.py")
    print("\nFor help, see README.md")

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nSetup cancelled by user.")
        sys.exit(0)
Returns -10.0 + """ + if previous == 0: + return np.nan if current == 0 else np.inf + + return ((current - previous) / previous) * 100 + +def calculate_cagr(start_value, end_value, periods): + """ + Calculate Compound Annual Growth Rate (CAGR) + + Args: + start_value: Starting value + end_value: Ending value + periods: Number of periods (years) + + Returns: + float: CAGR as percentage + + Example: + calculate_cagr(100, 150, 3) # Returns ~14.47% + """ + if start_value <= 0 or periods <= 0: + return np.nan + + if end_value <= 0: + return np.nan + + cagr = ((end_value / start_value) ** (1 / periods) - 1) * 100 + return cagr + +def calculate_correlation(df, col1, col2): + """ + Calculate correlation between two columns + + Args: + df: DataFrame + col1: First column name + col2: Second column name + + Returns: + float: Correlation coefficient (-1 to 1) + """ + if col1 not in df.columns or col2 not in df.columns: + return np.nan + + # Convert to numeric + series1 = pd.to_numeric(df[col1], errors='coerce') + series2 = pd.to_numeric(df[col2], errors='coerce') + + # Remove NaN pairs + valid_mask = series1.notna() & series2.notna() + if valid_mask.sum() < 2: + return np.nan + + correlation = series1[valid_mask].corr(series2[valid_mask]) + return correlation + +def calculate_trend_slope(y_values): + """ + Calculate linear trend slope + + Args: + y_values: Array-like of y values + + Returns: + float: Slope of linear trend + """ + if len(y_values) < 2: + return np.nan + + x_values = np.arange(len(y_values)) + + # Remove NaN values + valid_mask = ~np.isnan(y_values) + if valid_mask.sum() < 2: + return np.nan + + x_valid = x_values[valid_mask] + y_valid = y_values[valid_mask] + + slope, intercept, r_value, p_value, std_err = stats.linregress(x_valid, y_valid) + return slope + +def calculate_percent_change(series, periods=1): + """ + Calculate percent change over periods + + Args: + series: Pandas Series + periods: Number of periods to shift (default: 1) + + Returns: + Series: 
Percent change + """ + return series.pct_change(periods=periods) * 100 + +def calculate_moving_average(series, window=3): + """ + Calculate moving average + + Args: + series: Pandas Series + window: Window size for moving average + + Returns: + Series: Moving average + """ + return series.rolling(window=window, center=False).mean() + +def calculate_volatility(series, window=12): + """ + Calculate rolling volatility (standard deviation) + + Args: + series: Pandas Series + window: Window size for rolling calculation + + Returns: + Series: Rolling volatility + """ + return series.rolling(window=window, center=False).std() + +def calculate_z_score(value, mean, std): + """ + Calculate z-score + + Args: + value: Value to score + mean: Mean of distribution + std: Standard deviation of distribution + + Returns: + float: Z-score + """ + if std == 0: + return np.nan + + return (value - mean) / std + +def test_statistical_significance(group1, group2, alpha=0.05): + """ + Test statistical significance between two groups (t-test) + + Args: + group1: First group (array-like) + group2: Second group (array-like) + alpha: Significance level (default: 0.05) + + Returns: + dict: Test results with p-value, significant flag, etc. 
+ """ + group1 = np.array(group1) + group2 = np.array(group2) + + # Remove NaN values + group1 = group1[~np.isnan(group1)] + group2 = group2[~np.isnan(group2)] + + if len(group1) < 2 or len(group2) < 2: + return { + 'p_value': np.nan, + 'significant': False, + 'test_statistic': np.nan, + 'error': 'Insufficient data' + } + + # Perform t-test + t_statistic, p_value = stats.ttest_ind(group1, group2) + + return { + 'p_value': float(p_value), + 'significant': p_value < alpha, + 'test_statistic': float(t_statistic), + 'alpha': alpha, + 'group1_mean': float(np.mean(group1)), + 'group2_mean': float(np.mean(group2)), + 'group1_std': float(np.std(group1)), + 'group2_std': float(np.std(group2)) + } + +def calculate_confidence_interval(series, confidence=0.95): + """ + Calculate confidence interval for a series + + Args: + series: Pandas Series + confidence: Confidence level (default: 0.95 for 95%) + + Returns: + dict: Mean, lower bound, upper bound + """ + series_clean = series.dropna() + + if len(series_clean) == 0: + return { + 'mean': np.nan, + 'lower': np.nan, + 'upper': np.nan, + 'confidence': confidence + } + + mean = series_clean.mean() + std = series_clean.std() + n = len(series_clean) + + # Calculate standard error + se = std / np.sqrt(n) + + # Calculate critical value (z-score for normal distribution) + alpha = 1 - confidence + z_critical = stats.norm.ppf(1 - alpha/2) + + margin = z_critical * se + + return { + 'mean': float(mean), + 'lower': float(mean - margin), + 'upper': float(mean + margin), + 'confidence': confidence, + 'margin': float(margin) + } + +def calculate_annual_growth_rates(values, years): + """ + Calculate year-over-year growth rates for annual data + + Args: + values: Array-like of annual values + years: Array-like of corresponding years + + Returns: + DataFrame: Years, values, and growth rates + """ + df = pd.DataFrame({ + 'Year': years, + 'Value': values + }) + + df['YoY_Growth'] = calculate_percent_change(df['Value']) + df['YoY_Change'] = 
df['Value'].diff() + + return df + +def calculate_seasonality_index(monthly_series): + """ + Calculate seasonality index for monthly data + + Args: + monthly_series: Series with datetime index (monthly frequency) + + Returns: + Series: Seasonality index (1.0 = average, >1.0 = above average, <1.0 = below average) + """ + if not isinstance(monthly_series.index, pd.DatetimeIndex): + raise ValueError("Series must have DatetimeIndex") + + # Extract month + monthly_series = monthly_series.copy() + monthly_series['Month'] = monthly_series.index.month + + # Calculate average by month + monthly_avg = monthly_series.groupby('Month').mean() + overall_avg = monthly_series.mean() + + # Calculate seasonality index + seasonality = monthly_avg / overall_avg + + return seasonality + +# ============================================================================ +# EXAMPLE USAGE +# ============================================================================ + +if __name__ == "__main__": + """Example usage""" + # YoY Growth + growth = calculate_yoy_growth(110, 100) + print(f"Year-over-year growth: {growth:.2f}%") + + # CAGR + cagr = calculate_cagr(100, 150, 3) + print(f"CAGR: {cagr:.2f}%") + + # Sample data for correlation + df = pd.DataFrame({ + 'Revenue': [100, 110, 120, 130, 140], + 'Quantity': [10, 11, 12, 13, 14] + }) + corr = calculate_correlation(df, 'Revenue', 'Quantity') + print(f"Correlation: {corr:.2f}") diff --git a/tests/test_analysis_utils.py b/tests/test_analysis_utils.py new file mode 100644 index 0000000..808276f --- /dev/null +++ b/tests/test_analysis_utils.py @@ -0,0 +1,85 @@ +""" +Unit tests for analysis_utils.py +""" +import pytest +import pandas as pd +import numpy as np +from pathlib import Path +import sys + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from analysis_utils import ( + millions_formatter, thousands_formatter, + get_millions_formatter, get_thousands_formatter, + format_currency, 
# --- tests/test_analysis_utils.py ---
# NOTE: these classes exercise helpers imported at module level from
# analysis_utils (millions_formatter, thousands_formatter, format_currency,
# calculate_price_per_unit, sort_mixed_years, safe_year_labels).

class TestFormatters:
    """Tests for the currency axis/label formatting helpers."""

    def test_millions_formatter(self):
        """Values are rendered as '$X.Xm' with one decimal place."""
        assert millions_formatter(10.5, None) == '$10.5m'
        assert millions_formatter(0, None) == '$0.0m'
        assert millions_formatter(100.0, None) == '$100.0m'

    def test_thousands_formatter(self):
        """Values are rendered as '$X.Xk' with one decimal place."""
        assert thousands_formatter(10.5, None) == '$10.5k'
        assert thousands_formatter(0, None) == '$0.0k'

    def test_format_currency(self):
        """format_currency scales to millions/thousands and maps NaN to 'N/A'."""
        assert format_currency(1000000) == '$1.00m'
        assert format_currency(1000, millions=False) == '$1.00k'
        assert format_currency(np.nan) == 'N/A'

class TestPriceCalculation:
    """Tests for revenue-weighted price-per-unit calculation."""

    def test_calculate_price_per_unit(self):
        """Price is total revenue divided by total quantity."""
        frame = pd.DataFrame({
            'Quantity': [10, 20, 30],
            'Revenue': [100, 200, 300]
        })

        # (100 + 200 + 300) / (10 + 20 + 30)
        assert calculate_price_per_unit(frame, 'Quantity', 'Revenue') == 10.0

    def test_calculate_price_per_unit_with_outliers(self):
        """Rows above the default quantity outlier cutoff (1000) are excluded."""
        frame = pd.DataFrame({
            'Quantity': [10, 20, 30, 2000],  # 2000 is an outlier
            'Revenue': [100, 200, 300, 10000]
        })

        # Only the first three rows should contribute.
        assert calculate_price_per_unit(frame, 'Quantity', 'Revenue') == 10.0

class TestYearHandling:
    """Tests for mixed int/str year sorting and labelling."""

    def test_sort_mixed_years(self):
        """Numeric years sort ascending; string LTM labels sort last."""
        frame = pd.DataFrame({
            'Year': [2023, '2025 (LTM)', 2024, 2022],
            'Value': [100, 150, 120, 90]
        })

        ordered = sort_mixed_years(frame, 'Year')
        assert ordered['Year'].iloc[0] == 2022
        assert ordered['Year'].iloc[-1] == '2025 (LTM)'

    def test_safe_year_labels(self):
        """Every year value is converted to a display string."""
        labels = safe_year_labels([2021, 2022, '2025 (LTM)'])
        assert labels == ['2021', '2022', '2025 (LTM)']

if __name__ == "__main__":
    pytest.main([__file__, '-v'])

# --- tests/test_config_validator.py ---

class TestConfigValidator:
    """Tests for configuration validation against a DataFrame."""

    def test_validate_config_missing_column(self):
        """Validation reports errors when required columns are absent."""
        frame = pd.DataFrame({
            'SomeColumn': [1, 2, 3]
        })

        errors, warnings = validate_config(frame)

        # Missing required columns must surface as errors.
        assert len(errors) > 0
        assert any('not found' in error.lower() for error in errors)

    def test_validate_config_valid_data(self):
        """Well-formed data yields no critical 'column not found' errors."""
        frame = pd.DataFrame({
            'InvoiceDate': pd.to_datetime(['2023-01-01', '2023-02-01']),
            'USD': [100.0, 200.0],
            'Year': [2023, 2023]
        })

        errors, warnings = validate_config(frame)

        # BUG FIX: the original condition was
        #   'not found' in e.lower() and 'USD' in e or 'InvoiceDate' in e
        # and because `and` binds tighter than `or`, every error mentioning
        # 'InvoiceDate' was flagged critical regardless of the 'not found'
        # check. Parenthesize the intended grouping.
        critical_errors = [
            e for e in errors
            if 'not found' in e.lower() and ('USD' in e or 'InvoiceDate' in e)
        ]
        assert len(critical_errors) == 0

if __name__ == "__main__":
    pytest.main([__file__, '-v'])
# --- tests/test_data_loader.py ---

class TestDataLoader:
    """Integration tests for data loading and structural validation."""

    def test_load_sales_data_basic(self):
        """Loading a minimal CSV adds the derived Year/YearMonth columns."""
        # Create a temporary CSV fixture.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
            f.write('InvoiceDate,USD,Customer\n')
            f.write('2023-01-01,100.0,Customer1\n')
            f.write('2023-02-01,200.0,Customer2\n')
            temp_path = f.name

        try:
            # Temporarily point config at the fixture.
            import config
            original_data_file = config.DATA_FILE
            config.DATA_FILE = Path(temp_path).name
            try:
                df = load_sales_data(Path(temp_path))

                assert len(df) == 2
                assert 'Year' in df.columns
                assert 'YearMonth' in df.columns
            finally:
                # BUG FIX: restore even when loading or an assertion fails;
                # the original restored only on the success path, leaking the
                # patched DATA_FILE into subsequent tests.
                config.DATA_FILE = original_data_file
        finally:
            os.unlink(temp_path)

    def test_validate_data_structure(self):
        """Structure check passes with required columns, fails without them."""
        # Valid DataFrame.
        df_valid = pd.DataFrame({
            'InvoiceDate': pd.to_datetime(['2023-01-01', '2023-02-01']),
            'USD': [100.0, 200.0]
        })

        is_valid, msg = validate_data_structure(df_valid)
        assert is_valid
        assert msg == "OK"

        # Invalid DataFrame (missing required column).
        df_invalid = pd.DataFrame({
            'InvoiceDate': pd.to_datetime(['2023-01-01'])
        })

        is_valid, msg = validate_data_structure(df_invalid)
        assert not is_valid
        assert 'Missing required column' in msg

if __name__ == "__main__":
    pytest.main([__file__, '-v'])

# --- validate_revenue.py ---

def validate_revenue(dataframe: pd.DataFrame, analysis_name: str = "Analysis") -> None:
    """
    Print an annual revenue summary for validation.

    This function helps ensure that:
    1. Data loading is working correctly
    2. Revenue calculations are consistent
    3. Filters are not accidentally excluding too much data

    Args:
        dataframe: DataFrame with revenue and date columns (must have
            REVENUE_COLUMN and a 'Year' column)
        analysis_name: Name of the analysis (for logging/display)

    Example:
        >>> validate_revenue(df, "Revenue Analysis")
        >>> # Prints annual revenue summary by year
    """
    import re  # hoisted: the original re-imported inside the per-year loop

    df = dataframe.copy()

    # Ensure the date column is datetime (mixed-format tolerant).
    from config import DATE_COLUMN
    if DATE_COLUMN in df.columns:
        df[DATE_COLUMN] = pd.to_datetime(df[DATE_COLUMN], errors='coerce', format='mixed')

    # Restrict to the configured analysis window.
    df = df[df['Year'].isin(ANALYSIS_YEARS)]

    # Calculate annual revenue per year label (LTM-aware when enabled).
    annual_revenue = {}
    ltm_start, ltm_end = get_ltm_period() if LTM_ENABLED else (None, None)

    # Hoisted: the original recomputed df['Year'].unique() on every iteration.
    years_present = set(df['Year'].unique())
    for year in sorted(ANALYSIS_YEARS):
        if year in years_present:
            year_data, year_label = get_annual_data(df, year, ltm_start, ltm_end)
            if len(year_data) > 0:
                annual_revenue[year_label] = year_data[REVENUE_COLUMN].sum()

    # Print summary.
    print(f"\n{'='*60}")
    print(f"Annual Revenue Validation - {analysis_name}")
    print(f"{'='*60}")

    if annual_revenue:
        for year_label, revenue in annual_revenue.items():
            formatted = f"${revenue / 1e6:.2f}m"
            print(f"  {year_label}: {formatted}")

        # Validation against expected values from config.
        if VALIDATION_ENABLED and EXPECTED_REVENUE:
            print(f"\nValidation Check:")
            all_valid = True
            for year_label, actual_revenue in annual_revenue.items():
                # Match the year label back to a plain year key, e.g.
                # "2025 (LTM 9/2025)" -> 2025.
                if isinstance(year_label, str):
                    year_match = re.search(r'(\d{4})', str(year_label))
                    year_key = int(year_match.group(1)) if year_match else None
                else:
                    year_key = year_label

                if year_key in EXPECTED_REVENUE:
                    expected = EXPECTED_REVENUE[year_key]
                    tolerance = expected * REVENUE_TOLERANCE_PCT
                    diff = abs(actual_revenue - expected)

                    if diff <= tolerance:
                        print(f"  āœ“ {year_label}: Within tolerance ({diff/1e6:.2f}m difference)")
                    else:
                        print(f"  āœ— {year_label}: Outside tolerance (expected ${expected/1e6:.2f}m, got ${actual_revenue/1e6:.2f}m, diff: ${diff/1e6:.2f}m)")
                        all_valid = False

            if all_valid:
                print("  All validations passed!")
            else:
                print("  WARNING: Some validations failed. Check data loading and filters.")
    else:
        print("  No revenue data found for analysis years")

    print(f"{'='*60}\n")