Files
sales-data-analysis/.cursor/rules/advanced_analysis_patterns.md
Jonathan Pressnell cf0b596449 Initial commit: sales analysis template
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 09:16:34 -05:00

9.2 KiB
Raw Permalink Blame History

Advanced Analysis Patterns

This document provides patterns for sophisticated, production-grade analyses that leverage the full capabilities of the template framework.

Using Cursor AI Effectively

When working in Cursor, you can ask the AI to:

  • "Create a cohort analysis script using the template patterns"
  • "Add statistical significance testing to this analysis"
  • "Generate a multi-dimensional analysis with product, customer, and geography"
  • "Create a forecasting analysis with confidence intervals"

The AI will automatically use these patterns and utilities.

Advanced Analysis Types

1. Multi-Dimensional Analysis

Pattern: Analyze across multiple dimensions simultaneously (e.g., Product × Customer × Geography)

# NOTE(review): get_data_path was called below but never imported in this
# example — assumed to live in data_loader alongside load_sales_data;
# confirm against the template before copying.
from data_loader import load_sales_data, get_data_path
from analysis_utils import calculate_annual_metrics, get_ltm_period_config
from config import REVENUE_COLUMN, ITEM_COLUMN, CUSTOMER_COLUMN, REGION_COLUMN

df = load_sales_data(get_data_path())

# Multi-dimensional pivot: rows are (product, customer) pairs, one column per
# region, cells hold summed revenue (0 where a pair never sold in a region)
pivot = df.pivot_table(
    index=[ITEM_COLUMN, CUSTOMER_COLUMN],
    columns=REGION_COLUMN,
    values=REVENUE_COLUMN,
    aggfunc='sum',
    fill_value=0
)

# Or use the data_processing helper, which wraps the same pivot
from data_processing import create_pivot_table
pivot = create_pivot_table(
    df,
    index=[ITEM_COLUMN, CUSTOMER_COLUMN],
    columns=REGION_COLUMN,
    values=REVENUE_COLUMN
)

2. Cohort Analysis with Retention Metrics

Pattern: Track customer cohorts over time with retention and revenue metrics

# Build customer cohorts and per-cohort metrics via the packaged example helpers.
from examples.cohort_analysis import create_cohorts, calculate_cohort_metrics

df_cohort = create_cohorts(df)
cohort_metrics = calculate_cohort_metrics(df_cohort)

# Calculate Net Revenue Retention (NRR): take the last Revenue_Retention
# value observed per cohort (iloc[-1]); fall back to 0 for empty groups.
# NOTE(review): assumes calculate_cohort_metrics returns rows ordered by
# period within each cohort — confirm, otherwise iloc[-1] is not "latest".
nrr = cohort_metrics.groupby('Cohort').agg({
    'Revenue_Retention': lambda x: x.iloc[-1] if len(x) > 0 else 0
})

3. Statistical Significance Testing

Pattern: Compare segments with statistical tests

# Compare the revenue distributions of two segments with a statistical test.
from statistical_utils import test_statistical_significance

# Compare two groups
group1 = df[df['Segment'] == 'A'][REVENUE_COLUMN]
group2 = df[df['Segment'] == 'B'][REVENUE_COLUMN]

# result exposes 'significant' (bool) and 'p_value' (float); the exact
# test applied is defined in statistical_utils.
result = test_statistical_significance(group1, group2)
if result['significant']:
    print(f"Significant difference (p={result['p_value']:.4f})")

4. Price-Volume-Mix (PVM) Decomposition

Pattern: Decompose revenue changes into price, volume, and mix effects

from config import QUANTITY_COLUMN, REVENUE_COLUMN

def pvm_decomposition(df_base, df_current, revenue_col=None, quantity_col=None):
    """Decompose a revenue change into price, volume, and mix effects.

    Classic PVM bridge: price and volume effects are computed from the
    aggregate average price and total quantity of each period; the mix
    effect is the residual, so the three effects sum exactly to the
    total revenue change.

    Args:
        df_base: DataFrame for the base (comparison) period.
        df_current: DataFrame for the current period.
        revenue_col: Revenue column name; defaults to config.REVENUE_COLUMN.
        quantity_col: Quantity column name; defaults to config.QUANTITY_COLUMN.

    Returns:
        dict with 'price_effect', 'volume_effect', 'mix_effect', and
        'total_change'; the three effects sum to total_change.

    Note:
        Raises ZeroDivisionError if either period has zero total quantity.
    """
    if revenue_col is None:
        revenue_col = REVENUE_COLUMN
    if quantity_col is None:
        quantity_col = QUANTITY_COLUMN

    base_volume = df_base[quantity_col].sum()
    current_volume = df_current[quantity_col].sum()
    base_price = df_base[revenue_col].sum() / base_volume
    current_price = df_current[revenue_col].sum() / current_volume

    # Price effect: price delta applied at base volume
    price_effect = (current_price - base_price) * base_volume

    # Volume effect: volume delta valued at base price
    volume_effect = (current_volume - base_volume) * base_price

    # Mix effect: residual, so the decomposition ties out exactly
    total_change = df_current[revenue_col].sum() - df_base[revenue_col].sum()
    mix_effect = total_change - price_effect - volume_effect

    return {
        'price_effect': price_effect,
        'volume_effect': volume_effect,
        'mix_effect': mix_effect,
        'total_change': total_change
    }

5. Time Series Forecasting

Pattern: Forecast future revenue with confidence intervals

import numpy as np  # required below (np.arange) but missing from the original example
from scipy import stats

from data_processing import prepare_time_series
from statistical_utils import calculate_confidence_interval

# Prepare a monthly revenue time series
ts = prepare_time_series(df, freq='M')

# Simple forecast: fit a linear trend to the observed series...
x = np.arange(len(ts))
slope, intercept, r_value, p_value, std_err = stats.linregress(x, ts.values)

# ...and extend it 12 months past the last observation
future_x = np.arange(len(ts), len(ts) + 12)
forecast = slope * future_x + intercept

# Confidence interval around the observed series (95%)
ci = calculate_confidence_interval(ts, confidence=0.95)

6. Customer Lifetime Value (CLV) Analysis

Pattern: Calculate CLV using historical data

from config import CUSTOMER_COLUMN, REVENUE_COLUMN, DATE_COLUMN

def calculate_clv(df, years=3, customer_col=None, revenue_col=None, date_col=None):
    """Calculate a simple projected customer lifetime value per customer.

    CLV is estimated as each customer's historical annual run-rate revenue
    projected forward over `years` years.

    Args:
        df: Transaction-level DataFrame with customer, revenue, and date columns.
        years: Projection horizon in years for the CLV figure.
        customer_col: Customer column name; defaults to config.CUSTOMER_COLUMN.
        revenue_col: Revenue column name; defaults to config.REVENUE_COLUMN.
        date_col: Date column name (datetime dtype); defaults to config.DATE_COLUMN.

    Returns:
        DataFrame with one row per customer: Total_Revenue, First_Purchase,
        Last_Purchase, Order_Count, Customer_Age_Years, Annual_Revenue, CLV.
    """
    customer_col = CUSTOMER_COLUMN if customer_col is None else customer_col
    revenue_col = REVENUE_COLUMN if revenue_col is None else revenue_col
    date_col = DATE_COLUMN if date_col is None else date_col

    customer_metrics = df.groupby(customer_col).agg({
        revenue_col: 'sum',
        date_col: ['min', 'max', 'count']
    }).reset_index()

    customer_metrics.columns = [customer_col, 'Total_Revenue', 'First_Purchase', 'Last_Purchase', 'Order_Count']

    # Observed customer tenure in years
    customer_metrics['Customer_Age_Years'] = (
        (customer_metrics['Last_Purchase'] - customer_metrics['First_Purchase']).dt.days / 365.25
    )

    # Annualize revenue. BUG FIX: the original `.replace(0, 1)` only caught
    # customers with exactly zero tenure; a tenure of a few days still
    # exploded the annual run-rate. Clip tenure to a 1-year floor so
    # single-purchase and short-tenure customers are annualized over at
    # least one year (generalizes the original 0 -> 1 intent).
    tenure_floor = customer_metrics['Customer_Age_Years'].clip(lower=1.0)
    customer_metrics['Annual_Revenue'] = customer_metrics['Total_Revenue'] / tenure_floor

    # Projected CLV over the requested horizon
    customer_metrics['CLV'] = customer_metrics['Annual_Revenue'] * years

    return customer_metrics

7. Market Basket Analysis

Pattern: Find product associations and cross-sell opportunities

import pandas as pd  # required below (pd.DataFrame) but missing from the original example
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# NOTE(review): INVOICE_NUMBER_COLUMN and ITEM_COLUMN come from config —
# import them if running this example standalone.
# One transaction = the list of items on a single invoice
transactions = df.groupby(INVOICE_NUMBER_COLUMN)[ITEM_COLUMN].apply(list).tolist()

# One-hot encode transactions into a boolean item matrix
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Keep itemsets appearing in at least 1% of transactions
frequent_itemsets = apriori(df_encoded, min_support=0.01, use_colnames=True)

# Generate rules with confidence >= 50%
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

8. Segmentation with Machine Learning

Pattern: Advanced customer segmentation using clustering

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Per-customer features: total/average revenue, order count, and tenure in
# days (span between first and last purchase).
features = df.groupby(CUSTOMER_COLUMN).agg({
    REVENUE_COLUMN: ['sum', 'mean', 'count'],
    DATE_COLUMN: lambda x: (x.max() - x.min()).days
}).reset_index()
features.columns = [CUSTOMER_COLUMN, 'Total_Revenue', 'Avg_Order', 'Order_Count', 'Customer_Tenure']

# Standardize so no single feature dominates the distance computation
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features[['Total_Revenue', 'Avg_Order', 'Order_Count', 'Customer_Tenure']])

# Cluster into 5 segments; fixed seed keeps assignments reproducible
kmeans = KMeans(n_clusters=5, random_state=42)
features['Segment'] = kmeans.fit_predict(features_scaled)

9. Anomaly Detection

Pattern: Identify unusual patterns in data

from statistical_utils import calculate_z_score

# Mean and standard deviation of transaction revenue
mean_revenue = df[REVENUE_COLUMN].mean()
std_revenue = df[REVENUE_COLUMN].std()

# Per-row z-score via the project helper.
# NOTE(review): element-wise .apply runs one Python call per row; the
# vectorized (df[REVENUE_COLUMN] - mean_revenue) / std_revenue is
# equivalent only if calculate_z_score is the plain (x - mean) / std —
# confirm before swapping.
df['Revenue_Z_Score'] = df[REVENUE_COLUMN].apply(
    lambda x: calculate_z_score(x, mean_revenue, std_revenue)
)

# Flag anomalies (|z| > 3)
df['Is_Anomaly'] = df['Revenue_Z_Score'].abs() > 3

10. Competitive Analysis Framework

Pattern: Compare performance across dimensions

from statistical_utils import calculate_yoy_growth, calculate_cagr

def competitive_analysis(df, dimension_col):
    """Compare performance across a dimension (e.g., products, regions).

    Aggregates revenue per dimension member and appends a Growth_<year>
    column for every year after the first present in the data.

    Args:
        df: Transaction DataFrame with REVENUE_COLUMN (from config) and a
            'Year' column.
        dimension_col: Column to compare across (e.g., product or region).

    Returns:
        DataFrame with one row per dimension member: Total_Revenue,
        Avg_Order, Order_Count, and one Growth_<year> column per year.
        Members absent in either year of a comparison get NaN growth.
    """
    analysis = df.groupby(dimension_col).agg({
        REVENUE_COLUMN: ['sum', 'mean', 'count']
    }).reset_index()
    analysis.columns = [dimension_col, 'Total_Revenue', 'Avg_Order', 'Order_Count']

    # Year-over-year growth per dimension member
    for year in sorted(df['Year'].unique())[1:]:
        prev_year = year - 1
        current = df[df['Year'] == year].groupby(dimension_col)[REVENUE_COLUMN].sum()
        previous = df[df['Year'] == prev_year].groupby(dimension_col)[REVENUE_COLUMN].sum()

        growth = calculate_yoy_growth(current, previous)
        # BUG FIX: `analysis[col] = growth` aligned the growth Series on the
        # RangeIndex of `analysis` instead of on the dimension values,
        # producing all-NaN growth columns. Map growth back by dimension
        # member instead. (Assumes calculate_yoy_growth returns a Series
        # indexed like `current` — confirm against statistical_utils.)
        analysis[f'Growth_{year}'] = analysis[dimension_col].map(growth)

    return analysis

Best Practices for Advanced Analyses

  1. Always validate data quality first:

    from data_quality import generate_data_quality_report
    report = generate_data_quality_report(df)
    
  2. Use logging for complex analyses:

    from logger_config import get_logger
    logger = get_logger('advanced_analysis')
    logger.info("Starting complex analysis...")
    
  3. Export intermediate results:

    from export_utils import export_to_excel
    export_to_excel(intermediate_df, 'intermediate_results.xlsx')
    
  4. Generate comprehensive reports:

    from report_generator import generate_pdf_report
    generate_pdf_report(charts=['chart1.png', 'chart2.png'], summary_data=summary)
    
  5. Test statistical significance:

    from statistical_utils import test_statistical_significance
    # Always test before making conclusions
    

Cursor AI Prompts for Advanced Analyses

When using Cursor, try these prompts:

  • "Create a cohort retention analysis with heatmaps"
  • "Build a price-volume-mix decomposition analysis"
  • "Generate a customer lifetime value analysis with segmentation"
  • "Create a forecasting model with confidence intervals"
  • "Build a multi-dimensional analysis across product, customer, and geography"
  • "Create an anomaly detection analysis for unusual transactions"

The AI will automatically use these patterns and the template utilities.


Last Updated: January 2026
For: Advanced users and AI-assisted development