Files
sales-data-analysis/config.py
Jonathan Pressnell cf0b596449 Initial commit: sales analysis template
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-06 09:16:34 -05:00

278 lines
10 KiB
Python

"""
Configuration file for sales analysis scripts
CONFIGURE THIS FILE FOR YOUR COMPANY'S SPECIFIC DATA STRUCTURE
This file should be customized based on:
- Your data file name and location
- Column names in your sales data
- Date range and LTM period
- Company-specific settings
CRITICAL: All column names, file paths, and settings are defined here.
Never hardcode these values in analysis scripts - always import from config.
Usage:
from config import REVENUE_COLUMN, DATE_COLUMN, get_data_path
revenue = df[REVENUE_COLUMN].sum() # ✅ Correct
revenue = df['USD'].sum() # ❌ Wrong - hardcoded
Quick Setup:
1. Run: python setup_wizard.py (interactive configuration)
2. Or manually edit this file following the TODO comments
3. Validate: python config_validator.py
See Also:
- .cursor/rules/analysis_patterns.md - How to use config values
- setup_wizard.py - Interactive configuration tool
- config_validator.py - Configuration validation
"""
from pathlib import Path
from typing import Optional, Tuple
import pandas as pd
# ============================================================================
# COMPANY INFORMATION
# ============================================================================
# TODO: Update these values for your company
COMPANY_NAME = "Your Company Name" # Update this
ANALYSIS_DATE = "2026-01-12" # Update this to current date
# ============================================================================
# DATA FILES
# ============================================================================
# TODO: Update with your actual data file name
DATA_FILE = 'sales_data.csv' # Update this to your CSV file name
OUTPUT_DIR = Path('charts')
REPORTS_DIR = Path('reports')
DATA_DIR = Path('data') # Optional: if data is in a subdirectory
# ============================================================================
# DATA COLUMN MAPPINGS
# ============================================================================
# TODO: Map these to your actual column names
# These are the expected column names - update if your CSV uses different names
# Revenue column (REQUIRED)
REVENUE_COLUMN = 'USD' # Common alternatives: 'Amount', 'Revenue', 'Total', 'Sales'
# Date columns (at least one required)
DATE_COLUMN = 'InvoiceDate' # Primary date column
DATE_FALLBACK_COLUMNS = ['Month', 'Year'] # Fallback columns if primary is missing
# Customer/Account columns
CUSTOMER_COLUMN = 'Customer' # Common alternatives: 'Account', 'CustomerName', 'Client'
# Product/Item columns
ITEM_COLUMN = 'Item' # Common alternatives: 'Product', 'SKU', 'ItemCode'
PRODUCT_GROUP_COLUMN = 'ProductGroup' # Optional: for product categorization
QUANTITY_COLUMN = 'Quantity' # Optional: for price calculations
# Geographic columns (optional)
REGION_COLUMN = 'Region' # Optional: for geographic analysis
COUNTRY_COLUMN = 'Country' # Optional: for country-level analysis
# Segment/Category columns (optional - customize based on your data)
SEGMENT_COLUMNS = {
'Technology': 'Technology', # Optional: technology/product type
'EndMarket': 'EndMarket', # Optional: end market/industry
'ProductGroup': 'ProductGroup', # Optional: product category
}
# Invoice/Transaction columns
INVOICE_NUMBER_COLUMN = 'Invoice #' # Optional: for transaction-level analysis
# ============================================================================
# DATE RANGE CONFIGURATION
# ============================================================================
# TODO: Update these based on your data and analysis needs
# Analysis years (years to include in analysis)
ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025] # Update based on your data
# LTM (Last Twelve Months) Configuration
# For the most recent partial year, use LTM for apples-to-apples comparison
# Example: If latest data is through September 2025, use Oct 2024 - Sep 2025
LTM_ENABLED = True # Set to False if you have complete calendar years only
LTM_START_MONTH = 10 # Month number (1-12) for LTM start
LTM_START_YEAR = 2024 # Year for LTM start
LTM_END_MONTH = 9 # Month number (1-12) for LTM end
LTM_END_YEAR = 2025 # Year for LTM end
# Generate LTM period objects
if LTM_ENABLED:
LTM_START = pd.Period(f'{LTM_START_YEAR}-{LTM_START_MONTH:02d}', freq='M')
LTM_END = pd.Period(f'{LTM_END_YEAR}-{LTM_END_MONTH:02d}', freq='M')
LTM_LABEL = f'{LTM_END_YEAR} (LTM {LTM_END_MONTH}/{LTM_END_YEAR})'
else:
LTM_START = None
LTM_END = None
LTM_LABEL = None
# Data date range (filter data to this range)
MIN_YEAR = 2021 # Minimum year to include
MAX_DATE = pd.Timestamp('2025-09-30') # Maximum date to include (update based on your data)
# ============================================================================
# CHART SETTINGS
# ============================================================================
CHART_DPI = 300
CHART_FORMAT = 'png'
CHART_BBOX = 'tight'
CHART_STYLE = 'seaborn-v0_8' # Options: 'default', 'ggplot', 'seaborn-v0_8', etc.
# Chart size presets
CHART_SIZES = {
'small': (6, 4),
'medium': (10, 6),
'large': (12, 8),
'wide': (14, 6)
}
# ============================================================================
# DATA FILTERING
# ============================================================================
# Quantity filtering for price calculations (exclude outliers)
MIN_QUANTITY = 0 # Minimum valid quantity
MAX_QUANTITY = 1000 # Maximum valid quantity (adjust based on your data)
# Revenue filtering (optional - exclude negative values, returns, etc.)
EXCLUDE_NEGATIVE_REVENUE = False # Set to True to exclude negative revenue (returns/credits)
MIN_REVENUE = None # Optional: minimum revenue threshold
# ============================================================================
# EXCLUSION FILTERS (Optional)
# ============================================================================
# Use this section to exclude specific segments, customers, or products
# Example: Exclude a business unit, test accounts, etc.
EXCLUSION_FILTERS = {
'enabled': False, # Set to True to enable exclusions
'exclude_by_column': None, # Column name to filter on (e.g., 'Country', 'Segment')
'exclude_values': [], # List of values to exclude (e.g., ['KVT', 'Test'])
}
# ============================================================================
# VALIDATION THRESHOLDS (Optional)
# ============================================================================
# Expected revenue ranges for validation (update based on your company)
# These are used to validate that data loading is working correctly
VALIDATION_ENABLED = False # Set to True to enable validation
EXPECTED_REVENUE = {} # Example: {2021: 99_880_000, 2024: 89_990_000}
REVENUE_TOLERANCE_PCT = 0.01 # 1% tolerance for validation
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def ensure_directories() -> None:
"""
Create output directories if they don't exist
Creates charts/ and reports/ directories for saving analysis outputs.
Called automatically by get_chart_path() and get_report_path().
Returns:
None: Creates directories in place
"""
OUTPUT_DIR.mkdir(exist_ok=True)
REPORTS_DIR.mkdir(exist_ok=True)
if DATA_DIR.exists():
DATA_DIR.mkdir(exist_ok=True)
def get_chart_path(filename: str) -> Path:
"""
Get full path for chart file
Args:
filename: Chart filename (e.g., 'revenue_trend.png')
Returns:
Path: Full path to chart file in OUTPUT_DIR
"""
ensure_directories()
return OUTPUT_DIR / filename
def get_report_path(filename: str) -> Path:
"""
Get full path for report file
Args:
filename: Report filename (e.g., 'analysis_report.pdf')
Returns:
Path: Full path to report file in REPORTS_DIR
"""
ensure_directories()
return REPORTS_DIR / filename
def get_data_path(filename: Optional[str] = None) -> Path:
"""
Get full path for data file
This function handles data file location logic:
- If DATA_DIR exists, looks there first
- Otherwise uses current directory
- Defaults to DATA_FILE from config if filename not provided
Args:
filename: Optional filename override (defaults to config.DATA_FILE)
Returns:
Path: Full path to data file
Example:
>>> from config import get_data_path
>>> data_path = get_data_path()
>>> print(f"Loading from: {data_path}")
"""
if filename is None:
filename = DATA_FILE
if DATA_DIR.exists():
return DATA_DIR / filename
return Path(filename)
def get_ltm_period() -> Tuple[Optional[pd.Period], Optional[pd.Period]]:
"""
Get LTM (Last Twelve Months) period boundaries from config
Returns LTM start and end periods if LTM is enabled and configured,
otherwise returns (None, None).
Returns:
Tuple[Optional[pd.Period], Optional[pd.Period]]:
(ltm_start, ltm_end) or (None, None) if disabled
Example:
>>> ltm_start, ltm_end = get_ltm_period()
>>> if ltm_start and ltm_end:
... print(f"LTM: {ltm_start} to {ltm_end}")
See Also:
- get_ltm_label() - Get formatted LTM label string
- .cursor/rules/ltm_methodology.md - LTM explanation
"""
if LTM_ENABLED and LTM_START and LTM_END:
return LTM_START, LTM_END
return None, None
def get_ltm_label() -> Optional[str]:
"""
Get LTM label string for display
Returns formatted label like "2025 (LTM 9/2025)" if LTM is enabled,
otherwise None. Use this in chart titles and labels.
Returns:
Optional[str]: LTM label string or None if LTM disabled
Example:
>>> from config import get_ltm_label
>>> ltm_label = get_ltm_label()
>>> if ltm_label:
... title = f'Revenue Trend\n({ltm_label})'
See Also:
- get_ltm_period() - Get LTM period objects
- .cursor/rules/ltm_methodology.md - LTM usage guide
"""
return LTM_LABEL if LTM_ENABLED else None