278 lines
10 KiB
Python
278 lines
10 KiB
Python
"""
|
|
Configuration file for sales analysis scripts
|
|
CONFIGURE THIS FILE FOR YOUR COMPANY'S SPECIFIC DATA STRUCTURE
|
|
|
|
This file should be customized based on:
|
|
- Your data file name and location
|
|
- Column names in your sales data
|
|
- Date range and LTM period
|
|
- Company-specific settings
|
|
|
|
CRITICAL: All column names, file paths, and settings are defined here.
|
|
Never hardcode these values in analysis scripts - always import from config.
|
|
|
|
Usage:
|
|
from config import REVENUE_COLUMN, DATE_COLUMN, get_data_path
|
|
revenue = df[REVENUE_COLUMN].sum() # ✅ Correct
|
|
revenue = df['USD'].sum() # ❌ Wrong - hardcoded
|
|
|
|
Quick Setup:
|
|
1. Run: python setup_wizard.py (interactive configuration)
|
|
2. Or manually edit this file following the TODO comments
|
|
3. Validate: python config_validator.py
|
|
|
|
See Also:
|
|
- .cursor/rules/analysis_patterns.md - How to use config values
|
|
- setup_wizard.py - Interactive configuration tool
|
|
- config_validator.py - Configuration validation
|
|
"""
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple
|
|
import pandas as pd
|
|
|
|
# ============================================================================
|
|
# COMPANY INFORMATION
|
|
# ============================================================================
|
|
# TODO: Update these values for your company
|
|
COMPANY_NAME = "Your Company Name" # Update this
|
|
ANALYSIS_DATE = "2026-01-12" # Update this to current date
|
|
|
|
# ============================================================================
|
|
# DATA FILES
|
|
# ============================================================================
|
|
# TODO: Update with your actual data file name
|
|
DATA_FILE = 'sales_data.csv' # Update this to your CSV file name
|
|
OUTPUT_DIR = Path('charts')
|
|
REPORTS_DIR = Path('reports')
|
|
DATA_DIR = Path('data') # Optional: if data is in a subdirectory
|
|
|
|
# ============================================================================
|
|
# DATA COLUMN MAPPINGS
|
|
# ============================================================================
|
|
# TODO: Map these to your actual column names
|
|
# These are the expected column names - update if your CSV uses different names
|
|
|
|
# Revenue column (REQUIRED)
|
|
REVENUE_COLUMN = 'USD' # Common alternatives: 'Amount', 'Revenue', 'Total', 'Sales'
|
|
|
|
# Date columns (at least one required)
|
|
DATE_COLUMN = 'InvoiceDate' # Primary date column
|
|
DATE_FALLBACK_COLUMNS = ['Month', 'Year'] # Fallback columns if primary is missing
|
|
|
|
# Customer/Account columns
|
|
CUSTOMER_COLUMN = 'Customer' # Common alternatives: 'Account', 'CustomerName', 'Client'
|
|
|
|
# Product/Item columns
|
|
ITEM_COLUMN = 'Item' # Common alternatives: 'Product', 'SKU', 'ItemCode'
|
|
PRODUCT_GROUP_COLUMN = 'ProductGroup' # Optional: for product categorization
|
|
QUANTITY_COLUMN = 'Quantity' # Optional: for price calculations
|
|
|
|
# Geographic columns (optional)
|
|
REGION_COLUMN = 'Region' # Optional: for geographic analysis
|
|
COUNTRY_COLUMN = 'Country' # Optional: for country-level analysis
|
|
|
|
# Segment/Category columns (optional - customize based on your data)
|
|
SEGMENT_COLUMNS = {
|
|
'Technology': 'Technology', # Optional: technology/product type
|
|
'EndMarket': 'EndMarket', # Optional: end market/industry
|
|
'ProductGroup': 'ProductGroup', # Optional: product category
|
|
}
|
|
|
|
# Invoice/Transaction columns
|
|
INVOICE_NUMBER_COLUMN = 'Invoice #' # Optional: for transaction-level analysis
|
|
|
|
# ============================================================================
|
|
# DATE RANGE CONFIGURATION
|
|
# ============================================================================
|
|
# TODO: Update these based on your data and analysis needs
|
|
|
|
# Analysis years (years to include in analysis)
|
|
ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025] # Update based on your data
|
|
|
|
# LTM (Last Twelve Months) Configuration
|
|
# For the most recent partial year, use LTM for apples-to-apples comparison
|
|
# Example: If latest data is through September 2025, use Oct 2024 - Sep 2025
|
|
LTM_ENABLED = True # Set to False if you have complete calendar years only
|
|
LTM_START_MONTH = 10 # Month number (1-12) for LTM start
|
|
LTM_START_YEAR = 2024 # Year for LTM start
|
|
LTM_END_MONTH = 9 # Month number (1-12) for LTM end
|
|
LTM_END_YEAR = 2025 # Year for LTM end
|
|
|
|
# Generate LTM period objects
|
|
if LTM_ENABLED:
|
|
LTM_START = pd.Period(f'{LTM_START_YEAR}-{LTM_START_MONTH:02d}', freq='M')
|
|
LTM_END = pd.Period(f'{LTM_END_YEAR}-{LTM_END_MONTH:02d}', freq='M')
|
|
LTM_LABEL = f'{LTM_END_YEAR} (LTM {LTM_END_MONTH}/{LTM_END_YEAR})'
|
|
else:
|
|
LTM_START = None
|
|
LTM_END = None
|
|
LTM_LABEL = None
|
|
|
|
# Data date range (filter data to this range)
|
|
MIN_YEAR = 2021 # Minimum year to include
|
|
MAX_DATE = pd.Timestamp('2025-09-30') # Maximum date to include (update based on your data)
|
|
|
|
# ============================================================================
|
|
# CHART SETTINGS
|
|
# ============================================================================
|
|
CHART_DPI = 300
|
|
CHART_FORMAT = 'png'
|
|
CHART_BBOX = 'tight'
|
|
CHART_STYLE = 'seaborn-v0_8' # Options: 'default', 'ggplot', 'seaborn-v0_8', etc.
|
|
|
|
# Chart size presets
|
|
CHART_SIZES = {
|
|
'small': (6, 4),
|
|
'medium': (10, 6),
|
|
'large': (12, 8),
|
|
'wide': (14, 6)
|
|
}
|
|
|
|
# ============================================================================
|
|
# DATA FILTERING
|
|
# ============================================================================
|
|
# Quantity filtering for price calculations (exclude outliers)
|
|
MIN_QUANTITY = 0 # Minimum valid quantity
|
|
MAX_QUANTITY = 1000 # Maximum valid quantity (adjust based on your data)
|
|
|
|
# Revenue filtering (optional - exclude negative values, returns, etc.)
|
|
EXCLUDE_NEGATIVE_REVENUE = False # Set to True to exclude negative revenue (returns/credits)
|
|
MIN_REVENUE = None # Optional: minimum revenue threshold
|
|
|
|
# ============================================================================
|
|
# EXCLUSION FILTERS (Optional)
|
|
# ============================================================================
|
|
# Use this section to exclude specific segments, customers, or products
|
|
# Example: Exclude a business unit, test accounts, etc.
|
|
|
|
EXCLUSION_FILTERS = {
|
|
'enabled': False, # Set to True to enable exclusions
|
|
'exclude_by_column': None, # Column name to filter on (e.g., 'Country', 'Segment')
|
|
'exclude_values': [], # List of values to exclude (e.g., ['KVT', 'Test'])
|
|
}
|
|
|
|
# ============================================================================
|
|
# VALIDATION THRESHOLDS (Optional)
|
|
# ============================================================================
|
|
# Expected revenue ranges for validation (update based on your company)
|
|
# These are used to validate that data loading is working correctly
|
|
VALIDATION_ENABLED = False # Set to True to enable validation
|
|
EXPECTED_REVENUE = {} # Example: {2021: 99_880_000, 2024: 89_990_000}
|
|
REVENUE_TOLERANCE_PCT = 0.01 # 1% tolerance for validation
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
def ensure_directories() -> None:
|
|
"""
|
|
Create output directories if they don't exist
|
|
|
|
Creates charts/ and reports/ directories for saving analysis outputs.
|
|
Called automatically by get_chart_path() and get_report_path().
|
|
|
|
Returns:
|
|
None: Creates directories in place
|
|
"""
|
|
OUTPUT_DIR.mkdir(exist_ok=True)
|
|
REPORTS_DIR.mkdir(exist_ok=True)
|
|
if DATA_DIR.exists():
|
|
DATA_DIR.mkdir(exist_ok=True)
|
|
|
|
def get_chart_path(filename: str) -> Path:
|
|
"""
|
|
Get full path for chart file
|
|
|
|
Args:
|
|
filename: Chart filename (e.g., 'revenue_trend.png')
|
|
|
|
Returns:
|
|
Path: Full path to chart file in OUTPUT_DIR
|
|
"""
|
|
ensure_directories()
|
|
return OUTPUT_DIR / filename
|
|
|
|
def get_report_path(filename: str) -> Path:
|
|
"""
|
|
Get full path for report file
|
|
|
|
Args:
|
|
filename: Report filename (e.g., 'analysis_report.pdf')
|
|
|
|
Returns:
|
|
Path: Full path to report file in REPORTS_DIR
|
|
"""
|
|
ensure_directories()
|
|
return REPORTS_DIR / filename
|
|
|
|
def get_data_path(filename: Optional[str] = None) -> Path:
|
|
"""
|
|
Get full path for data file
|
|
|
|
This function handles data file location logic:
|
|
- If DATA_DIR exists, looks there first
|
|
- Otherwise uses current directory
|
|
- Defaults to DATA_FILE from config if filename not provided
|
|
|
|
Args:
|
|
filename: Optional filename override (defaults to config.DATA_FILE)
|
|
|
|
Returns:
|
|
Path: Full path to data file
|
|
|
|
Example:
|
|
>>> from config import get_data_path
|
|
>>> data_path = get_data_path()
|
|
>>> print(f"Loading from: {data_path}")
|
|
"""
|
|
if filename is None:
|
|
filename = DATA_FILE
|
|
if DATA_DIR.exists():
|
|
return DATA_DIR / filename
|
|
return Path(filename)
|
|
|
|
def get_ltm_period() -> Tuple[Optional[pd.Period], Optional[pd.Period]]:
|
|
"""
|
|
Get LTM (Last Twelve Months) period boundaries from config
|
|
|
|
Returns LTM start and end periods if LTM is enabled and configured,
|
|
otherwise returns (None, None).
|
|
|
|
Returns:
|
|
Tuple[Optional[pd.Period], Optional[pd.Period]]:
|
|
(ltm_start, ltm_end) or (None, None) if disabled
|
|
|
|
Example:
|
|
>>> ltm_start, ltm_end = get_ltm_period()
|
|
>>> if ltm_start and ltm_end:
|
|
... print(f"LTM: {ltm_start} to {ltm_end}")
|
|
|
|
See Also:
|
|
- get_ltm_label() - Get formatted LTM label string
|
|
- .cursor/rules/ltm_methodology.md - LTM explanation
|
|
"""
|
|
if LTM_ENABLED and LTM_START and LTM_END:
|
|
return LTM_START, LTM_END
|
|
return None, None
|
|
|
|
def get_ltm_label() -> Optional[str]:
|
|
"""
|
|
Get LTM label string for display
|
|
|
|
Returns formatted label like "2025 (LTM 9/2025)" if LTM is enabled,
|
|
otherwise None. Use this in chart titles and labels.
|
|
|
|
Returns:
|
|
Optional[str]: LTM label string or None if LTM disabled
|
|
|
|
Example:
|
|
>>> from config import get_ltm_label
|
|
>>> ltm_label = get_ltm_label()
|
|
>>> if ltm_label:
|
|
... title = f'Revenue Trend\n({ltm_label})'
|
|
|
|
See Also:
|
|
- get_ltm_period() - Get LTM period objects
|
|
- .cursor/rules/ltm_methodology.md - LTM usage guide
|
|
"""
|
|
return LTM_LABEL if LTM_ENABLED else None
|