""" Configuration file for sales analysis scripts CONFIGURE THIS FILE FOR YOUR COMPANY'S SPECIFIC DATA STRUCTURE This file should be customized based on: - Your data file name and location - Column names in your sales data - Date range and LTM period - Company-specific settings CRITICAL: All column names, file paths, and settings are defined here. Never hardcode these values in analysis scripts - always import from config. Usage: from config import REVENUE_COLUMN, DATE_COLUMN, get_data_path revenue = df[REVENUE_COLUMN].sum() # ✅ Correct revenue = df['USD'].sum() # ❌ Wrong - hardcoded Quick Setup: 1. Run: python setup_wizard.py (interactive configuration) 2. Or manually edit this file following the TODO comments 3. Validate: python config_validator.py See Also: - .cursor/rules/analysis_patterns.md - How to use config values - setup_wizard.py - Interactive configuration tool - config_validator.py - Configuration validation """ from pathlib import Path from typing import Optional, Tuple import pandas as pd # ============================================================================ # COMPANY INFORMATION # ============================================================================ # TODO: Update these values for your company COMPANY_NAME = "Your Company Name" # Update this ANALYSIS_DATE = "2026-01-12" # Update this to current date # ============================================================================ # DATA FILES # ============================================================================ # TODO: Update with your actual data file name DATA_FILE = 'sales_data.csv' # Update this to your CSV file name OUTPUT_DIR = Path('charts') REPORTS_DIR = Path('reports') DATA_DIR = Path('data') # Optional: if data is in a subdirectory # ============================================================================ # DATA COLUMN MAPPINGS # ============================================================================ # TODO: Map these to your actual column names # These are the expected column names - update if your CSV uses different names # Revenue column (REQUIRED) REVENUE_COLUMN = 'USD' # Common alternatives: 'Amount', 'Revenue', 'Total', 'Sales' # Date columns (at least one required) DATE_COLUMN = 'InvoiceDate' # Primary date column DATE_FALLBACK_COLUMNS = ['Month', 'Year'] # Fallback columns if primary is missing # Customer/Account columns CUSTOMER_COLUMN = 'Customer' # Common alternatives: 'Account', 'CustomerName', 'Client' # Product/Item columns ITEM_COLUMN = 'Item' # Common alternatives: 'Product', 'SKU', 'ItemCode' PRODUCT_GROUP_COLUMN = 'ProductGroup' # Optional: for product categorization QUANTITY_COLUMN = 'Quantity' # Optional: for price calculations # Geographic columns (optional) REGION_COLUMN = 'Region' # Optional: for geographic analysis COUNTRY_COLUMN = 'Country' # Optional: for country-level analysis # Segment/Category columns (optional - customize based on your data) SEGMENT_COLUMNS = { 'Technology': 'Technology', # Optional: technology/product type 'EndMarket': 'EndMarket', # Optional: end market/industry 'ProductGroup': 'ProductGroup', # Optional: product category } # Invoice/Transaction columns INVOICE_NUMBER_COLUMN = 'Invoice #' # Optional: for transaction-level analysis # ============================================================================ # DATE RANGE CONFIGURATION # ============================================================================ # TODO: Update these based on your data and analysis needs # Analysis years (years to include in analysis) ANALYSIS_YEARS = [2021, 2022, 2023, 2024, 2025] # Update based on your data # LTM (Last Twelve Months) Configuration # For the most recent partial year, use LTM for apples-to-apples comparison # Example: If latest data is through September 2025, use Oct 2024 - Sep 2025 LTM_ENABLED = True # Set to False if you have complete calendar years only LTM_START_MONTH = 10 # Month number (1-12) for LTM start LTM_START_YEAR = 2024 # Year for LTM start LTM_END_MONTH = 9 # Month number (1-12) for LTM end LTM_END_YEAR = 2025 # Year for LTM end # Generate LTM period objects if LTM_ENABLED: LTM_START = pd.Period(f'{LTM_START_YEAR}-{LTM_START_MONTH:02d}', freq='M') LTM_END = pd.Period(f'{LTM_END_YEAR}-{LTM_END_MONTH:02d}', freq='M') LTM_LABEL = f'{LTM_END_YEAR} (LTM {LTM_END_MONTH}/{LTM_END_YEAR})' else: LTM_START = None LTM_END = None LTM_LABEL = None # Data date range (filter data to this range) MIN_YEAR = 2021 # Minimum year to include MAX_DATE = pd.Timestamp('2025-09-30') # Maximum date to include (update based on your data) # ============================================================================ # CHART SETTINGS # ============================================================================ CHART_DPI = 300 CHART_FORMAT = 'png' CHART_BBOX = 'tight' CHART_STYLE = 'seaborn-v0_8' # Options: 'default', 'ggplot', 'seaborn-v0_8', etc. # Chart size presets CHART_SIZES = { 'small': (6, 4), 'medium': (10, 6), 'large': (12, 8), 'wide': (14, 6) } # ============================================================================ # DATA FILTERING # ============================================================================ # Quantity filtering for price calculations (exclude outliers) MIN_QUANTITY = 0 # Minimum valid quantity MAX_QUANTITY = 1000 # Maximum valid quantity (adjust based on your data) # Revenue filtering (optional - exclude negative values, returns, etc.) EXCLUDE_NEGATIVE_REVENUE = False # Set to True to exclude negative revenue (returns/credits) MIN_REVENUE = None # Optional: minimum revenue threshold # ============================================================================ # EXCLUSION FILTERS (Optional) # ============================================================================ # Use this section to exclude specific segments, customers, or products # Example: Exclude a business unit, test accounts, etc. EXCLUSION_FILTERS = { 'enabled': False, # Set to True to enable exclusions 'exclude_by_column': None, # Column name to filter on (e.g., 'Country', 'Segment') 'exclude_values': [], # List of values to exclude (e.g., ['KVT', 'Test']) } # ============================================================================ # VALIDATION THRESHOLDS (Optional) # ============================================================================ # Expected revenue ranges for validation (update based on your company) # These are used to validate that data loading is working correctly VALIDATION_ENABLED = False # Set to True to enable validation EXPECTED_REVENUE = {} # Example: {2021: 99_880_000, 2024: 89_990_000} REVENUE_TOLERANCE_PCT = 0.01 # 1% tolerance for validation # ============================================================================ # HELPER FUNCTIONS # ============================================================================ def ensure_directories() -> None: """ Create output directories if they don't exist Creates charts/ and reports/ directories for saving analysis outputs. Called automatically by get_chart_path() and get_report_path(). Returns: None: Creates directories in place """ OUTPUT_DIR.mkdir(exist_ok=True) REPORTS_DIR.mkdir(exist_ok=True) if DATA_DIR.exists(): DATA_DIR.mkdir(exist_ok=True) def get_chart_path(filename: str) -> Path: """ Get full path for chart file Args: filename: Chart filename (e.g., 'revenue_trend.png') Returns: Path: Full path to chart file in OUTPUT_DIR """ ensure_directories() return OUTPUT_DIR / filename def get_report_path(filename: str) -> Path: """ Get full path for report file Args: filename: Report filename (e.g., 'analysis_report.pdf') Returns: Path: Full path to report file in REPORTS_DIR """ ensure_directories() return REPORTS_DIR / filename def get_data_path(filename: Optional[str] = None) -> Path: """ Get full path for data file This function handles data file location logic: - If DATA_DIR exists, looks there first - Otherwise uses current directory - Defaults to DATA_FILE from config if filename not provided Args: filename: Optional filename override (defaults to config.DATA_FILE) Returns: Path: Full path to data file Example: >>> from config import get_data_path >>> data_path = get_data_path() >>> print(f"Loading from: {data_path}") """ if filename is None: filename = DATA_FILE if DATA_DIR.exists(): return DATA_DIR / filename return Path(filename) def get_ltm_period() -> Tuple[Optional[pd.Period], Optional[pd.Period]]: """ Get LTM (Last Twelve Months) period boundaries from config Returns LTM start and end periods if LTM is enabled and configured, otherwise returns (None, None). Returns: Tuple[Optional[pd.Period], Optional[pd.Period]]: (ltm_start, ltm_end) or (None, None) if disabled Example: >>> ltm_start, ltm_end = get_ltm_period() >>> if ltm_start and ltm_end: ... print(f"LTM: {ltm_start} to {ltm_end}") See Also: - get_ltm_label() - Get formatted LTM label string - .cursor/rules/ltm_methodology.md - LTM explanation """ if LTM_ENABLED and LTM_START and LTM_END: return LTM_START, LTM_END return None, None def get_ltm_label() -> Optional[str]: """ Get LTM label string for display Returns formatted label like "2025 (LTM 9/2025)" if LTM is enabled, otherwise None. Use this in chart titles and labels. Returns: Optional[str]: LTM label string or None if LTM disabled Example: >>> from config import get_ltm_label >>> ltm_label = get_ltm_label() >>> if ltm_label: ... title = f'Revenue Trend\n({ltm_label})' See Also: - get_ltm_period() - Get LTM period objects - .cursor/rules/ltm_methodology.md - LTM usage guide """ return LTM_LABEL if LTM_ENABLED else None