""" Export utilities for analysis results Provides functions to export DataFrames and summary data to CSV and Excel Usage: from export_utils import export_to_csv, export_to_excel, export_summary_table # Export DataFrame to CSV export_to_csv(df, 'results.csv') # Export DataFrame to Excel export_to_excel(df, 'results.xlsx', sheet_name='Data') # Export summary table export_summary_table({'Metric1': 100, 'Metric2': 200}, 'summary.xlsx') """ import pandas as pd from pathlib import Path from config import REPORTS_DIR, ensure_directories def export_to_csv(df, filename, output_dir=None, index=True): """ Export DataFrame to CSV with proper formatting Args: df: DataFrame to export filename: Output filename (e.g., 'results.csv') output_dir: Output directory (defaults to config.REPORTS_DIR) index: Whether to include index in export (default: True) Returns: Path to exported file """ if output_dir is None: output_dir = REPORTS_DIR else: output_dir = Path(output_dir) ensure_directories() output_dir.mkdir(exist_ok=True) filepath = output_dir / filename df.to_csv(filepath, index=index, encoding='utf-8-sig') print(f"Exported to CSV: {filepath}") return filepath def export_to_excel(df, filename, sheet_name='Data', output_dir=None, index=True): """ Export DataFrame to Excel with formatting Args: df: DataFrame to export filename: Output filename (e.g., 'results.xlsx') sheet_name: Excel sheet name (default: 'Data') output_dir: Output directory (defaults to config.REPORTS_DIR) index: Whether to include index in export (default: True) Returns: Path to exported file Raises: ImportError: If openpyxl is not installed """ try: import openpyxl except ImportError: raise ImportError( "openpyxl is required for Excel export. Install with: pip install openpyxl" ) if output_dir is None: output_dir = REPORTS_DIR else: output_dir = Path(output_dir) ensure_directories() output_dir.mkdir(exist_ok=True) filepath = output_dir / filename # Create Excel writer with pd.ExcelWriter(filepath, engine='openpyxl') as writer: df.to_excel(writer, sheet_name=sheet_name, index=index) # Auto-adjust column widths worksheet = writer.sheets[sheet_name] for idx, col in enumerate(df.columns, 1): max_length = max( df[col].astype(str).map(len).max(), len(str(col)) ) # Cap at 50 characters for readability adjusted_width = min(max_length + 2, 50) worksheet.column_dimensions[chr(64 + idx)].width = adjusted_width print(f"Exported to Excel: {filepath}") return filepath def export_summary_table(data_dict, filename, output_dir=None, title=None): """ Export summary statistics to formatted table (Excel) Args: data_dict: Dictionary of {metric_name: value} pairs filename: Output filename (e.g., 'summary.xlsx') output_dir: Output directory (defaults to config.REPORTS_DIR) title: Optional title for the summary table Returns: Path to exported file Example: export_summary_table({ 'Total Revenue': 1000000, 'Customer Count': 500, 'Average Order Value': 2000 }, 'summary.xlsx') """ try: import openpyxl except ImportError: raise ImportError( "openpyxl is required for Excel export. Install with: pip install openpyxl" ) if output_dir is None: output_dir = REPORTS_DIR else: output_dir = Path(output_dir) ensure_directories() output_dir.mkdir(exist_ok=True) filepath = output_dir / filename # Create DataFrame from dictionary df = pd.DataFrame({ 'Metric': list(data_dict.keys()), 'Value': list(data_dict.values()) }) # Format numeric values def format_value(val): if isinstance(val, (int, float)): if abs(val) >= 1e6: return f"${val / 1e6:.2f}m" elif abs(val) >= 1e3: return f"${val / 1e3:.2f}k" else: return f"${val:.2f}" return str(val) df['Formatted_Value'] = df['Value'].apply(format_value) # Create Excel writer with pd.ExcelWriter(filepath, engine='openpyxl') as writer: df.to_excel(writer, sheet_name='Summary', index=False) # Format worksheet worksheet = writer.sheets['Summary'] # Set column widths worksheet.column_dimensions['A'].width = 30 worksheet.column_dimensions['B'].width = 20 worksheet.column_dimensions['C'].width = 20 # Add title if provided if title: worksheet.insert_rows(1) worksheet.merge_cells('A1:C1') worksheet['A1'] = title worksheet['A1'].font = openpyxl.styles.Font(bold=True, size=14) worksheet['A1'].alignment = openpyxl.styles.Alignment(horizontal='center') print(f"Exported summary table to Excel: {filepath}") return filepath def export_multiple_sheets(data_dict, filename, output_dir=None): """ Export multiple DataFrames to Excel with multiple sheets Args: data_dict: Dictionary of {sheet_name: DataFrame} pairs filename: Output filename (e.g., 'results.xlsx') output_dir: Output directory (defaults to config.REPORTS_DIR) Returns: Path to exported file Example: export_multiple_sheets({ 'Revenue': revenue_df, 'Customers': customer_df, 'Products': product_df }, 'analysis_results.xlsx') """ try: import openpyxl except ImportError: raise ImportError( "openpyxl is required for Excel export. Install with: pip install openpyxl" ) if output_dir is None: output_dir = REPORTS_DIR else: output_dir = Path(output_dir) ensure_directories() output_dir.mkdir(exist_ok=True) filepath = output_dir / filename # Create Excel writer with pd.ExcelWriter(filepath, engine='openpyxl') as writer: for sheet_name, df in data_dict.items(): # Truncate sheet name to 31 characters (Excel limit) safe_sheet_name = sheet_name[:31] df.to_excel(writer, sheet_name=safe_sheet_name, index=True) # Auto-adjust column widths worksheet = writer.sheets[safe_sheet_name] for idx, col in enumerate(df.columns, 1): max_length = max( df[col].astype(str).map(len).max(), len(str(col)) ) adjusted_width = min(max_length + 2, 50) col_letter = openpyxl.utils.get_column_letter(idx) worksheet.column_dimensions[col_letter].width = adjusted_width print(f"Exported {len(data_dict)} sheets to Excel: {filepath}") return filepath