239 lines
7.2 KiB
Python
239 lines
7.2 KiB
Python
"""
|
|
Export utilities for analysis results
|
|
Provides functions to export DataFrames and summary data to CSV and Excel
|
|
|
|
Usage:
|
|
from export_utils import export_to_csv, export_to_excel, export_summary_table
|
|
|
|
# Export DataFrame to CSV
|
|
export_to_csv(df, 'results.csv')
|
|
|
|
# Export DataFrame to Excel
|
|
export_to_excel(df, 'results.xlsx', sheet_name='Data')
|
|
|
|
# Export summary table
|
|
export_summary_table({'Metric1': 100, 'Metric2': 200}, 'summary.xlsx')
|
|
"""
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from config import REPORTS_DIR, ensure_directories
|
|
|
|
def export_to_csv(df, filename, output_dir=None, index=True):
|
|
"""
|
|
Export DataFrame to CSV with proper formatting
|
|
|
|
Args:
|
|
df: DataFrame to export
|
|
filename: Output filename (e.g., 'results.csv')
|
|
output_dir: Output directory (defaults to config.REPORTS_DIR)
|
|
index: Whether to include index in export (default: True)
|
|
|
|
Returns:
|
|
Path to exported file
|
|
"""
|
|
if output_dir is None:
|
|
output_dir = REPORTS_DIR
|
|
else:
|
|
output_dir = Path(output_dir)
|
|
|
|
ensure_directories()
|
|
output_dir.mkdir(exist_ok=True)
|
|
|
|
filepath = output_dir / filename
|
|
|
|
df.to_csv(filepath, index=index, encoding='utf-8-sig')
|
|
print(f"Exported to CSV: {filepath}")
|
|
|
|
return filepath
|
|
|
|
def export_to_excel(df, filename, sheet_name='Data', output_dir=None, index=True):
|
|
"""
|
|
Export DataFrame to Excel with formatting
|
|
|
|
Args:
|
|
df: DataFrame to export
|
|
filename: Output filename (e.g., 'results.xlsx')
|
|
sheet_name: Excel sheet name (default: 'Data')
|
|
output_dir: Output directory (defaults to config.REPORTS_DIR)
|
|
index: Whether to include index in export (default: True)
|
|
|
|
Returns:
|
|
Path to exported file
|
|
|
|
Raises:
|
|
ImportError: If openpyxl is not installed
|
|
"""
|
|
try:
|
|
import openpyxl
|
|
except ImportError:
|
|
raise ImportError(
|
|
"openpyxl is required for Excel export. Install with: pip install openpyxl"
|
|
)
|
|
|
|
if output_dir is None:
|
|
output_dir = REPORTS_DIR
|
|
else:
|
|
output_dir = Path(output_dir)
|
|
|
|
ensure_directories()
|
|
output_dir.mkdir(exist_ok=True)
|
|
|
|
filepath = output_dir / filename
|
|
|
|
# Create Excel writer
|
|
with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
|
|
df.to_excel(writer, sheet_name=sheet_name, index=index)
|
|
|
|
# Auto-adjust column widths
|
|
worksheet = writer.sheets[sheet_name]
|
|
for idx, col in enumerate(df.columns, 1):
|
|
max_length = max(
|
|
df[col].astype(str).map(len).max(),
|
|
len(str(col))
|
|
)
|
|
# Cap at 50 characters for readability
|
|
adjusted_width = min(max_length + 2, 50)
|
|
worksheet.column_dimensions[chr(64 + idx)].width = adjusted_width
|
|
|
|
print(f"Exported to Excel: {filepath}")
|
|
|
|
return filepath
|
|
|
|
def export_summary_table(data_dict, filename, output_dir=None, title=None):
|
|
"""
|
|
Export summary statistics to formatted table (Excel)
|
|
|
|
Args:
|
|
data_dict: Dictionary of {metric_name: value} pairs
|
|
filename: Output filename (e.g., 'summary.xlsx')
|
|
output_dir: Output directory (defaults to config.REPORTS_DIR)
|
|
title: Optional title for the summary table
|
|
|
|
Returns:
|
|
Path to exported file
|
|
|
|
Example:
|
|
export_summary_table({
|
|
'Total Revenue': 1000000,
|
|
'Customer Count': 500,
|
|
'Average Order Value': 2000
|
|
}, 'summary.xlsx')
|
|
"""
|
|
try:
|
|
import openpyxl
|
|
except ImportError:
|
|
raise ImportError(
|
|
"openpyxl is required for Excel export. Install with: pip install openpyxl"
|
|
)
|
|
|
|
if output_dir is None:
|
|
output_dir = REPORTS_DIR
|
|
else:
|
|
output_dir = Path(output_dir)
|
|
|
|
ensure_directories()
|
|
output_dir.mkdir(exist_ok=True)
|
|
|
|
filepath = output_dir / filename
|
|
|
|
# Create DataFrame from dictionary
|
|
df = pd.DataFrame({
|
|
'Metric': list(data_dict.keys()),
|
|
'Value': list(data_dict.values())
|
|
})
|
|
|
|
# Format numeric values
|
|
def format_value(val):
|
|
if isinstance(val, (int, float)):
|
|
if abs(val) >= 1e6:
|
|
return f"${val / 1e6:.2f}m"
|
|
elif abs(val) >= 1e3:
|
|
return f"${val / 1e3:.2f}k"
|
|
else:
|
|
return f"${val:.2f}"
|
|
return str(val)
|
|
|
|
df['Formatted_Value'] = df['Value'].apply(format_value)
|
|
|
|
# Create Excel writer
|
|
with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
|
|
df.to_excel(writer, sheet_name='Summary', index=False)
|
|
|
|
# Format worksheet
|
|
worksheet = writer.sheets['Summary']
|
|
|
|
# Set column widths
|
|
worksheet.column_dimensions['A'].width = 30
|
|
worksheet.column_dimensions['B'].width = 20
|
|
worksheet.column_dimensions['C'].width = 20
|
|
|
|
# Add title if provided
|
|
if title:
|
|
worksheet.insert_rows(1)
|
|
worksheet.merge_cells('A1:C1')
|
|
worksheet['A1'] = title
|
|
worksheet['A1'].font = openpyxl.styles.Font(bold=True, size=14)
|
|
worksheet['A1'].alignment = openpyxl.styles.Alignment(horizontal='center')
|
|
|
|
print(f"Exported summary table to Excel: {filepath}")
|
|
|
|
return filepath
|
|
|
|
def export_multiple_sheets(data_dict, filename, output_dir=None):
|
|
"""
|
|
Export multiple DataFrames to Excel with multiple sheets
|
|
|
|
Args:
|
|
data_dict: Dictionary of {sheet_name: DataFrame} pairs
|
|
filename: Output filename (e.g., 'results.xlsx')
|
|
output_dir: Output directory (defaults to config.REPORTS_DIR)
|
|
|
|
Returns:
|
|
Path to exported file
|
|
|
|
Example:
|
|
export_multiple_sheets({
|
|
'Revenue': revenue_df,
|
|
'Customers': customer_df,
|
|
'Products': product_df
|
|
}, 'analysis_results.xlsx')
|
|
"""
|
|
try:
|
|
import openpyxl
|
|
except ImportError:
|
|
raise ImportError(
|
|
"openpyxl is required for Excel export. Install with: pip install openpyxl"
|
|
)
|
|
|
|
if output_dir is None:
|
|
output_dir = REPORTS_DIR
|
|
else:
|
|
output_dir = Path(output_dir)
|
|
|
|
ensure_directories()
|
|
output_dir.mkdir(exist_ok=True)
|
|
|
|
filepath = output_dir / filename
|
|
|
|
# Create Excel writer
|
|
with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
|
|
for sheet_name, df in data_dict.items():
|
|
# Truncate sheet name to 31 characters (Excel limit)
|
|
safe_sheet_name = sheet_name[:31]
|
|
df.to_excel(writer, sheet_name=safe_sheet_name, index=True)
|
|
|
|
# Auto-adjust column widths
|
|
worksheet = writer.sheets[safe_sheet_name]
|
|
for idx, col in enumerate(df.columns, 1):
|
|
max_length = max(
|
|
df[col].astype(str).map(len).max(),
|
|
len(str(col))
|
|
)
|
|
adjusted_width = min(max_length + 2, 50)
|
|
col_letter = openpyxl.utils.get_column_letter(idx)
|
|
worksheet.column_dimensions[col_letter].width = adjusted_width
|
|
|
|
print(f"Exported {len(data_dict)} sheets to Excel: {filepath}")
|
|
|
|
return filepath
|