Add acceptance tests and align defaults to Sonnet 4

2026-02-23 14:45:57 -05:00
parent 14d5c360e5
commit 9480a3c994
12 changed files with 10034 additions and 85 deletions
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -30,7 +30,8 @@ DOCUMENT_AI_LOCATION=us
 DOCUMENT_AI_PROCESSOR_ID=your-processor-id
 GCS_BUCKET_NAME=your-gcs-bucket-name
 DOCUMENT_AI_OUTPUT_BUCKET_NAME=your-document-ai-output-bucket
-GOOGLE_APPLICATION_CREDENTIALS=./serviceAccountKey.json
+# Leave blank when using Firebase Functions secrets/Application Default Credentials
+GOOGLE_APPLICATION_CREDENTIALS=

 # Processing Strategy
 PROCESSING_STRATEGY=document_ai_genkit
@@ -72,4 +73,4 @@ AGENTIC_RAG_CONSISTENCY_CHECK=true
 # Monitoring and Logging
 AGENTIC_RAG_DETAILED_LOGGING=true
 AGENTIC_RAG_PERFORMANCE_TRACKING=true
-AGENTIC_RAG_ERROR_REPORTING=true
+AGENTIC_RAG_ERROR_REPORTING=true
--- a/backend/src/tests/acceptance/handiFoods.acceptance.test.ts
+++ b/backend/src/tests/acceptance/handiFoods.acceptance.test.ts
@@ -0,0 +1,78 @@
+import fs from 'fs';
+import path from 'path';
+
+type ReferenceFact = {
+  description: string;
+  tokens: string[];
+};
+
+const referenceFacts: ReferenceFact[] = [
+  {
+    description: 'Leading value-added positioning',
+    tokens: ['leading', 'value-added', 'baked snacks']
+  },
+  {
+    description: 'North American baked snack market size',
+    tokens: ['~$12b', 'north american', 'baked snack']
+  },
+  {
+    description: 'Private label and co-manufacturing focus',
+    tokens: ['private label', 'co-manufacturing']
+  },
+  {
+    description: 'Facility scale',
+    tokens: ['150k+']
+  }
+];
+
+const requiredFields = [
+  'geography:',
+  'industry sector:',
+  'key products services:'
+];
+
+const repoRoot = path.resolve(__dirname, '../../../..');
+const fixturesDir = path.join(repoRoot, 'backend', 'test-fixtures', 'handiFoods');
+const cimTextPath = path.join(fixturesDir, 'handi-foods-cim.txt');
+const outputTextPath = path.join(fixturesDir, 'handi-foods-output.txt');
+
+describe('Acceptance: Handi Foods CIM vs Generated Output', () => {
+  let cimNormalized: string;
+  let outputNormalized: string;
+  let outputLines: string[];
+
+  beforeAll(() => {
+    const normalize = (text: string) => text.replace(/\s+/g, ' ').toLowerCase();
+    const cimRaw = fs.readFileSync(cimTextPath, 'utf-8');
+    const outputRaw = fs.readFileSync(outputTextPath, 'utf-8');
+    cimNormalized = normalize(cimRaw);
+    outputNormalized = normalize(outputRaw);
+    outputLines = outputRaw
+      .split(/\r?\n/)
+      .map((line) => line.trim())
+      .filter(Boolean);
+  });
+
+  it('verifies each reference fact exists in the CIM and in the generated output', () => {
+    for (const fact of referenceFacts) {
+      for (const token of fact.tokens) {
+        expect(cimNormalized).toContain(token);
+        expect(outputNormalized).toContain(token);
+      }
+    }
+  });
+
+  it('ensures key fields are resolved instead of falling back to "Not specified in CIM"', () => {
+    const findFieldValue = (label: string) => {
+      const lowerLabel = label.toLowerCase();
+      const line = outputLines.find((l) => l.toLowerCase().startsWith(lowerLabel));
+      return line ? line.slice(label.length).trim() : '';
+    };
+
+    for (const label of requiredFields) {
+      const value = findFieldValue(label);
+      expect(value.length).toBeGreaterThan(0);
+      expect(value.toLowerCase()).not.toContain('not specified in cim');
+    }
+  });
+});
--- a/backend/src/config/env.ts
+++ b/backend/src/config/env.ts
@@ -93,7 +93,7 @@ const envSchema = Joi.object({
  DOCUMENT_AI_PROCESSOR_ID: Joi.string().required(),
  GCS_BUCKET_NAME: Joi.string().required(),
  DOCUMENT_AI_OUTPUT_BUCKET_NAME: Joi.string().required(),
-  GOOGLE_APPLICATION_CREDENTIALS: Joi.string().default('./serviceAccountKey.json'),
+  GOOGLE_APPLICATION_CREDENTIALS: Joi.string().allow('').default(''),
  
  // Vector Database Configuration
  VECTOR_PROVIDER: Joi.string().valid('supabase', 'pinecone').default('supabase'),
@@ -137,7 +137,7 @@ const envSchema = Joi.object({
    then: Joi.string().optional(), // Optional if using BYOK
    otherwise: Joi.string().allow('').optional()
  }),
-  LLM_MODEL: Joi.string().default('gpt-4'),
+  LLM_MODEL: Joi.string().default('claude-sonnet-4-20250514'),
  LLM_MAX_TOKENS: Joi.number().default(16000),
  LLM_TEMPERATURE: Joi.number().min(0).max(2).default(0.1),
  LLM_PROMPT_BUFFER: Joi.number().default(500),
@@ -308,17 +308,16 @@ export const config = {
    openrouterApiKey: process.env['OPENROUTER_API_KEY'] || envVars['OPENROUTER_API_KEY'],
    openrouterUseBYOK: envVars['OPENROUTER_USE_BYOK'] === 'true', // Use BYOK (Bring Your Own Key)
    
-    // Model Selection - Using latest Claude 4.5 models (Oct 2025)
-    // Claude Sonnet 4.5 is recommended for best balance of intelligence, speed, and cost
-    // Supports structured outputs for guaranteed JSON schema compliance
-    // NOTE: Claude Sonnet 4.5 offers improved accuracy and reasoning for full-document processing
-    model: envVars['LLM_MODEL'] || 'claude-sonnet-4-5-20250929', // Primary model (Claude Sonnet 4.5 - latest and most accurate)
-    fastModel: envVars['LLM_FAST_MODEL'] || 'claude-3-5-haiku-latest', // Fast model (Claude Haiku 3.5 latest - fastest and cheapest)
+    // Model Selection - Unified on Claude Sonnet 4 (May 2025 release)
+    // Claude Sonnet 4 20250514 is the currently supported, non-deprecated variant
+    // This keeps multi-pass extraction aligned with the same reasoning model across passes
+    model: envVars['LLM_MODEL'] || 'claude-sonnet-4-20250514', // Primary model (Claude Sonnet 4)
+    fastModel: envVars['LLM_FAST_MODEL'] || 'claude-sonnet-4-20250514', // Fast model aligned with Sonnet 4
    fallbackModel: envVars['LLM_FALLBACK_MODEL'] || 'gpt-4o', // Fallback for creativity

    // Task-specific model selection
-    // Use Haiku 3.5 for financial extraction - faster and cheaper, with validation fallback to Sonnet
-    financialModel: envVars['LLM_FINANCIAL_MODEL'] || 'claude-3-5-haiku-latest', // Fast model for financial extraction (Haiku 3.5 latest)
+    // Use Sonnet 4 for financial extraction to avoid deprecated Haiku endpoints
+    financialModel: envVars['LLM_FINANCIAL_MODEL'] || 'claude-sonnet-4-20250514', // Financial extraction model (Claude Sonnet 4)
    creativeModel: envVars['LLM_CREATIVE_MODEL'] || 'gpt-4o', // Best for creative content
    reasoningModel: envVars['LLM_REASONING_MODEL'] || 'claude-opus-4-1-20250805', // Best for complex reasoning (Opus 4.1)
    
@@ -449,4 +448,4 @@ export const getConfigHealth = () => {
  };
 };

-export default config;
+export default config;
--- a/backend/src/controllers/documentController.ts
+++ b/backend/src/controllers/documentController.ts
@@ -41,10 +41,11 @@ export const documentController = {
        return;
      }

-      // Validate file size (max 50MB)
-      if (fileSize > 50 * 1024 * 1024) {
+      const maxFileSize = config.upload.maxFileSize || 50 * 1024 * 1024;
+      if (fileSize > maxFileSize) {
+        const maxFileSizeMb = Math.round(maxFileSize / (1024 * 1024));
        res.status(400).json({
-          error: 'File size exceeds 50MB limit',
+          error: `File size exceeds ${maxFileSizeMb}MB limit`,
          correlationId: req.correlationId
        });
        return;
@@ -1013,4 +1014,4 @@ export const documentController = {
      throw new Error('Failed to get document text');
    }
  }
-};
+};
--- a/backend/src/middleware/errorHandler.ts
+++ b/backend/src/middleware/errorHandler.ts
@@ -38,6 +38,46 @@ export interface ErrorResponse {
  };
 }

+const BODY_WHITELIST = [
+  'documentId',
+  'id',
+  'status',
+  'fileName',
+  'fileSize',
+  'contentType',
+  'correlationId',
+];
+
+const sanitizeRequestBody = (body: any): Record<string, unknown> | string | undefined => {
+  if (!body || typeof body !== 'object') {
+    return undefined;
+  }
+
+  if (Array.isArray(body)) {
+    return '[REDACTED]';
+  }
+
+  const sanitized: Record<string, unknown> = {};
+  for (const key of BODY_WHITELIST) {
+    if (Object.prototype.hasOwnProperty.call(body, key)) {
+      sanitized[key] = body[key];
+    }
+  }
+
+  return Object.keys(sanitized).length > 0 ? sanitized : '[REDACTED]';
+};
+
+const buildRequestLogContext = (req: Request): Record<string, unknown> => ({
+  url: req.url,
+  method: req.method,
+  ip: req.ip,
+  userAgent: req.get('User-Agent'),
+  userId: (req as any).user?.id,
+  params: req.params,
+  query: req.query,
+  body: sanitizeRequestBody(req.body),
+});
+
 // Correlation ID middleware
 export const correlationIdMiddleware = (req: Request, res: Response, next: NextFunction): void => {
  const correlationId = req.headers['x-correlation-id'] as string || uuidv4();
@@ -61,16 +101,7 @@ export const errorHandler = (
  enhancedError.correlationId = correlationId;

  // Structured error logging
-  logError(enhancedError, correlationId, {
-    url: req.url,
-    method: req.method,
-    ip: req.ip,
-    userAgent: req.get('User-Agent'),
-    userId: (req as any).user?.id,
-    body: req.body,
-    params: req.params,
-    query: req.query
-  });
+  logError(enhancedError, correlationId, buildRequestLogContext(req));

  // Create error response
  const errorResponse: ErrorResponse = {
@@ -246,4 +277,4 @@ export const getUserFriendlyMessage = (error: AppError): string => {
 // Create correlation ID function
 export const createCorrelationId = (): string => {
  return uuidv4();
-}; 
+}; 
--- a/backend/src/middleware/firebaseAuth.ts
+++ b/backend/src/middleware/firebaseAuth.ts
@@ -1,24 +1,85 @@
 import { Request, Response, NextFunction } from 'express';
-import admin from 'firebase-admin';
+import admin, { ServiceAccount } from 'firebase-admin';
+import fs from 'fs';
+import { config } from '../config/env';
 import { logger } from '../utils/logger';

-// Initialize Firebase Admin if not already initialized
-if (!admin.apps.length) {
+const shouldLogAuthDebug = process.env.AUTH_DEBUG === 'true';
+
+const logAuthDebug = (message: string, meta?: Record<string, unknown>): void => {
+  if (shouldLogAuthDebug) {
+    logger.debug(message, meta);
+  }
+};
+
+const resolveServiceAccount = (): ServiceAccount | null => {
  try {
-    // For Firebase Functions, use default credentials (recommended approach)
-    admin.initializeApp({
-      projectId: 'cim-summarizer'
+    if (process.env.FIREBASE_SERVICE_ACCOUNT) {
+      return JSON.parse(process.env.FIREBASE_SERVICE_ACCOUNT) as ServiceAccount;
+    }
+  } catch (error) {
+    logger.warn('Failed to parse FIREBASE_SERVICE_ACCOUNT env value', {
+      error: error instanceof Error ? error.message : String(error),
    });
-    console.log('✅ Firebase Admin initialized with default credentials');
+  }
+
+  const serviceAccountPath = process.env.FIREBASE_SERVICE_ACCOUNT_PATH || config.googleCloud.applicationCredentials;
+  if (serviceAccountPath) {
+    try {
+      if (fs.existsSync(serviceAccountPath)) {
+        const fileContents = fs.readFileSync(serviceAccountPath, 'utf-8');
+        return JSON.parse(fileContents) as ServiceAccount;
+      }
+      logger.debug('Service account path does not exist', { serviceAccountPath });
+    } catch (error) {
+      logger.warn('Failed to load Firebase service account file', {
+        serviceAccountPath,
+        error: error instanceof Error ? error.message : String(error),
+      });
+    }
+  }
+
+  return null;
+};
+
+const initializeFirebaseAdmin = (): void => {
+  if (admin.apps.length) {
+    return;
+  }
+
+  try {
+    const firebaseOptions: admin.AppOptions = {};
+    const projectId = config.firebase.projectId || config.googleCloud.projectId;
+    if (projectId) {
+      firebaseOptions.projectId = projectId;
+    }
+
+    const serviceAccount = resolveServiceAccount();
+    if (serviceAccount) {
+      firebaseOptions.credential = admin.credential.cert(serviceAccount);
+    } else {
+      try {
+        firebaseOptions.credential = admin.credential.applicationDefault();
+        logAuthDebug('Using application default credentials for Firebase Admin');
+      } catch (credentialError) {
+        logger.warn('Application default credentials unavailable, relying on environment defaults', {
+          error: credentialError instanceof Error ? credentialError.message : String(credentialError),
+        });
+      }
+    }
+
+    admin.initializeApp(firebaseOptions);
+    logger.info('Firebase Admin initialized', { projectId: firebaseOptions.projectId });
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
-    console.error('❌ Firebase Admin initialization failed:', errorMessage);
-    // Don't reinitialize if already initialized
+    logger.error('Firebase Admin initialization failed', { error: errorMessage });
    if (!admin.apps.length) {
      throw error;
    }
  }
-}
+};
+
+initializeFirebaseAdmin();

 export interface FirebaseAuthenticatedRequest extends Request {
  user?: admin.auth.DecodedIdToken;
@@ -30,45 +91,33 @@ export const verifyFirebaseToken = async (
  next: NextFunction
 ): Promise<void> => {
  try {
-    console.log('🔐 Authentication middleware called for:', req.method, req.url);
-    console.log('🔐 Request headers:', Object.keys(req.headers));
-    
-    // Debug Firebase Admin initialization
-    console.log('🔐 Firebase apps available:', admin.apps.length);
-    console.log('🔐 Firebase app names:', admin.apps.filter(app => app !== null).map(app => app!.name));
-    
+    logAuthDebug('Authentication middleware invoked', {
+      method: req.method,
+      path: req.url,
+      correlationId: req.correlationId,
+    });
+    logAuthDebug('Firebase admin apps', { count: admin.apps.length });
+
    const authHeader = req.headers.authorization;
-    console.log('🔐 Auth header present:', !!authHeader);
-    console.log('🔐 Auth header starts with Bearer:', authHeader?.startsWith('Bearer '));
-    
    if (!authHeader || !authHeader.startsWith('Bearer ')) {
-      console.log('❌ No valid authorization header');
      res.status(401).json({ error: 'No valid authorization header' });
      return;
    }

    const idToken = authHeader.split('Bearer ')[1];
-    console.log('🔐 Token extracted, length:', idToken?.length);
-    
    if (!idToken) {
-      console.log('❌ No token provided');
      res.status(401).json({ error: 'No token provided' });
      return;
    }

-    console.log('🔐 Attempting to verify Firebase ID token...');
-    console.log('🔐 Token preview:', idToken.substring(0, 20) + '...');
-    
    // Verify the Firebase ID token
    const decodedToken = await admin.auth().verifyIdToken(idToken, true);
-    console.log('✅ Token verified successfully for user:', decodedToken.email);
-    console.log('✅ Token UID:', decodedToken.uid);
-    console.log('✅ Token issuer:', decodedToken.iss);
+    logAuthDebug('Firebase token verified', { uid: decodedToken.uid });
    
    // Check if token is expired
    const now = Math.floor(Date.now() / 1000);
    if (decodedToken.exp && decodedToken.exp < now) {
-      logger.warn('Token expired for user:', decodedToken.uid);
+      logger.warn('Token expired for user', { uid: decodedToken.uid });
      res.status(401).json({ error: 'Token expired' });
      return;
    }
@@ -76,11 +125,11 @@ export const verifyFirebaseToken = async (
    req.user = decodedToken;
    
    // Log successful authentication
-    logger.info('Authenticated request for user:', decodedToken.email);
+    logger.info('Authenticated request', { uid: decodedToken.uid });
    
    next();
  } catch (error: any) {
-    logger.error('Firebase token verification failed:', {
+    logger.error('Firebase token verification failed', {
      error: error.message,
      code: error.code,
      ip: req.ip,
@@ -97,13 +146,15 @@ export const verifyFirebaseToken = async (
          // Try to verify without force refresh
          const decodedToken = await admin.auth().verifyIdToken(idToken, false);
          req.user = decodedToken;
-          logger.info('Recovered authentication from session for user:', decodedToken.email);
+          logger.info('Recovered authentication from session', { uid: decodedToken.uid });
          next();
          return;
        }
      }
    } catch (recoveryError) {
-      logger.debug('Session recovery failed:', recoveryError);
+      logger.debug('Session recovery failed', {
+        error: recoveryError instanceof Error ? recoveryError.message : String(recoveryError),
+      });
    }
    
    // Provide more specific error messages
@@ -140,4 +191,4 @@ export const optionalFirebaseAuth = async (
  }
  
  next();
-};
+};
--- a/backend/src/services/financialTableParser.ts
+++ b/backend/src/services/financialTableParser.ts
@@ -22,6 +22,10 @@ const PERIOD_TOKEN_REGEX = /\b(?:(?:FY[-\s]?\d{1,2})|(?:FY[-\s]?)?20\d{2}[A-Z]*|
 const MONEY_REGEX = /-?\$?\(?\d[\d,]*(?:\.\d+)?\)?\s?(?:K|M|B)?/g;
 const PERCENT_REGEX = /-?\d{1,3}(?:\.\d+)?\s?%/g;

+const resetRegex = (regex: RegExp): void => {
+  regex.lastIndex = 0;
+};
+
 const ROW_MATCHERS: Record<string, RegExp> = {
  revenue: /(revenue|net sales|total sales|top\s+line)/i,
  grossProfit: /(gross\s+profit)/i,
@@ -137,33 +141,37 @@ function yearTokensToBuckets(tokens: string[]): Array<Bucket | null> {
 * Extract numeric tokens (money/percentages) from a line or combined lines.
 * Best practice: Extract all numeric values and preserve their order to match column positions.
 */
-function extractNumericTokens(line: string, nextLine?: string): string[] {
-  const combined = `${line} ${nextLine || ''}`;
+function extractNumericTokens(line: string, additionalContent?: string): string[] {
+  const combined = additionalContent ? `${line} ${additionalContent}` : line;
+  const lineLength = line.length;
  
  // Extract money values with their positions to preserve column order
+  resetRegex(MONEY_REGEX);
  const moneyMatches = Array.from(combined.matchAll(MONEY_REGEX))
-    .map((m) => ({ value: normalizeToken(m[0]), index: m.index || 0 }))
+    .map((m) => ({ value: normalizeToken(m[0]), index: m.index ?? 0 }))
    .filter((m) => m.value && /\d/.test(m.value));
  
  // Extract percentage values with their positions
+  resetRegex(PERCENT_REGEX);
  const percentMatches = Array.from(combined.matchAll(PERCENT_REGEX))
-    .map((m) => ({ value: normalizeToken(m[0]), index: m.index || 0 }))
+    .map((m) => ({ value: normalizeToken(m[0]), index: m.index ?? 0 }))
    .filter((m) => m.value && /\d/.test(m.value));
  
-  // Combine and sort by position to preserve column order (critical for table parsing)
-  const allMatches = [...moneyMatches, ...percentMatches]
-    .sort((a, b) => a.index - b.index)
-    .map((m) => m.value);
+  const sortedMatches = [...moneyMatches, ...percentMatches].sort((a, b) => a.index - b.index);
  
-  // Remove duplicates while preserving order
-  const tokens: string[] = [];
-  for (const token of allMatches) {
-    if (!tokens.includes(token)) {
-      tokens.push(token);
-    }
+  const primaryTokens = sortedMatches
+    .filter(match => match.index < lineLength)
+    .map(match => match.value);
+  
+  if (primaryTokens.length >= 2 || !additionalContent) {
+    return primaryTokens.length > 0 ? primaryTokens : sortedMatches.map(match => match.value);
  }
  
-  return tokens;
+  const secondaryTokens = sortedMatches
+    .filter(match => match.index >= lineLength)
+    .map(match => match.value);
+  
+  return primaryTokens.concat(secondaryTokens);
 }

 function isMoneyLike(value?: string): boolean {
@@ -312,7 +320,7 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {
        
        for (let j = lookAheadStart; j < lookAheadEnd; j++) {
          const checkLine = lines[j] || '';
-          const hasNumbers = MONEY_REGEX.test(checkLine) || PERCENT_REGEX.test(checkLine);
+          const hasNumbers = containsMoneyOrPercent(checkLine);
          
          if (!hasNumbers) continue; // Skip lines without numbers
          
@@ -441,14 +449,27 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {
      
      // CRITICAL: Only match rows that contain BOTH the field name AND numeric values
      // This prevents matching descriptive text that just mentions financial terms
-      const hasMoneyOrPercent = MONEY_REGEX.test(combinedForTokens) || PERCENT_REGEX.test(combinedForTokens);
+      const hasMoneyOrPercent = containsMoneyOrPercent(combinedForTokens);
      if (!hasMoneyOrPercent) continue; // Skip lines without actual financial numbers

      for (const [field, matcher] of Object.entries(ROW_MATCHERS)) {
        if (!matcher.test(line)) continue;

        // Extract tokens from the combined lines
-        const tokens = extractNumericTokens(line, combinedForTokens);
+        const extraContent = `${nextLine} ${lineAfterNext}`.trim() || undefined;
+        let tokens = extractNumericTokens(line, extraContent);
+        
+        if (['grossMargin', 'ebitdaMargin', 'revenueGrowth'].includes(field)) {
+          const percentTokens = tokens.filter(isPercentLike);
+          if (percentTokens.length > 0) {
+            tokens = percentTokens;
+          }
+        } else if (['revenue', 'grossProfit', 'ebitda'].includes(field)) {
+          const moneyTokens = tokens.filter(isMoneyLike);
+          if (moneyTokens.length > 0) {
+            tokens = moneyTokens;
+          }
+        }
        
        // Only process if we found meaningful tokens (at least 2, indicating multiple periods)
        if (tokens.length < 2) {
@@ -504,3 +525,10 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {

  return result;
 }
+const containsMoneyOrPercent = (text: string): boolean => {
+  resetRegex(MONEY_REGEX);
+  const hasMoney = MONEY_REGEX.test(text);
+  resetRegex(PERCENT_REGEX);
+  const hasPercent = PERCENT_REGEX.test(text);
+  return hasMoney || hasPercent;
+};
--- a/backend/test-fixtures/handiFoods/handi-foods-cim.txt
+++ b/backend/test-fixtures/handiFoods/handi-foods-cim.txt
--- a/backend/test-fixtures/handiFoods/handi-foods-output.txt
+++ b/backend/test-fixtures/handiFoods/handi-foods-output.txt
@@ -0,0 +1,231 @@
+BLUEPOINT Capital Partners
+CIM Review Report
+Generated: 2/23/2026 at 7:15:07 PM
+
+Deal Overview
+Geography: Toronto, Canada and Newkirk, Canada
+Reviewers: Not specified in CIM
+Deal Source: Not specified in CIM
+Cim Page Count: 81
+Date Reviewed: Not specified in CIM
+Employee Count: Not specified in CIM
+Industry Sector: Specialty Food Manufacturing / Better-For-You Baked Snacks
+Date C I M Received: Not specified in CIM
+Transaction Type: Not specified in CIM
+Target Company Name: Handi Foods
+Stated Reason For Sale: Not specified in CIM
+
+Business Description
+Key Products Services: Crackers (60% of gross sales), Chips (21% of gross sales), Pretzel Chips (17% of
+gross sales), and Puffs & Bits (2% of gross sales). The company provides end-to-end manufacturing solutions
+including R&D, product development, manufacturing, and packaging services for private label retailers and co-manufacturing partnerships.
+Core Operations Summary: Handi Foods is a leading value-added provider of baked snacks in North
+America, specializing in better-for-you (BFY) baked snacks including crackers, pretzel chips, chips, and puffs &
+bits. The company operates as an end-to-end solutions partner, simplifying private label and co-manufacturing
+programs for major retailers and brand partners. With two manufacturing facilities totaling 150K+ square feet
+and recent $65M+ capital investment in high-capacity automated production lines, Handi Foods serves both
+private label (69% of sales) and brand partner (31% of sales) customers across the U.S. (83% of sales) and
+Canada (17% of sales).
+Unique Value Proposition: Market-leading position with ~60% share of private label pita cracker & pita chip
+sales in U.S. & Canada, providing end-to-end solutions partner capabilities with highly automated, scalable
+manufacturing platform and strong customer loyalty with 91%+ of sales from sole source customers.
+
+Market & Industry Analysis
+Barriers To Entry: Significant capital requirements for automated production lines ($65M+ recent investment),
+established customer relationships with sole source agreements, regulatory compliance for food manufacturing,
+and economies of scale in production.
+Key Industry Trends: Growing demand for better-for-you (BFY) baked snacks, private label expansion, and
+specialty snacking categories including sourdough, brioche, and functional formats.
+Estimated Market Size: Operating within the sizable ~$12B North American baked snack market. Near-term
+addressable market for current core and emerging product offerings estimated at ~$1,315M-$1,425M by
+2025E, growing to ~$1,925M-$2,245M by 2028P.
+Estimated Market Growth Rate: $470M-$510M addressable market growing at 5-6% CAGR for private label
+pita snacking segment where company holds ~60% market share.
+
+Financial Summary
+Quality Of Earnings: FY2025A PF Adjusted EBITDA reflects $4.5M in one-time and non-recurring
+adjustments and $0.5M in pro forma adjustments, indicating some earnings quality considerations. Quality of
+
+Earnings report prepared by BDO Canada LLP for periods FY2023A through FY2025A with adjustments for
+non-recurring and extraordinary items.
+Capital Expenditures: Total capital expenditures of $3.8M in FY2025A ($0.7M maintenance, $3.2M growth).
+Significant growth capex planned: $12.9M in FY2026E, $13.3M in FY2027P for new production line
+installations. Maintenance capex running at approximately 0.7-1.1% of revenue.
+Free Cash Flow Quality: 95%+ free cash flow conversion based on (Adj. EBITDA - Maintenance Capital
+Expenditures) / Adj. EBITDA calculation, indicating strong cash generation and high-quality earnings
+conversion.
+Revenue Growth Drivers: Volume expansion within existing top customers such as Trader Joe's and Lidl,
+increased ASP driven by turnkey program at Hain Canada, introduction of 11 inaugural Pretzelized SKUs in
+March 2024, and rapid increase of the Pretzelized business with addition of 17 new SKUs in FY2025A. Net
+sales CAGR of 25.2% from FY2021A-FY2025A.
+Margin Stability Analysis: Gross margin improved from 32.0% in FY2023A to 35.7% in FY2025A (370 bps
+improvement), driven by increased pricing in response to commodity inflation and fixed operating cost leverage.
+EBITDA margin expanded from 26.8% in FY2023A to 31.9% in FY2025A (510 bps improvement),
+demonstrating strong operational leverage and margin expansion capability.
+Working Capital Intensity: Not specifically detailed in CIM, but freight, transportation, and commissions
+decreased from $4.5M to $4.0M despite revenue growth, suggesting improving working capital efficiency.
+
+Management Team Overview
+Key Leaders: Brian Arbique as CEO since 2017, John Dobie as VP of Operations since 2017, Marc Diamant
+as CFO in 2024.
+Organizational Structure: Not specified in CIM
+Post Transaction Intentions: Not specified in CIM
+Management Quality Assessment: Experienced management team with Brian Arbique as CEO since 2017
+and John Dobie as VP of Operations since 2017, indicating 8+ years of tenure during the company's
+transformation and growth phase. Recent addition of Marc Diamant as CFO in 2024 suggests
+professionalization of finance function. Management has overseen successful transition from pita bread to BFY
+snacks, significant capacity expansion, and strong financial performance.
+
+Preliminary Investment Thesis
+Key Attractions: 1. Market-leading position with ~60% share of private label pita cracker & pita chip sales in
+U.S. & Canada, providing significant competitive moat and pricing power in a $470M-$510M addressable
+market growing at 5-6% CAGR. This dominant position supports sustainable revenue growth and margin
+expansion opportunities. 2. Exceptional financial performance with 25.2% net sales CAGR from FY2021A-FY2025A, reaching $90.1M revenue in FY2025A, and EBITDA margin expansion from 26.8% to 31.9% over
+two years, demonstrating strong operational leverage and scalability. 3. Transformative customer relationship
+with Pretzelized, growing from first order in March 2024 to projected $7.2M KGs volume by FY2030P under
+exclusive long-term sole source agreement, representing significant embedded growth with high-growth brand
+partner. 4. Highly automated, scalable manufacturing platform with $65M+ recent capex investment in high-capacity production lines, providing ample capacity for growth and operational efficiency advantages over
+competitors. 5. Strong customer loyalty with 91%+ of FY2025A gross sales from sole source customers and
+average top 10 customer tenure of ~8 years, indicating sticky customer relationships and predictable revenue
+base. 6. Diversified and attractive business mix across channels (Grocery 41%, Mass 35%, Private Label
+Grocers 14%, Club 10%) and geographies (U.S. 83%, Canada 17%), reducing concentration risk while
+maintaining market leadership. 7. Proven innovation capabilities with 35 new SKUs launched since FY2021A
+and robust R&D pipeline, including emerging products in high-growth categories like sourdough, brioche, and
+functional formats with estimated $14.5M FY2030P new product revenue opportunity. 8. Exceptional cash
+generation with 95%+ free cash flow conversion, providing strong cash returns and flexibility for growth
+investments and potential acquisitions.
+Potential Risks: 1. Customer concentration risk (Operational): While 91%+ of sales from sole source
+customers provides stability, loss of any major customer could significantly impact revenue. Probability: Low,
+Impact: High. Mitigation: Long-term contracts and strong customer satisfaction scores. Deal-breaker: No, but
+requires careful contract review. 2. Commodity price volatility (Financial): Direct materials represent significant
+cost component, and commodity inflation could pressure margins if not passed through to customers.
+Probability: Medium, Impact: Medium. Mitigation: Pricing mechanisms and customer relationships support price
+increases. Deal-breaker: No. 3. Capacity utilization risk (Operational): Current utilization at 63% in FY2025A
+with significant capex planned for new lines, creating risk of underutilized assets if growth doesn't materialize.
+
+Probability: Medium, Impact: Medium. Mitigation: Strong customer demand visibility and contracted growth.
+Deal-breaker: No. 4. Pretzelized dependence risk (Operational): Rapid growth tied to single brand partner
+Pretzelized creates concentration risk if relationship deteriorates or brand fails to achieve projected growth.
+Probability: Low, Impact: High. Mitigation: Exclusive long-term contract and strong collaborative relationship.
+Deal-breaker: No, but requires deep customer diligence. 5. Private label competitive dynamics (Market): Private
+label customers could potentially switch suppliers or bring production in-house, threatening market position.
+Probability: Low, Impact: Medium. Mitigation: Sole source agreements and high switching costs. Deal-breaker:
+No. 6. Food safety and regulatory risk (Regulatory): Food manufacturing subject to strict regulations and
+potential recalls could damage reputation and financial performance. Probability: Low, Impact: High. Mitigation:
+Established quality systems and insurance coverage. Deal-breaker: No, but requires operational diligence. 7.
+Cross-border operations complexity (Operational): Operating in both U.S. and Canada creates currency,
+regulatory, and operational complexity. Probability: Medium, Impact: Low. Mitigation: Experienced management
+and established operations. Deal-breaker: No.
+Value Creation Levers: 1. Pricing optimization and margin expansion: Leverage market-leading position to
+implement 2-3% price increases across product portfolio, potentially adding $1.8-2.7M annual revenue with
+high flow-through to EBITDA given fixed cost base. Implementation: BPCP pricing expertise and market
+analysis. Timeline: 12-18 months. Confidence: High. 2. Operational efficiency improvements: Optimize
+production scheduling, reduce changeover times, and improve labor productivity through BPCP's operational
+expertise, targeting 100-200 bps EBITDA margin improvement worth $0.9-1.8M annually. Timeline: 18-24
+months. Confidence: Medium-High. 3. M&A consolidation strategy: Acquire complementary baked snack
+manufacturers to expand product portfolio, customer base, and geographic reach, with illustrative targets
+ranging from $15M-$40M EBITDA providing platform for 2-3x revenue growth. Implementation: BPCP's M&A
+expertise. Timeline: 12-36 months. Confidence: Medium. 4. New product development acceleration: Leverage
+innovation pipeline including sourdough, brioche, and functional formats to capture estimated $14.5M FY2030P
+revenue opportunity, with BPCP supporting go-to-market strategy and customer development. Timeline: 24-36
+months. Confidence: Medium. 5. Customer diversification and expansion: Utilize BPCP's consumer industry
+relationships to accelerate new customer wins and expand wallet share with existing customers, targeting
+15-20% revenue growth through customer expansion. Timeline: 18-30 months. Confidence: Medium-High. 6.
+Supply chain optimization: Implement BPCP's supply chain expertise to optimize procurement, reduce direct
+material costs by 50-100 bps, and improve working capital efficiency, potentially adding $0.5-0.9M annual
+EBITDA. Timeline: 12-24 months. Confidence: Medium. 7. Technology and automation enhancement: Further
+automate production processes and implement data analytics to improve yield, reduce waste, and optimize
+capacity utilization, targeting 2-3% improvement in gross margins. Timeline: 24-36 months. Confidence:
+Medium. 8. International expansion: Leverage cross-border capabilities to expand into additional international
+markets beyond current U.S./Canada footprint, potentially adding 10-15% revenue growth over 3-5 years.
+Timeline: 36-60 months. Confidence: Low-Medium.
+Alignment With Fund Strategy: EBITDA Range Fit (Score: 10/10): LTM Adjusted EBITDA of $28.8M CAD
+(~$21M USD) fits perfectly within BPCP's $5+MM EBITDA target range. Industry Focus (Score: 9/10): Specialty
+food manufacturing in consumer end market aligns strongly with BPCP's consumer focus, though industrial
+component is limited. Geographic Preferences (Score: 4/10): Toronto and Newkirk, Canada locations are not
+within driving distance of Cleveland or Charlotte, presenting a geographic misalignment challenge. Value
+Creation Expertise Alignment (Score: 9/10): Strong alignment with BPCP's M&A capabilities (fragmented
+market consolidation opportunity), technology & automation (recent $65M investment platform), supply chain
+optimization (procurement and vertical integration opportunities), and operational improvements (capacity
+utilization, efficiency gains). Founder/Family Ownership (Score: 8/10): Founded by a first-generation immigrant in
+1977 with family heritage, though current ownership by Ironbridge Equity Partners since 2022 reduces founder
+involvement. Market Position (Score: 9/10): Leading platform with defensible competitive position and growth
+runway aligns with BPCP's preference for market leaders. Financial Profile (Score: 9/10): Strong growth (25.2%
+CAGR), margin expansion (18.6% to 31.9%), and cash generation (95%+ FCF conversion) align with BPCP's
+financial criteria. Overall Alignment Score: 8.3/10. Strong strategic fit across most criteria with primary concern
+being geographic distance from BPCP's preferred Cleveland/Charlotte proximity. The company's scale, market
+position, growth profile, and value creation opportunities align well with BPCP's investment strategy despite
+geographic considerations.
+
+Key Questions & Next Steps
+Critical Questions: 1. What is the detailed ownership structure, and is the business founder/family-owned
+as preferred by BPCP? This is critical for understanding seller motivations, transaction structure, and alignment
+with BPCP's investment preferences for founder/family-owned businesses. Priority: High Impact. 2. What are the
+specific terms, duration, and renewal provisions of the exclusive Pretzelized contract given its importance to
+growth projections? With Pretzelized representing significant projected growth, understanding contract
+protection and renewal risk is essential for validating growth assumptions and investment thesis. Priority: Deal-breaker. 3. What is the detailed management team composition, experience, and post-transaction retention
+
+plans? Given the operational complexity and growth plans, management quality and retention are critical for
+successful value creation and operational execution. Priority: High Impact. 4. What are the specific capacity
+utilization rates by production line and facility, and how does planned capex align with contracted customer
+demand? With 63% current utilization and $26M+ planned capex, understanding capacity-demand alignment is
+crucial for validating growth capex requirements and returns. Priority: High Impact. 5. What is the customer
+contract renewal schedule and historical retention rates for the next 24 months? With 91%+ sole source
+customer relationships, understanding renewal timing and retention risk is essential for revenue predictability
+and valuation support. Priority: High Impact. 6. What are the detailed EBITDA adjustments and quality of
+earnings issues identified in the BDO report? With $4.5M in one-time adjustments in FY2025A, understanding
+earnings quality is critical for normalized EBITDA assessment and valuation. Priority: High Impact. 7. What is
+the competitive response risk if Handi Foods continues taking market share in private label pita snacking?
+Understanding competitive dynamics and potential retaliation is important for assessing sustainability of market
+leadership and pricing power. Priority: Medium Impact. 8. What are the specific food safety protocols, insurance
+coverage, and historical recall/quality issues? Given food manufacturing risks, understanding quality systems
+and risk mitigation is essential for operational due diligence. Priority: Medium Impact.
+Proposed Next Steps: 1. Schedule comprehensive management presentation to assess team quality,
+experience, and post-transaction intentions, including detailed discussion of growth strategy and operational
+capabilities. Involve: Investment team lead, operating partner. Timeline: Within 1 week. 2. Conduct detailed
+customer reference calls with top 5 customers to validate relationship strength, contract terms, renewal
+likelihood, and growth potential. Focus particularly on Pretzelized relationship and contract terms. Involve:
+Investment team, industry expert. Timeline: Within 2 weeks. 3. Engage food industry expert and former private
+label executive to assess competitive positioning, market dynamics, and growth sustainability in baked snack
+categories. Involve: Investment team, external advisor. Timeline: Within 2 weeks. 4. Review detailed BDO
+Quality of Earnings report to understand EBITDA adjustments, accounting policies, and earnings quality issues.
+Involve: Investment team, accounting advisor. Timeline: Within 1 week. 5. Conduct facility tours of both Newkirk
+and Mississauga operations to assess manufacturing capabilities, automation levels, capacity utilization, and
+expansion plans. Involve: Investment team, operations expert. Timeline: Within 3 weeks. 6. Analyze detailed
+customer contracts, renewal schedules, and pricing mechanisms to validate revenue predictability and
+customer retention assumptions. Involve: Investment team, legal counsel. Timeline: Within 2 weeks. 7. Develop
+preliminary value creation plan focusing on pricing optimization, operational improvements, and M&A strategy
+with specific target identification. Involve: Investment team, operating partners. Timeline: Within 3 weeks. 8.
+Prepare detailed financial model incorporating capacity analysis, customer growth projections, and sensitivity
+analysis for key assumptions. Involve: Investment team, financial modeling expert. Timeline: Within 2 weeks.
+Missing Information: 1. Detailed management team bios, experience, and organizational structure - Critical for
+assessing execution capability and post-transaction planning. This impacts investment decision by determining
+management retention needs and operational risk assessment. Priority: High Impact. 2. Ownership structure
+and seller motivations - Essential for understanding transaction dynamics, seller expectations, and alignment
+with BPCP preferences for founder/family-owned businesses. Missing this makes deal structuring and
+negotiation strategy difficult. Priority: High Impact. 3. Detailed customer contract terms, renewal schedules, and
+pricing mechanisms - Critical for understanding revenue predictability, pricing power, and customer retention
+risk. This directly impacts revenue projections and valuation multiples. Priority: High Impact. 4. Working capital
+analysis and cash flow statement details - Important for understanding cash generation quality, working capital
+requirements, and free cash flow sustainability. Missing this limits financial modeling accuracy. Priority: Medium
+Impact. 5. Competitive landscape analysis and market share data beyond pita snacking - Needed to understand
+broader competitive positioning and market dynamics across all product categories. This impacts growth
+strategy and competitive risk assessment. Priority: Medium Impact. 6. Detailed capex plans, equipment
+specifications, and capacity analysis by facility - Important for validating growth capex requirements and returns
+on invested capital. Missing this limits assessment of capital efficiency and growth sustainability. Priority:
+Medium Impact. 7. Supply chain analysis including key suppliers, procurement strategies, and commodity
+hedging - Critical for understanding cost structure stability and supply chain risk. This impacts margin
+predictability and operational risk assessment. Priority: Medium Impact. 8. Historical M&A activity and
+integration capabilities - Important for assessing platform acquisition potential and management's M&A
+execution track record. Missing this limits value creation strategy development. Priority: Nice-to-know.
+Preliminary Recommendation: Proceed with Caution
+Rationale For Recommendation: Strong financial performance with 25.2% revenue CAGR and expanding
+EBITDA margins demonstrates scalable business model. Market-leading position with ~60% share in growing
+private label pita snacking market provides competitive moat and pricing power. Excellent strategic fit with
+BPCP's consumer focus, EBITDA scale requirements, and value creation expertise in M&A, operations, and
+supply chain optimization. High-quality cash generation with 95%+ free cash flow conversion supports attractive
+returns potential.
+
+BLUEPOINT Capital Partners | CIM Document Processor | Confidential
+
+