Add acceptance tests and align defaults to Sonnet 4
This commit is contained in:
@@ -30,7 +30,8 @@ DOCUMENT_AI_LOCATION=us
|
||||
DOCUMENT_AI_PROCESSOR_ID=your-processor-id
|
||||
GCS_BUCKET_NAME=your-gcs-bucket-name
|
||||
DOCUMENT_AI_OUTPUT_BUCKET_NAME=your-document-ai-output-bucket
|
||||
GOOGLE_APPLICATION_CREDENTIALS=./serviceAccountKey.json
|
||||
# Leave blank when using Firebase Functions secrets/Application Default Credentials
|
||||
GOOGLE_APPLICATION_CREDENTIALS=
|
||||
|
||||
# Processing Strategy
|
||||
PROCESSING_STRATEGY=document_ai_genkit
|
||||
@@ -72,4 +73,4 @@ AGENTIC_RAG_CONSISTENCY_CHECK=true
|
||||
# Monitoring and Logging
|
||||
AGENTIC_RAG_DETAILED_LOGGING=true
|
||||
AGENTIC_RAG_PERFORMANCE_TRACKING=true
|
||||
AGENTIC_RAG_ERROR_REPORTING=true
|
||||
AGENTIC_RAG_ERROR_REPORTING=true
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
/**
 * One fact that the acceptance suite looks for in the fixture texts.
 */
type ReferenceFact = {
  /** Human-readable label for the fact. */
  description: string;
  /** Substrings that are each searched for in the normalized (lowercased) text. */
  tokens: string[];
};
|
||||
|
||||
/**
 * Facts checked by the acceptance suite. Tokens are written in lowercase
 * because they are compared against text that has been lowercased first.
 */
const referenceFacts: ReferenceFact[] = [
  {
    description: 'Leading value-added positioning',
    tokens: ['leading', 'value-added', 'baked snacks']
  },
  {
    description: 'North American baked snack market size',
    tokens: ['~$12b', 'north american', 'baked snack']
  },
  {
    description: 'Private label and co-manufacturing focus',
    tokens: ['private label', 'co-manufacturing']
  },
  {
    description: 'Facility scale',
    tokens: ['150k+']
  }
];
|
||||
|
||||
/**
 * Line prefixes (lowercase, including the trailing colon) of report fields
 * that must carry a real value in the generated output.
 */
const requiredFields = [
  'geography:',
  'industry sector:',
  'key products services:'
];
|
||||
|
||||
const repoRoot = path.resolve(__dirname, '../../../..');
|
||||
const fixturesDir = path.join(repoRoot, 'backend', 'test-fixtures', 'handiFoods');
|
||||
const cimTextPath = path.join(fixturesDir, 'handi-foods-cim.txt');
|
||||
const outputTextPath = path.join(fixturesDir, 'handi-foods-output.txt');
|
||||
|
||||
/**
 * Acceptance suite: reads the CIM fixture and the generated-output fixture from
 * disk once, then checks that (1) every reference token occurs in both texts and
 * (2) the required report fields hold a value other than the
 * "Not specified in CIM" fallback.
 */
describe('Acceptance: Handi Foods CIM vs Generated Output', () => {
  let cimNormalized: string;
  let outputNormalized: string;
  let outputLines: string[];

  beforeAll(() => {
    // Collapse every whitespace run to a single space and lowercase, so tokens
    // still match when the fixture wraps a phrase across lines.
    const normalize = (text: string) => text.replace(/\s+/g, ' ').toLowerCase();
    const cimRaw = fs.readFileSync(cimTextPath, 'utf-8');
    const outputRaw = fs.readFileSync(outputTextPath, 'utf-8');

    cimNormalized = normalize(cimRaw);
    outputNormalized = normalize(outputRaw);

    // Keep the output's individual non-empty, trimmed lines for the
    // "Label: value" lookups below.
    outputLines = outputRaw
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);
  });

  it('verifies each reference fact exists in the CIM and in the generated output', () => {
    for (const fact of referenceFacts) {
      for (const token of fact.tokens) {
        expect(cimNormalized).toContain(token);
        expect(outputNormalized).toContain(token);
      }
    }
  });

  it('ensures key fields are resolved instead of falling back to "Not specified in CIM"', () => {
    // Returns the text after the label on the first line that starts with it
    // (case-insensitive), or '' when no such line exists.
    const findFieldValue = (label: string) => {
      const lowerLabel = label.toLowerCase();
      const line = outputLines.find((l) => l.toLowerCase().startsWith(lowerLabel));
      return line ? line.slice(label.length).trim() : '';
    };

    for (const label of requiredFields) {
      const value = findFieldValue(label);
      expect(value.length).toBeGreaterThan(0);
      expect(value.toLowerCase()).not.toContain('not specified in cim');
    }
  });
});
|
||||
@@ -93,7 +93,7 @@ const envSchema = Joi.object({
|
||||
DOCUMENT_AI_PROCESSOR_ID: Joi.string().required(),
|
||||
GCS_BUCKET_NAME: Joi.string().required(),
|
||||
DOCUMENT_AI_OUTPUT_BUCKET_NAME: Joi.string().required(),
|
||||
GOOGLE_APPLICATION_CREDENTIALS: Joi.string().default('./serviceAccountKey.json'),
|
||||
GOOGLE_APPLICATION_CREDENTIALS: Joi.string().allow('').default(''),
|
||||
|
||||
// Vector Database Configuration
|
||||
VECTOR_PROVIDER: Joi.string().valid('supabase', 'pinecone').default('supabase'),
|
||||
@@ -137,7 +137,7 @@ const envSchema = Joi.object({
|
||||
then: Joi.string().optional(), // Optional if using BYOK
|
||||
otherwise: Joi.string().allow('').optional()
|
||||
}),
|
||||
LLM_MODEL: Joi.string().default('gpt-4'),
|
||||
LLM_MODEL: Joi.string().default('claude-sonnet-4-20250514'),
|
||||
LLM_MAX_TOKENS: Joi.number().default(16000),
|
||||
LLM_TEMPERATURE: Joi.number().min(0).max(2).default(0.1),
|
||||
LLM_PROMPT_BUFFER: Joi.number().default(500),
|
||||
@@ -308,17 +308,16 @@ export const config = {
|
||||
openrouterApiKey: process.env['OPENROUTER_API_KEY'] || envVars['OPENROUTER_API_KEY'],
|
||||
openrouterUseBYOK: envVars['OPENROUTER_USE_BYOK'] === 'true', // Use BYOK (Bring Your Own Key)
|
||||
|
||||
// Model Selection - Using latest Claude 4.5 models (Oct 2025)
|
||||
// Claude Sonnet 4.5 is recommended for best balance of intelligence, speed, and cost
|
||||
// Supports structured outputs for guaranteed JSON schema compliance
|
||||
// NOTE: Claude Sonnet 4.5 offers improved accuracy and reasoning for full-document processing
|
||||
model: envVars['LLM_MODEL'] || 'claude-sonnet-4-5-20250929', // Primary model (Claude Sonnet 4.5 - latest and most accurate)
|
||||
fastModel: envVars['LLM_FAST_MODEL'] || 'claude-3-5-haiku-latest', // Fast model (Claude Haiku 3.5 latest - fastest and cheapest)
|
||||
// Model Selection - Unified on Claude Sonnet 4 (May 2025 release)
|
||||
// Claude Sonnet 4 20250514 is the currently supported, non-deprecated variant
|
||||
// This keeps multi-pass extraction aligned with the same reasoning model across passes
|
||||
model: envVars['LLM_MODEL'] || 'claude-sonnet-4-20250514', // Primary model (Claude Sonnet 4)
|
||||
fastModel: envVars['LLM_FAST_MODEL'] || 'claude-sonnet-4-20250514', // Fast model aligned with Sonnet 4
|
||||
fallbackModel: envVars['LLM_FALLBACK_MODEL'] || 'gpt-4o', // Fallback for creativity
|
||||
|
||||
// Task-specific model selection
|
||||
// Use Haiku 3.5 for financial extraction - faster and cheaper, with validation fallback to Sonnet
|
||||
financialModel: envVars['LLM_FINANCIAL_MODEL'] || 'claude-3-5-haiku-latest', // Fast model for financial extraction (Haiku 3.5 latest)
|
||||
// Use Sonnet 4 for financial extraction to avoid deprecated Haiku endpoints
|
||||
financialModel: envVars['LLM_FINANCIAL_MODEL'] || 'claude-sonnet-4-20250514', // Financial extraction model (Claude Sonnet 4)
|
||||
creativeModel: envVars['LLM_CREATIVE_MODEL'] || 'gpt-4o', // Best for creative content
|
||||
reasoningModel: envVars['LLM_REASONING_MODEL'] || 'claude-opus-4-1-20250805', // Best for complex reasoning (Opus 4.1)
|
||||
|
||||
@@ -449,4 +448,4 @@ export const getConfigHealth = () => {
|
||||
};
|
||||
};
|
||||
|
||||
export default config;
|
||||
export default config;
|
||||
|
||||
@@ -41,10 +41,11 @@ export const documentController = {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate file size (max 50MB)
|
||||
if (fileSize > 50 * 1024 * 1024) {
|
||||
const maxFileSize = config.upload.maxFileSize || 50 * 1024 * 1024;
|
||||
if (fileSize > maxFileSize) {
|
||||
const maxFileSizeMb = Math.round(maxFileSize / (1024 * 1024));
|
||||
res.status(400).json({
|
||||
error: 'File size exceeds 50MB limit',
|
||||
error: `File size exceeds ${maxFileSizeMb}MB limit`,
|
||||
correlationId: req.correlationId
|
||||
});
|
||||
return;
|
||||
@@ -1013,4 +1014,4 @@ export const documentController = {
|
||||
throw new Error('Failed to get document text');
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
@@ -38,6 +38,46 @@ export interface ErrorResponse {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Request-body keys that may be copied into log context. Any other key is
 * left out of the logged body.
 */
const BODY_WHITELIST = [
  'documentId',
  'id',
  'status',
  'fileName',
  'fileSize',
  'contentType',
  'correlationId',
];
|
||||
|
||||
/**
 * Reduces a request body to the whitelisted keys so it can be logged.
 *
 * @param body - The parsed request body, of any shape.
 * @returns `undefined` when `body` is falsy or not an object; `'[REDACTED]'`
 *   when it is an array or an object with none of the whitelisted own keys;
 *   otherwise a new object holding only the whitelisted own keys. Values are
 *   copied as-is and are not inspected further.
 */
const sanitizeRequestBody = (body: unknown): Record<string, unknown> | string | undefined => {
  if (!body || typeof body !== 'object') {
    return undefined;
  }

  if (Array.isArray(body)) {
    return '[REDACTED]';
  }

  // Narrowed above to a non-null, non-array object.
  const source = body as Record<string, unknown>;
  const sanitized: Record<string, unknown> = {};
  for (const key of BODY_WHITELIST) {
    // Own properties only, so inherited keys are never copied.
    if (Object.prototype.hasOwnProperty.call(source, key)) {
      sanitized[key] = source[key];
    }
  }

  return Object.keys(sanitized).length > 0 ? sanitized : '[REDACTED]';
};
|
||||
|
||||
/**
 * Collects the request details attached to a structured error log entry.
 * The body goes through `sanitizeRequestBody`; params and query are included
 * unchanged.
 *
 * @param req - The incoming Express request.
 * @returns A plain object with url, method, ip, user agent, user id (when the
 *   request carries a `user`), params, query and the sanitized body.
 */
const buildRequestLogContext = (req: Request): Record<string, unknown> => ({
  url: req.url,
  method: req.method,
  ip: req.ip,
  userAgent: req.get('User-Agent'),
  userId: (req as any).user?.id,
  params: req.params,
  query: req.query,
  body: sanitizeRequestBody(req.body),
});
|
||||
|
||||
// Correlation ID middleware
|
||||
export const correlationIdMiddleware = (req: Request, res: Response, next: NextFunction): void => {
|
||||
const correlationId = req.headers['x-correlation-id'] as string || uuidv4();
|
||||
@@ -61,16 +101,7 @@ export const errorHandler = (
|
||||
enhancedError.correlationId = correlationId;
|
||||
|
||||
// Structured error logging
|
||||
logError(enhancedError, correlationId, {
|
||||
url: req.url,
|
||||
method: req.method,
|
||||
ip: req.ip,
|
||||
userAgent: req.get('User-Agent'),
|
||||
userId: (req as any).user?.id,
|
||||
body: req.body,
|
||||
params: req.params,
|
||||
query: req.query
|
||||
});
|
||||
logError(enhancedError, correlationId, buildRequestLogContext(req));
|
||||
|
||||
// Create error response
|
||||
const errorResponse: ErrorResponse = {
|
||||
@@ -246,4 +277,4 @@ export const getUserFriendlyMessage = (error: AppError): string => {
|
||||
// Create correlation ID function
|
||||
export const createCorrelationId = (): string => {
|
||||
return uuidv4();
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,24 +1,85 @@
|
||||
import { Request, Response, NextFunction } from 'express';
|
||||
import admin from 'firebase-admin';
|
||||
import admin, { ServiceAccount } from 'firebase-admin';
|
||||
import fs from 'fs';
|
||||
import { config } from '../config/env';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
// Initialize Firebase Admin if not already initialized
|
||||
if (!admin.apps.length) {
|
||||
const shouldLogAuthDebug = process.env.AUTH_DEBUG === 'true';
|
||||
|
||||
/**
 * Writes a debug-level log entry only when `shouldLogAuthDebug` is true;
 * otherwise does nothing.
 *
 * @param message - Log message.
 * @param meta - Optional structured fields passed through to the logger.
 */
const logAuthDebug = (message: string, meta?: Record<string, unknown>): void => {
  if (shouldLogAuthDebug) {
    logger.debug(message, meta);
  }
};
|
||||
|
||||
const resolveServiceAccount = (): ServiceAccount | null => {
|
||||
try {
|
||||
// For Firebase Functions, use default credentials (recommended approach)
|
||||
admin.initializeApp({
|
||||
projectId: 'cim-summarizer'
|
||||
if (process.env.FIREBASE_SERVICE_ACCOUNT) {
|
||||
return JSON.parse(process.env.FIREBASE_SERVICE_ACCOUNT) as ServiceAccount;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Failed to parse FIREBASE_SERVICE_ACCOUNT env value', {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
console.log('✅ Firebase Admin initialized with default credentials');
|
||||
}
|
||||
|
||||
const serviceAccountPath = process.env.FIREBASE_SERVICE_ACCOUNT_PATH || config.googleCloud.applicationCredentials;
|
||||
if (serviceAccountPath) {
|
||||
try {
|
||||
if (fs.existsSync(serviceAccountPath)) {
|
||||
const fileContents = fs.readFileSync(serviceAccountPath, 'utf-8');
|
||||
return JSON.parse(fileContents) as ServiceAccount;
|
||||
}
|
||||
logger.debug('Service account path does not exist', { serviceAccountPath });
|
||||
} catch (error) {
|
||||
logger.warn('Failed to load Firebase service account file', {
|
||||
serviceAccountPath,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
const initializeFirebaseAdmin = (): void => {
|
||||
if (admin.apps.length) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const firebaseOptions: admin.AppOptions = {};
|
||||
const projectId = config.firebase.projectId || config.googleCloud.projectId;
|
||||
if (projectId) {
|
||||
firebaseOptions.projectId = projectId;
|
||||
}
|
||||
|
||||
const serviceAccount = resolveServiceAccount();
|
||||
if (serviceAccount) {
|
||||
firebaseOptions.credential = admin.credential.cert(serviceAccount);
|
||||
} else {
|
||||
try {
|
||||
firebaseOptions.credential = admin.credential.applicationDefault();
|
||||
logAuthDebug('Using application default credentials for Firebase Admin');
|
||||
} catch (credentialError) {
|
||||
logger.warn('Application default credentials unavailable, relying on environment defaults', {
|
||||
error: credentialError instanceof Error ? credentialError.message : String(credentialError),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
admin.initializeApp(firebaseOptions);
|
||||
logger.info('Firebase Admin initialized', { projectId: firebaseOptions.projectId });
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||
console.error('❌ Firebase Admin initialization failed:', errorMessage);
|
||||
// Don't reinitialize if already initialized
|
||||
logger.error('Firebase Admin initialization failed', { error: errorMessage });
|
||||
if (!admin.apps.length) {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
initializeFirebaseAdmin();
|
||||
|
||||
export interface FirebaseAuthenticatedRequest extends Request {
|
||||
user?: admin.auth.DecodedIdToken;
|
||||
@@ -30,45 +91,33 @@ export const verifyFirebaseToken = async (
|
||||
next: NextFunction
|
||||
): Promise<void> => {
|
||||
try {
|
||||
console.log('🔐 Authentication middleware called for:', req.method, req.url);
|
||||
console.log('🔐 Request headers:', Object.keys(req.headers));
|
||||
|
||||
// Debug Firebase Admin initialization
|
||||
console.log('🔐 Firebase apps available:', admin.apps.length);
|
||||
console.log('🔐 Firebase app names:', admin.apps.filter(app => app !== null).map(app => app!.name));
|
||||
|
||||
logAuthDebug('Authentication middleware invoked', {
|
||||
method: req.method,
|
||||
path: req.url,
|
||||
correlationId: req.correlationId,
|
||||
});
|
||||
logAuthDebug('Firebase admin apps', { count: admin.apps.length });
|
||||
|
||||
const authHeader = req.headers.authorization;
|
||||
console.log('🔐 Auth header present:', !!authHeader);
|
||||
console.log('🔐 Auth header starts with Bearer:', authHeader?.startsWith('Bearer '));
|
||||
|
||||
if (!authHeader || !authHeader.startsWith('Bearer ')) {
|
||||
console.log('❌ No valid authorization header');
|
||||
res.status(401).json({ error: 'No valid authorization header' });
|
||||
return;
|
||||
}
|
||||
|
||||
const idToken = authHeader.split('Bearer ')[1];
|
||||
console.log('🔐 Token extracted, length:', idToken?.length);
|
||||
|
||||
if (!idToken) {
|
||||
console.log('❌ No token provided');
|
||||
res.status(401).json({ error: 'No token provided' });
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('🔐 Attempting to verify Firebase ID token...');
|
||||
console.log('🔐 Token preview:', idToken.substring(0, 20) + '...');
|
||||
|
||||
// Verify the Firebase ID token
|
||||
const decodedToken = await admin.auth().verifyIdToken(idToken, true);
|
||||
console.log('✅ Token verified successfully for user:', decodedToken.email);
|
||||
console.log('✅ Token UID:', decodedToken.uid);
|
||||
console.log('✅ Token issuer:', decodedToken.iss);
|
||||
logAuthDebug('Firebase token verified', { uid: decodedToken.uid });
|
||||
|
||||
// Check if token is expired
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
if (decodedToken.exp && decodedToken.exp < now) {
|
||||
logger.warn('Token expired for user:', decodedToken.uid);
|
||||
logger.warn('Token expired for user', { uid: decodedToken.uid });
|
||||
res.status(401).json({ error: 'Token expired' });
|
||||
return;
|
||||
}
|
||||
@@ -76,11 +125,11 @@ export const verifyFirebaseToken = async (
|
||||
req.user = decodedToken;
|
||||
|
||||
// Log successful authentication
|
||||
logger.info('Authenticated request for user:', decodedToken.email);
|
||||
logger.info('Authenticated request', { uid: decodedToken.uid });
|
||||
|
||||
next();
|
||||
} catch (error: any) {
|
||||
logger.error('Firebase token verification failed:', {
|
||||
logger.error('Firebase token verification failed', {
|
||||
error: error.message,
|
||||
code: error.code,
|
||||
ip: req.ip,
|
||||
@@ -97,13 +146,15 @@ export const verifyFirebaseToken = async (
|
||||
// Try to verify without force refresh
|
||||
const decodedToken = await admin.auth().verifyIdToken(idToken, false);
|
||||
req.user = decodedToken;
|
||||
logger.info('Recovered authentication from session for user:', decodedToken.email);
|
||||
logger.info('Recovered authentication from session', { uid: decodedToken.uid });
|
||||
next();
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (recoveryError) {
|
||||
logger.debug('Session recovery failed:', recoveryError);
|
||||
logger.debug('Session recovery failed', {
|
||||
error: recoveryError instanceof Error ? recoveryError.message : String(recoveryError),
|
||||
});
|
||||
}
|
||||
|
||||
// Provide more specific error messages
|
||||
@@ -140,4 +191,4 @@ export const optionalFirebaseAuth = async (
|
||||
}
|
||||
|
||||
next();
|
||||
};
|
||||
};
|
||||
|
||||
@@ -22,6 +22,10 @@ const PERIOD_TOKEN_REGEX = /\b(?:(?:FY[-\s]?\d{1,2})|(?:FY[-\s]?)?20\d{2}[A-Z]*|
|
||||
const MONEY_REGEX = /-?\$?\(?\d[\d,]*(?:\.\d+)?\)?\s?(?:K|M|B)?/g;
|
||||
const PERCENT_REGEX = /-?\d{1,3}(?:\.\d+)?\s?%/g;
|
||||
|
||||
/**
 * Rewinds a regex so its next match starts at the beginning of the input.
 * This matters for regexes with the `g` (or `y`) flag, which remember where
 * the previous match ended in `lastIndex`.
 *
 * @param regex - The regex to rewind; it is mutated in place.
 */
const resetRegex = (regex: RegExp): void => {
  regex.lastIndex = 0;
};
|
||||
|
||||
const ROW_MATCHERS: Record<string, RegExp> = {
|
||||
revenue: /(revenue|net sales|total sales|top\s+line)/i,
|
||||
grossProfit: /(gross\s+profit)/i,
|
||||
@@ -137,33 +141,37 @@ function yearTokensToBuckets(tokens: string[]): Array<Bucket | null> {
|
||||
* Extract numeric tokens (money/percentages) from a line or combined lines.
|
||||
* Best practice: Extract all numeric values and preserve their order to match column positions.
|
||||
*/
|
||||
function extractNumericTokens(line: string, nextLine?: string): string[] {
|
||||
const combined = `${line} ${nextLine || ''}`;
|
||||
function extractNumericTokens(line: string, additionalContent?: string): string[] {
|
||||
const combined = additionalContent ? `${line} ${additionalContent}` : line;
|
||||
const lineLength = line.length;
|
||||
|
||||
// Extract money values with their positions to preserve column order
|
||||
resetRegex(MONEY_REGEX);
|
||||
const moneyMatches = Array.from(combined.matchAll(MONEY_REGEX))
|
||||
.map((m) => ({ value: normalizeToken(m[0]), index: m.index || 0 }))
|
||||
.map((m) => ({ value: normalizeToken(m[0]), index: m.index ?? 0 }))
|
||||
.filter((m) => m.value && /\d/.test(m.value));
|
||||
|
||||
// Extract percentage values with their positions
|
||||
resetRegex(PERCENT_REGEX);
|
||||
const percentMatches = Array.from(combined.matchAll(PERCENT_REGEX))
|
||||
.map((m) => ({ value: normalizeToken(m[0]), index: m.index || 0 }))
|
||||
.map((m) => ({ value: normalizeToken(m[0]), index: m.index ?? 0 }))
|
||||
.filter((m) => m.value && /\d/.test(m.value));
|
||||
|
||||
// Combine and sort by position to preserve column order (critical for table parsing)
|
||||
const allMatches = [...moneyMatches, ...percentMatches]
|
||||
.sort((a, b) => a.index - b.index)
|
||||
.map((m) => m.value);
|
||||
const sortedMatches = [...moneyMatches, ...percentMatches].sort((a, b) => a.index - b.index);
|
||||
|
||||
// Remove duplicates while preserving order
|
||||
const tokens: string[] = [];
|
||||
for (const token of allMatches) {
|
||||
if (!tokens.includes(token)) {
|
||||
tokens.push(token);
|
||||
}
|
||||
const primaryTokens = sortedMatches
|
||||
.filter(match => match.index < lineLength)
|
||||
.map(match => match.value);
|
||||
|
||||
if (primaryTokens.length >= 2 || !additionalContent) {
|
||||
return primaryTokens.length > 0 ? primaryTokens : sortedMatches.map(match => match.value);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
const secondaryTokens = sortedMatches
|
||||
.filter(match => match.index >= lineLength)
|
||||
.map(match => match.value);
|
||||
|
||||
return primaryTokens.concat(secondaryTokens);
|
||||
}
|
||||
|
||||
function isMoneyLike(value?: string): boolean {
|
||||
@@ -312,7 +320,7 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {
|
||||
|
||||
for (let j = lookAheadStart; j < lookAheadEnd; j++) {
|
||||
const checkLine = lines[j] || '';
|
||||
const hasNumbers = MONEY_REGEX.test(checkLine) || PERCENT_REGEX.test(checkLine);
|
||||
const hasNumbers = containsMoneyOrPercent(checkLine);
|
||||
|
||||
if (!hasNumbers) continue; // Skip lines without numbers
|
||||
|
||||
@@ -441,14 +449,27 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {
|
||||
|
||||
// CRITICAL: Only match rows that contain BOTH the field name AND numeric values
|
||||
// This prevents matching descriptive text that just mentions financial terms
|
||||
const hasMoneyOrPercent = MONEY_REGEX.test(combinedForTokens) || PERCENT_REGEX.test(combinedForTokens);
|
||||
const hasMoneyOrPercent = containsMoneyOrPercent(combinedForTokens);
|
||||
if (!hasMoneyOrPercent) continue; // Skip lines without actual financial numbers
|
||||
|
||||
for (const [field, matcher] of Object.entries(ROW_MATCHERS)) {
|
||||
if (!matcher.test(line)) continue;
|
||||
|
||||
// Extract tokens from the combined lines
|
||||
const tokens = extractNumericTokens(line, combinedForTokens);
|
||||
const extraContent = `${nextLine} ${lineAfterNext}`.trim() || undefined;
|
||||
let tokens = extractNumericTokens(line, extraContent);
|
||||
|
||||
if (['grossMargin', 'ebitdaMargin', 'revenueGrowth'].includes(field)) {
|
||||
const percentTokens = tokens.filter(isPercentLike);
|
||||
if (percentTokens.length > 0) {
|
||||
tokens = percentTokens;
|
||||
}
|
||||
} else if (['revenue', 'grossProfit', 'ebitda'].includes(field)) {
|
||||
const moneyTokens = tokens.filter(isMoneyLike);
|
||||
if (moneyTokens.length > 0) {
|
||||
tokens = moneyTokens;
|
||||
}
|
||||
}
|
||||
|
||||
// Only process if we found meaningful tokens (at least 2, indicating multiple periods)
|
||||
if (tokens.length < 2) {
|
||||
@@ -504,3 +525,10 @@ export function parseFinancialsFromText(fullText: string): ParsedFinancials {
|
||||
|
||||
return result;
|
||||
}
|
||||
/**
 * Reports whether `text` contains at least one match of `MONEY_REGEX` or
 * `PERCENT_REGEX`.
 *
 * Both are module-level regexes with the `g` flag, so `.test()` starts at
 * `lastIndex` and advances it on a match. Each regex is rewound before the
 * test so an earlier use cannot hide a match, and rewound again afterwards so
 * this call leaves no state behind for the next user.
 *
 * @param text - The line (or combined lines) to inspect.
 * @returns `true` when either regex matches somewhere in `text`.
 */
const containsMoneyOrPercent = (text: string): boolean => {
  const matchesFromStart = (regex: RegExp): boolean => {
    resetRegex(regex);
    const found = regex.test(text);
    resetRegex(regex);
    return found;
  };

  return matchesFromStart(MONEY_REGEX) || matchesFromStart(PERCENT_REGEX);
};
|
||||
|
||||
9480
backend/test-fixtures/handiFoods/handi-foods-cim.txt
Normal file
9480
backend/test-fixtures/handiFoods/handi-foods-cim.txt
Normal file
File diff suppressed because it is too large
Load Diff
231
backend/test-fixtures/handiFoods/handi-foods-output.txt
Normal file
231
backend/test-fixtures/handiFoods/handi-foods-output.txt
Normal file
@@ -0,0 +1,231 @@
|
||||
BLUEPOINT Capital Partners
|
||||
CIM Review Report
|
||||
Generated: 2/23/2026 at 7:15:07 PM
|
||||
|
||||
Deal Overview
|
||||
Geography: Toronto, Canada and Newkirk, Canada
|
||||
Reviewers: Not specified in CIM
|
||||
Deal Source: Not specified in CIM
|
||||
Cim Page Count: 81
|
||||
Date Reviewed: Not specified in CIM
|
||||
Employee Count: Not specified in CIM
|
||||
Industry Sector: Specialty Food Manufacturing / Better-For-You Baked Snacks
|
||||
Date C I M Received: Not specified in CIM
|
||||
Transaction Type: Not specified in CIM
|
||||
Target Company Name: Handi Foods
|
||||
Stated Reason For Sale: Not specified in CIM
|
||||
|
||||
Business Description
|
||||
Key Products Services: Crackers (60% of gross sales), Chips (21% of gross sales), Pretzel Chips (17% of
|
||||
gross sales), and Puffs & Bits (2% of gross sales). The company provides end-to-end manufacturing solutions
|
||||
including R&D, product development, manufacturing, and packaging services for private label retailers and comanufacturing partnerships.
|
||||
Core Operations Summary: Handi Foods is a leading value-added provider of baked snacks in North
|
||||
America, specializing in better-for-you (BFY) baked snacks including crackers, pretzel chips, chips, and puffs &
|
||||
bits. The company operates as an end-to-end solutions partner, simplifying private label and co-manufacturing
|
||||
programs for major retailers and brand partners. With two manufacturing facilities totaling 150K+ square feet
|
||||
and recent $65M+ capital investment in high-capacity automated production lines, Handi Foods serves both
|
||||
private label (69% of sales) and brand partner (31% of sales) customers across the U.S. (83% of sales) and
|
||||
Canada (17% of sales).
|
||||
Unique Value Proposition: Market-leading position with ~60% share of private label pita cracker & pita chip
|
||||
sales in U.S. & Canada, providing end-to-end solutions partner capabilities with highly automated, scalable
|
||||
manufacturing platform and strong customer loyalty with 91%+ of sales from sole source customers.
|
||||
|
||||
Market & Industry Analysis
|
||||
Barriers To Entry: Significant capital requirements for automated production lines ($65M+ recent investment),
|
||||
established customer relationships with sole source agreements, regulatory compliance for food manufacturing,
|
||||
and economies of scale in production.
|
||||
Key Industry Trends: Growing demand for better-for-you (BFY) baked snacks, private label expansion, and
|
||||
specialty snacking categories including sourdough, brioche, and functional formats.
|
||||
Estimated Market Size: Operating within the sizable ~$12B North American baked snack market. Near-term
|
||||
addressable market for current core and emerging product offerings estimated at ~$1,315M-$1,425M by
|
||||
2025E, growing to ~$1,925M-$2,245M by 2028P.
|
||||
Estimated Market Growth Rate: $470M-$510M addressable market growing at 5-6% CAGR for private label
|
||||
pita snacking segment where company holds ~60% market share.
|
||||
|
||||
Financial Summary
|
||||
Quality Of Earnings: FY2025A PF Adjusted EBITDA reflects $4.5M in one-time and non-recurring
|
||||
adjustments and $0.5M in pro forma adjustments, indicating some earnings quality considerations. Quality of
|
||||
|
||||
Earnings report prepared by BDO Canada LLP for periods FY2023A through FY2025A with adjustments for
|
||||
non-recurring and extraordinary items.
|
||||
Capital Expenditures: Total capital expenditures of $3.8M in FY2025A ($0.7M maintenance, $3.2M growth).
|
||||
Significant growth capex planned: $12.9M in FY2026E, $13.3M in FY2027P for new production line
|
||||
installations. Maintenance capex running at approximately 0.7-1.1% of revenue.
|
||||
Free Cash Flow Quality: 95%+ free cash flow conversion based on (Adj. EBITDA - Maintenance Capital
|
||||
Expenditures) / Adj. EBITDA calculation, indicating strong cash generation and high-quality earnings
|
||||
conversion.
|
||||
Revenue Growth Drivers: Volume expansion within existing top customers such as Trader Joe's and Lidl,
|
||||
increased ASP driven by turnkey program at Hain Canada, introduction of 11 inaugural Pretzelized SKUs in
|
||||
March 2024, and rapid increase of the Pretzelized business with addition of 17 new SKUs in FY2025A. Net
|
||||
sales CAGR of 25.2% from FY2021A-FY2025A.
|
||||
Margin Stability Analysis: Gross margin improved from 32.0% in FY2023A to 35.7% in FY2025A (370 bps
|
||||
improvement), driven by increased pricing in response to commodity inflation and fixed operating cost leverage.
|
||||
EBITDA margin expanded from 26.8% in FY2023A to 31.9% in FY2025A (510 bps improvement),
|
||||
demonstrating strong operational leverage and margin expansion capability.
|
||||
Working Capital Intensity: Not specifically detailed in CIM, but freight, transportation, and commissions
|
||||
decreased from $4.5M to $4.0M despite revenue growth, suggesting improving working capital efficiency.
|
||||
|
||||
Management Team Overview
|
||||
Key Leaders: Brian Arbique as CEO since 2017, John Dobie as VP of Operations since 2017, Marc Diamant
|
||||
as CFO in 2024.
|
||||
Organizational Structure: Not specified in CIM
|
||||
Post Transaction Intentions: Not specified in CIM
|
||||
Management Quality Assessment: Experienced management team with Brian Arbique as CEO since 2017
|
||||
and John Dobie as VP of Operations since 2017, indicating 8+ years of tenure during the company's
|
||||
transformation and growth phase. Recent addition of Marc Diamant as CFO in 2024 suggests
|
||||
professionalization of finance function. Management has overseen successful transition from pita bread to BFY
|
||||
snacks, significant capacity expansion, and strong financial performance.
|
||||
|
||||
Preliminary Investment Thesis
|
||||
Key Attractions: 1. Market-leading position with ~60% share of private label pita cracker & pita chip sales in
|
||||
U.S. & Canada, providing significant competitive moat and pricing power in a $470M-$510M addressable
|
||||
market growing at 5-6% CAGR. This dominant position supports sustainable revenue growth and margin
|
||||
expansion opportunities. 2. Exceptional financial performance with 25.2% net sales CAGR from FY2021A-FY2025A, reaching $90.1M revenue in FY2025A, and EBITDA margin expansion from 26.8% to 31.9% over
|
||||
two years, demonstrating strong operational leverage and scalability. 3. Transformative customer relationship
|
||||
with Pretzelized, growing from first order in March 2024 to projected 7.2M KGs volume by FY2030P under
|
||||
exclusive long-term sole source agreement, representing significant embedded growth with high-growth brand
|
||||
partner. 4. Highly automated, scalable manufacturing platform with $65M+ recent capex investment in high-capacity production lines, providing ample capacity for growth and operational efficiency advantages over
|
||||
competitors. 5. Strong customer loyalty with 91%+ of FY2025A gross sales from sole source customers and
|
||||
average top 10 customer tenure of ~8 years, indicating sticky customer relationships and predictable revenue
|
||||
base. 6. Diversified and attractive business mix across channels (Grocery 41%, Mass 35%, Private Label
|
||||
Grocers 14%, Club 10%) and geographies (U.S. 83%, Canada 17%), reducing concentration risk while
|
||||
maintaining market leadership. 7. Proven innovation capabilities with 35 new SKUs launched since FY2021A
|
||||
and robust R&D pipeline, including emerging products in high-growth categories like sourdough, brioche, and
|
||||
functional formats with estimated $14.5M FY2030P new product revenue opportunity. 8. Exceptional cash
|
||||
generation with 95%+ free cash flow conversion, providing strong cash returns and flexibility for growth
|
||||
investments and potential acquisitions.
|
||||
Potential Risks: 1. Customer concentration risk (Operational): While 91%+ of sales from sole source
|
||||
customers provides stability, loss of any major customer could significantly impact revenue. Probability: Low,
|
||||
Impact: High. Mitigation: Long-term contracts and strong customer satisfaction scores. Deal-breaker: No, but
|
||||
requires careful contract review. 2. Commodity price volatility (Financial): Direct materials represent significant
|
||||
cost component, and commodity inflation could pressure margins if not passed through to customers.
|
||||
Probability: Medium, Impact: Medium. Mitigation: Pricing mechanisms and customer relationships support price
|
||||
increases. Deal-breaker: No. 3. Capacity utilization risk (Operational): Current utilization at 63% in FY2025A
|
||||
with significant capex planned for new lines, creating risk of underutilized assets if growth doesn't materialize.
|
||||
|
||||
Probability: Medium, Impact: Medium. Mitigation: Strong customer demand visibility and contracted growth.
|
||||
Deal-breaker: No. 4. Pretzelized dependence risk (Operational): Rapid growth tied to single brand partner
|
||||
Pretzelized creates concentration risk if relationship deteriorates or brand fails to achieve projected growth.
|
||||
Probability: Low, Impact: High. Mitigation: Exclusive long-term contract and strong collaborative relationship.
|
||||
Deal-breaker: No, but requires deep customer diligence. 5. Private label competitive dynamics (Market): Private
|
||||
label customers could potentially switch suppliers or bring production in-house, threatening market position.
|
||||
Probability: Low, Impact: Medium. Mitigation: Sole source agreements and high switching costs. Deal-breaker:
|
||||
No. 6. Food safety and regulatory risk (Regulatory): Food manufacturing subject to strict regulations and
|
||||
potential recalls could damage reputation and financial performance. Probability: Low, Impact: High. Mitigation:
|
||||
Established quality systems and insurance coverage. Deal-breaker: No, but requires operational diligence. 7.
|
||||
Cross-border operations complexity (Operational): Operating in both U.S. and Canada creates currency,
|
||||
regulatory, and operational complexity. Probability: Medium, Impact: Low. Mitigation: Experienced management
|
||||
and established operations. Deal-breaker: No.
|
||||
Value Creation Levers: 1. Pricing optimization and margin expansion: Leverage market-leading position to
|
||||
implement 2-3% price increases across product portfolio, potentially adding $1.8-2.7M annual revenue with
|
||||
high flow-through to EBITDA given fixed cost base. Implementation: BPCP pricing expertise and market
|
||||
analysis. Timeline: 12-18 months. Confidence: High. 2. Operational efficiency improvements: Optimize
|
||||
production scheduling, reduce changeover times, and improve labor productivity through BPCP's operational
|
||||
expertise, targeting 100-200 bps EBITDA margin improvement worth $0.9-1.8M annually. Timeline: 18-24
|
||||
months. Confidence: Medium-High. 3. M&A consolidation strategy: Acquire complementary baked snack
|
||||
manufacturers to expand product portfolio, customer base, and geographic reach, with illustrative targets
|
||||
ranging from $15M-$40M EBITDA providing platform for 2-3x revenue growth. Implementation: BPCP's M&A
|
||||
expertise. Timeline: 12-36 months. Confidence: Medium. 4. New product development acceleration: Leverage
|
||||
innovation pipeline including sourdough, brioche, and functional formats to capture estimated $14.5M FY2030P
|
||||
revenue opportunity, with BPCP supporting go-to-market strategy and customer development. Timeline: 24-36
|
||||
months. Confidence: Medium. 5. Customer diversification and expansion: Utilize BPCP's consumer industry
|
||||
relationships to accelerate new customer wins and expand wallet share with existing customers, targeting
|
||||
15-20% revenue growth through customer expansion. Timeline: 18-30 months. Confidence: Medium-High. 6.
|
||||
Supply chain optimization: Implement BPCP's supply chain expertise to optimize procurement, reduce direct
|
||||
material costs by 50-100 bps, and improve working capital efficiency, potentially adding $0.5-0.9M annual
|
||||
EBITDA. Timeline: 12-24 months. Confidence: Medium. 7. Technology and automation enhancement: Further
|
||||
automate production processes and implement data analytics to improve yield, reduce waste, and optimize
|
||||
capacity utilization, targeting 2-3% improvement in gross margins. Timeline: 24-36 months. Confidence:
|
||||
Medium. 8. International expansion: Leverage cross-border capabilities to expand into additional international
|
||||
markets beyond current U.S./Canada footprint, potentially adding 10-15% revenue growth over 3-5 years.
|
||||
Timeline: 36-60 months. Confidence: Low-Medium.
|
||||
Alignment With Fund Strategy: EBITDA Range Fit (Score: 10/10): LTM Adjusted EBITDA of $28.8M CAD
|
||||
(~$21M USD) fits perfectly within BPCP's $5MM+ EBITDA target range. Industry Focus (Score: 9/10): Specialty
|
||||
food manufacturing in consumer end market aligns strongly with BPCP's consumer focus, though industrial
|
||||
component is limited. Geographic Preferences (Score: 4/10): Toronto and Newkirk, Canada locations are not
|
||||
within driving distance of Cleveland or Charlotte, presenting geographic misalignment challenge. Value
|
||||
Creation Expertise Alignment (Score: 9/10): Strong alignment with BPCP's M&A capabilities (fragmented
|
||||
market consolidation opportunity), technology & automation (recent $65M investment platform), supply chain
|
||||
optimization (procurement and vertical integration opportunities), and operational improvements (capacity
|
||||
utilization, efficiency gains). Founder/Family Ownership (Score: 8/10): Founded by first-generation immigrant in
|
||||
1977 with family heritage, though current ownership by Ironbridge Equity Partners since 2022 reduces founder
|
||||
involvement. Market Position (Score: 9/10): Leading platform with defensible competitive position and growth
|
||||
runway aligns with BPCP's preference for market leaders. Financial Profile (Score: 9/10): Strong growth (25.2%
|
||||
CAGR), margin expansion (18.6% to 31.9%), and cash generation (95%+ FCF conversion) align with BPCP's
|
||||
financial criteria. Overall Alignment Score: 8.3/10. Strong strategic fit across most criteria with primary concern
|
||||
being geographic distance from BPCP's preferred Cleveland/Charlotte proximity. The company's scale, market
|
||||
position, growth profile, and value creation opportunities align well with BPCP's investment strategy despite
|
||||
geographic considerations.
|
||||
|
||||
Key Questions & Next Steps
|
||||
Critical Questions: 1. What is the detailed ownership structure and are current owners founder/family-owned
|
||||
as preferred by BPCP? This is critical for understanding seller motivations, transaction structure, and alignment
|
||||
with BPCP's investment preferences for founder/family-owned businesses. Priority: High Impact. 2. What are the
|
||||
specific terms, duration, and renewal provisions of the exclusive Pretzelized contract given its importance to
|
||||
growth projections? With Pretzelized representing significant projected growth, understanding contract
|
||||
protection and renewal risk is essential for validating growth assumptions and investment thesis. Priority: Deal-breaker. 3. What is the detailed management team composition, experience, and post-transaction retention
|
||||
|
||||
plans? Given the operational complexity and growth plans, management quality and retention is critical for
|
||||
successful value creation and operational execution. Priority: High Impact. 4. What are the specific capacity
|
||||
utilization rates by production line and facility, and how does planned capex align with contracted customer
|
||||
demand? With 63% current utilization and $26M+ planned capex, understanding capacity-demand alignment is
|
||||
crucial for validating growth capex requirements and returns. Priority: High Impact. 5. What is the customer
|
||||
contract renewal schedule and historical retention rates for the next 24 months? With 91%+ sole source
|
||||
customer relationships, understanding renewal timing and retention risk is essential for revenue predictability
|
||||
and valuation support. Priority: High Impact. 6. What are the detailed EBITDA adjustments and quality of
|
||||
earnings issues identified in the BDO report? With $4.5M in one-time adjustments in FY2025A, understanding
|
||||
earnings quality is critical for normalized EBITDA assessment and valuation. Priority: High Impact. 7. What is
|
||||
the competitive response risk if Handi Foods continues taking market share in private label pita snacking?
|
||||
Understanding competitive dynamics and potential retaliation is important for assessing sustainability of market
|
||||
leadership and pricing power. Priority: Medium Impact. 8. What are the specific food safety protocols, insurance
|
||||
coverage, and historical recall/quality issues? Given food manufacturing risks, understanding quality systems
|
||||
and risk mitigation is essential for operational due diligence. Priority: Medium Impact.
|
||||
Proposed Next Steps: 1. Schedule comprehensive management presentation to assess team quality,
|
||||
experience, and post-transaction intentions, including detailed discussion of growth strategy and operational
|
||||
capabilities. Involve: Investment team lead, operating partner. Timeline: Within 1 week. 2. Conduct detailed
|
||||
customer reference calls with top 5 customers to validate relationship strength, contract terms, renewal
|
||||
likelihood, and growth potential. Focus particularly on Pretzelized relationship and contract terms. Involve:
|
||||
Investment team, industry expert. Timeline: Within 2 weeks. 3. Engage food industry expert and former private
|
||||
label executive to assess competitive positioning, market dynamics, and growth sustainability in baked snack
|
||||
categories. Involve: Investment team, external advisor. Timeline: Within 2 weeks. 4. Review detailed BDO
|
||||
Quality of Earnings report to understand EBITDA adjustments, accounting policies, and earnings quality issues.
|
||||
Involve: Investment team, accounting advisor. Timeline: Within 1 week. 5. Conduct facility tours of both Newkirk
|
||||
and Mississauga operations to assess manufacturing capabilities, automation levels, capacity utilization, and
|
||||
expansion plans. Involve: Investment team, operations expert. Timeline: Within 3 weeks. 6. Analyze detailed
|
||||
customer contracts, renewal schedules, and pricing mechanisms to validate revenue predictability and
|
||||
customer retention assumptions. Involve: Investment team, legal counsel. Timeline: Within 2 weeks. 7. Develop
|
||||
preliminary value creation plan focusing on pricing optimization, operational improvements, and M&A strategy
|
||||
with specific target identification. Involve: Investment team, operating partners. Timeline: Within 3 weeks. 8.
|
||||
Prepare detailed financial model incorporating capacity analysis, customer growth projections, and sensitivity
|
||||
analysis for key assumptions. Involve: Investment team, financial modeling expert. Timeline: Within 2 weeks.
|
||||
Missing Information: 1. Detailed management team bios, experience, and organizational structure - Critical for
|
||||
assessing execution capability and post-transaction planning. This impacts investment decision by determining
|
||||
management retention needs and operational risk assessment. Priority: High Impact. 2. Ownership structure
|
||||
and seller motivations - Essential for understanding transaction dynamics, seller expectations, and alignment
|
||||
with BPCP preferences for founder/family-owned businesses. Missing this makes deal structuring and
|
||||
negotiation strategy difficult. Priority: High Impact. 3. Detailed customer contract terms, renewal schedules, and
|
||||
pricing mechanisms - Critical for understanding revenue predictability, pricing power, and customer retention
|
||||
risk. This directly impacts revenue projections and valuation multiples. Priority: High Impact. 4. Working capital
|
||||
analysis and cash flow statement details - Important for understanding cash generation quality, working capital
|
||||
requirements, and free cash flow sustainability. Missing this limits financial modeling accuracy. Priority: Medium
|
||||
Impact. 5. Competitive landscape analysis and market share data beyond pita snacking - Needed to understand
|
||||
broader competitive positioning and market dynamics across all product categories. This impacts growth
|
||||
strategy and competitive risk assessment. Priority: Medium Impact. 6. Detailed capex plans, equipment
|
||||
specifications, and capacity analysis by facility - Important for validating growth capex requirements and returns
|
||||
on invested capital. Missing this limits assessment of capital efficiency and growth sustainability. Priority:
|
||||
Medium Impact. 7. Supply chain analysis including key suppliers, procurement strategies, and commodity
|
||||
hedging - Critical for understanding cost structure stability and supply chain risk. This impacts margin
|
||||
predictability and operational risk assessment. Priority: Medium Impact. 8. Historical M&A activity and
|
||||
integration capabilities - Important for assessing platform acquisition potential and management's M&A
|
||||
execution track record. Missing this limits value creation strategy development. Priority: Nice-to-know.
|
||||
Preliminary Recommendation: Proceed with Caution
|
||||
Rationale For Recommendation: Strong financial performance with 25.2% revenue CAGR and expanding
|
||||
EBITDA margins demonstrates scalable business model. Market-leading position with ~60% share in growing
|
||||
private label pita snacking market provides competitive moat and pricing power. Excellent strategic fit with
|
||||
BPCP's consumer focus, EBITDA scale requirements, and value creation expertise in M&A, operations, and
|
||||
supply chain optimization. High-quality cash generation with 95%+ free cash flow conversion supports attractive
|
||||
returns potential.
|
||||
|
||||
BLUEPOINT Capital Partners | CIM Document Processor | Confidential
|
||||
|
||||
|
||||
Reference in New Issue
Block a user