feat: Complete cloud-native CIM Document Processor with full BPCP template

🌐 Cloud-Native Architecture:
- Firebase Functions deployment (no Docker)
- Supabase database (replacing local PostgreSQL)
- Google Cloud Storage integration
- Document AI + Agentic RAG processing pipeline
- Claude-3.5-Sonnet LLM integration

📋 Full BPCP CIM Review Template (7 sections):
- Deal Overview
- Business Description
- Market & Industry Analysis
- Financial Summary (with historical financials table)
- Management Team Overview
- Preliminary Investment Thesis
- Key Questions & Next Steps

🔧 Cloud Migration Improvements:
- PostgreSQL → Supabase migration complete
- Local storage → Google Cloud Storage
- Docker deployment → Firebase Functions
- Schema mapping fixes (camelCase/snake_case)
- Enhanced error handling and logging
- Vector database with fallback mechanisms

📄 Complete End-to-End Cloud Workflow:
1. Upload PDF → Document AI extraction
2. Agentic RAG processing → Structured CIM data
3. Store in Supabase → Vector embeddings
4. Auto-generate PDF → Full BPCP template
5. Download complete CIM review

🚀 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Jon
2025-08-01 17:51:45 -04:00
parent 3d94fcbeb5
commit df079713c4
45 changed files with 2320 additions and 5282 deletions

View File

@@ -0,0 +1,153 @@
const { createClient } = require('@supabase/supabase-js');
const fs = require('fs');
const path = require('path');
// Load environment variables
require('dotenv').config();
// Supabase connection settings, taken from the process environment
// (dotenv has already been asked to load .env at this point).
const {
  SUPABASE_URL: supabaseUrl,
  SUPABASE_SERVICE_KEY: supabaseServiceKey,
} = process.env;

// Stop right away when either credential is missing or empty.
if (!(supabaseUrl && supabaseServiceKey)) {
  [
    '❌ Missing Supabase credentials',
    'Make sure SUPABASE_URL and SUPABASE_SERVICE_KEY are set in .env',
  ].forEach((line) => console.error(line));
  process.exit(1);
}

// Client used by the setup routines defined below.
const supabase = createClient(supabaseUrl, supabaseServiceKey);
/**
 * Split a SQL script into individual statements.
 *
 * Unlike a plain `split(';')`, a semicolon only ends a statement when it sits
 * outside of:
 *   - single-quoted string literals and double-quoted identifiers
 *     (a doubled quote character is treated as an escaped quote),
 *   - dollar-quoted bodies (`$$ ... $$` or `$tag$ ... $tag$`),
 *   - `--` line comments and (nested) block comments.
 *
 * Comments outside of quoted text are removed from the output, so a statement
 * that is merely preceded by a comment line is kept, and chunks consisting
 * only of comments/whitespace are dropped. Statements are returned trimmed and
 * without their terminating semicolon.
 *
 * Limitation: backslash-escaped quotes (e.g. inside E'...' strings) are not
 * recognised.
 *
 * @param {string} sqlText - Full text of the SQL script.
 * @returns {string[]} Non-empty statements in script order.
 */
function splitSqlStatements(sqlText) {
  const identifierChar = /[A-Za-z0-9_$]/;
  // Sticky so the match is anchored at the current scan position.
  const dollarTag = /\$(?:[A-Za-z_][A-Za-z0-9_]*)?\$/y;
  const statements = [];
  const length = sqlText.length;
  let current = '';
  let i = 0;

  const flush = () => {
    const trimmed = current.trim();
    if (trimmed.length > 0) {
      statements.push(trimmed);
    }
    current = '';
  };

  while (i < length) {
    const ch = sqlText[i];

    // Line comment: skip up to (not including) the newline.
    if (sqlText.startsWith('--', i)) {
      const eol = sqlText.indexOf('\n', i);
      i = eol === -1 ? length : eol;
      continue;
    }

    // Block comment, tracking nesting depth; replaced by a single space.
    if (sqlText.startsWith('/*', i)) {
      let depth = 1;
      i += 2;
      while (i < length && depth > 0) {
        if (sqlText.startsWith('/*', i)) {
          depth += 1;
          i += 2;
        } else if (sqlText.startsWith('*/', i)) {
          depth -= 1;
          i += 2;
        } else {
          i += 1;
        }
      }
      current += ' ';
      continue;
    }

    // Quoted string literal or quoted identifier: copy verbatim.
    if (ch === "'" || ch === '"') {
      let end = i + 1;
      while (end < length) {
        if (sqlText[end] !== ch) {
          end += 1;
          continue;
        }
        if (sqlText[end + 1] === ch) {
          end += 2; // doubled quote = escaped quote, still inside
          continue;
        }
        break;
      }
      const stop = Math.min(end + 1, length);
      current += sqlText.slice(i, stop);
      i = stop;
      continue;
    }

    // Dollar-quoted body: copy verbatim up to the matching closing tag.
    // A `$` directly after an identifier character belongs to that token.
    if (ch === '$' && !(i > 0 && identifierChar.test(sqlText[i - 1]))) {
      dollarTag.lastIndex = i;
      const tagMatch = dollarTag.exec(sqlText);
      if (tagMatch) {
        const tag = tagMatch[0];
        const close = sqlText.indexOf(tag, i + tag.length);
        const stop = close === -1 ? length : close + tag.length;
        current += sqlText.slice(i, stop);
        i = stop;
        continue;
      }
    }

    if (ch === ';') {
      flush();
      i += 1;
      continue;
    }

    current += ch;
    i += 1;
  }

  flush();
  return statements;
}

/**
 * Run `supabase_vector_setup.sql` (located next to this script) statement by
 * statement through the `exec_sql` RPC, then verify the result by querying the
 * `document_chunks` table and calling the `match_document_chunks` RPC with a
 * dummy embedding.
 *
 * Individual statement or verification errors are logged and do not stop the
 * run, but they are counted so the final message does not claim success when
 * something failed. An unexpected exception terminates the process with
 * exit code 1.
 *
 * NOTE(review): `exec_sql` and `match_document_chunks` are assumed to exist in
 * the target database; nothing in this file creates `exec_sql` — confirm.
 *
 * @returns {Promise<void>}
 */
async function setupVectorDatabase() {
  // Length of the dummy query vector used for the search smoke test.
  const EMBEDDING_DIMENSIONS = 1536;

  try {
    console.log('🚀 Setting up Supabase vector database...');

    const sqlScript = fs.readFileSync(path.join(__dirname, 'supabase_vector_setup.sql'), 'utf8');
    const statements = splitSqlStatements(sqlScript);
    console.log(`📝 Executing ${statements.length} SQL statements...`);

    let problemCount = 0;

    // Statements run sequentially on purpose: later ones may depend on earlier ones.
    for (const [index, statement] of statements.entries()) {
      const position = index + 1;
      console.log(` Executing statement ${position}/${statements.length}...`);
      const { error } = await supabase.rpc('exec_sql', { sql: statement });
      if (error) {
        // Best effort: record the failure and keep going with the rest.
        problemCount += 1;
        console.error(`❌ Error executing statement ${position}:`, error);
      } else {
        console.log(` ✅ Statement ${position} executed successfully`);
      }
    }

    // Zero-row select: only checks that the table can be queried.
    console.log('🔍 Verifying table structure...');
    const { error: tableError } = await supabase
      .from('document_chunks')
      .select('*')
      .limit(0);
    if (tableError) {
      problemCount += 1;
      console.error('❌ Error verifying table:', tableError);
    } else {
      console.log('✅ document_chunks table verified successfully');
    }

    console.log('🔍 Testing vector search function...');
    const testEmbedding = Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0.1);
    const { data: searchResult, error: searchError } = await supabase.rpc('match_document_chunks', {
      query_embedding: testEmbedding,
      match_threshold: 0.5,
      match_count: 5,
    });
    if (searchError) {
      problemCount += 1;
      console.error('❌ Error testing search function:', searchError);
    } else {
      console.log('✅ Vector search function working correctly');
      console.log(` Found ${searchResult ? searchResult.length : 0} results`);
    }

    if (problemCount === 0) {
      console.log('🎉 Supabase vector database setup completed successfully!');
    } else {
      console.error(`❌ Supabase vector database setup finished with ${problemCount} error(s); see the log above`);
    }
  } catch (error) {
    console.error('❌ Setup failed:', error);
    process.exit(1);
  }
}
/**
 * Alternative setup path that does not read the SQL file: it sends two inline
 * statements through the `exec_sql` RPC (enable the `vector` extension, create
 * the `document_chunks` table) and then checks that the table can be queried.
 *
 * An extension error is only reported as a warning. Table-creation and
 * table-access errors are logged and counted, so the closing message does not
 * announce success after a failure. An unexpected exception terminates the
 * process with exit code 1, matching `setupVectorDatabase`.
 *
 * NOTE(review): `exec_sql` is assumed to exist in the target database; nothing
 * in this file creates it — confirm.
 *
 * @returns {Promise<void>}
 */
async function setupVectorDatabaseDirect() {
  try {
    console.log('🚀 Setting up Supabase vector database (direct approach)...');
    let failureCount = 0;

    // First, enable the vector extension.
    console.log('📦 Enabling pgvector extension...');
    const { error: extError } = await supabase.rpc('exec_sql', {
      sql: 'CREATE EXTENSION IF NOT EXISTS vector;',
    });
    if (extError) {
      console.log('⚠️ Extension error (might already exist):', extError.message);
    }

    // Create the table.
    console.log('🏗️ Creating document_chunks table...');
    const createTableSQL = `
      CREATE TABLE IF NOT EXISTS document_chunks (
        id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
        document_id TEXT NOT NULL,
        content TEXT NOT NULL,
        embedding VECTOR(1536),
        metadata JSONB DEFAULT '{}',
        chunk_index INTEGER NOT NULL,
        created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
        updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
      );
    `;
    const { error: tableError } = await supabase.rpc('exec_sql', {
      sql: createTableSQL,
    });
    if (tableError) {
      failureCount += 1;
      console.error('❌ Error creating table:', tableError);
    } else {
      console.log('✅ Table created successfully');
    }

    // Read-only accessibility check (nothing is inserted). A head request with
    // an exact count is used instead of selecting a `count` column, which the
    // table definition above does not contain.
    console.log('🧪 Testing basic operations...');
    const { error } = await supabase
      .from('document_chunks')
      .select('*', { count: 'exact', head: true });
    if (error) {
      failureCount += 1;
      console.error('❌ Error testing table:', error);
    } else {
      console.log('✅ Table is accessible');
    }

    if (failureCount === 0) {
      console.log('🎉 Basic vector database setup completed!');
    } else {
      console.error(`❌ Basic vector database setup finished with ${failureCount} error(s); see the log above`);
    }
  } catch (error) {
    console.error('❌ Setup failed:', error);
    process.exit(1);
  }
}
// Script entry point: only the direct variant is invoked here.
// `void` marks the returned promise as intentionally not awaited.
void setupVectorDatabaseDirect();