-- NOTE(review): the header below was pasted in from a commit message and file-viewer
-- metadata ("111 lines / 4.1 KiB / PL/PgSQL"); it is preserved as a comment so the
-- script remains valid SQL.
--
-- Cloud-Native Architecture: Firebase Functions deployment (no Docker), Supabase
-- database (replacing local PostgreSQL), Google Cloud Storage integration,
-- Document AI + Agentic RAG processing pipeline, Claude-3.5-Sonnet LLM integration.
-- Full BPCP CIM Review Template (7 sections): Deal Overview; Business Description;
-- Market & Industry Analysis; Financial Summary (with historical financials table);
-- Management Team Overview; Preliminary Investment Thesis; Key Questions & Next Steps.
-- Cloud Migration Improvements: PostgreSQL -> Supabase; local storage -> Google Cloud
-- Storage; Docker deployment -> Firebase Functions; schema mapping fixes
-- (camelCase/snake_case); enhanced error handling and logging; vector database with
-- fallback mechanisms.
-- End-to-End Cloud Workflow: 1. Upload PDF -> Document AI extraction; 2. Agentic RAG
-- processing -> structured CIM data; 3. Store in Supabase -> vector embeddings;
-- 4. Auto-generate PDF -> full BPCP template; 5. Download complete CIM review.
-- Supabase Vector Database Setup for CIM Document Processor
-- This script creates the document_chunks table with vector search capabilities.

-- Enable the pgvector extension, which provides the VECTOR type and the
-- distance operators (<=>, <->, <#>) used by the similarity search below.
CREATE EXTENSION IF NOT EXISTS vector;
-- Create the document_chunks table.
-- Each row stores one chunk of an uploaded document together with its embedding,
-- so the RAG pipeline can retrieve the most relevant chunks for a query.
CREATE TABLE IF NOT EXISTS document_chunks (
    id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
    document_id TEXT NOT NULL,                          -- identifier of the parent document
    content TEXT NOT NULL,                              -- raw text of this chunk
    embedding VECTOR(1536),                             -- OpenAI embedding dimensions (1536); nullable until computed
    metadata JSONB DEFAULT '{}',                        -- free-form per-chunk metadata
    chunk_index INTEGER NOT NULL,                       -- ordinal position of the chunk within its document
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()   -- maintained by trigger (see below)
);
-- Create indexes for better performance.
CREATE INDEX IF NOT EXISTS idx_document_chunks_document_id ON document_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_document_chunks_chunk_index ON document_chunks(chunk_index);

-- Approximate-nearest-neighbor index for cosine similarity search.
-- NOTE: ivfflat recall depends on the data present at index build time; with
-- lists = 100, consider rebuilding (REINDEX) after bulk-loading embeddings.
CREATE INDEX IF NOT EXISTS idx_document_chunks_embedding ON document_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
-- Create a function to automatically update the updated_at timestamp.
-- Intended for use as a BEFORE UPDATE row-level trigger: stamps the row
-- being written with the current transaction time.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;  -- unquoted language name; quoted form ('plpgsql') is deprecated
-- Create trigger to automatically update updated_at on every row update.
-- DROP first so this script stays idempotent (CREATE TRIGGER has no IF NOT EXISTS).
DROP TRIGGER IF EXISTS update_document_chunks_updated_at ON document_chunks;
CREATE TRIGGER update_document_chunks_updated_at
    BEFORE UPDATE ON document_chunks
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- Create vector similarity search function.
-- Returns up to match_count chunks whose cosine similarity to query_embedding
-- exceeds match_threshold, most similar first.
--   similarity = 1 - cosine_distance, so it ranges from -1 to 1 (1 = identical direction).
-- Rows with a NULL embedding are excluded automatically (NULL comparisons are not > threshold).
CREATE OR REPLACE FUNCTION match_document_chunks(
    query_embedding VECTOR(1536),
    match_threshold FLOAT DEFAULT 0.7,  -- minimum similarity to include a chunk
    match_count INTEGER DEFAULT 10      -- maximum number of chunks returned
)
RETURNS TABLE (
    id UUID,
    document_id TEXT,
    content TEXT,
    metadata JSONB,
    chunk_index INTEGER,
    similarity FLOAT
)
LANGUAGE SQL STABLE
AS $$
    SELECT
        document_chunks.id,
        document_chunks.document_id,
        document_chunks.content,
        document_chunks.metadata,
        document_chunks.chunk_index,
        1 - (document_chunks.embedding <=> query_embedding) AS similarity
    FROM document_chunks
    WHERE 1 - (document_chunks.embedding <=> query_embedding) > match_threshold
    -- Ordering by raw distance (ascending) lets the ivfflat index drive the scan.
    ORDER BY document_chunks.embedding <=> query_embedding
    LIMIT match_count;
$$;
-- Create RLS policies for security.
-- NOTE(review): these policies grant every authenticated user full access to all
-- chunks — there is no per-user/per-document ownership check. Confirm this matches
-- the intended multi-tenancy model; if not, add a document ownership predicate.
ALTER TABLE document_chunks ENABLE ROW LEVEL SECURITY;

-- Policy to allow authenticated users to read chunks.
-- TO authenticated restricts evaluation to that role up front (Supabase best practice);
-- the auth.role() check is kept for defense in depth.
CREATE POLICY "Users can view document chunks" ON document_chunks
    FOR SELECT TO authenticated USING (auth.role() = 'authenticated');

-- Policy to allow authenticated users to insert chunks.
CREATE POLICY "Users can insert document chunks" ON document_chunks
    FOR INSERT TO authenticated WITH CHECK (auth.role() = 'authenticated');

-- Policy to allow authenticated users to update chunks.
CREATE POLICY "Users can update document chunks" ON document_chunks
    FOR UPDATE TO authenticated USING (auth.role() = 'authenticated');

-- Policy to allow authenticated users to delete chunks.
CREATE POLICY "Users can delete document chunks" ON document_chunks
    FOR DELETE TO authenticated USING (auth.role() = 'authenticated');
-- Grant necessary permissions.
-- anon gets read-only access; authenticated and service_role may also write.
GRANT USAGE ON SCHEMA public TO postgres, anon, authenticated, service_role;
GRANT ALL ON TABLE document_chunks TO postgres, service_role;
GRANT SELECT ON TABLE document_chunks TO anon, authenticated;
GRANT INSERT, UPDATE, DELETE ON TABLE document_chunks TO authenticated, service_role;

-- Grant execute permissions on the search function.
-- The argument signature is spelled out so this GRANT stays unambiguous if an
-- overload of match_document_chunks is ever added.
GRANT EXECUTE ON FUNCTION match_document_chunks(VECTOR(1536), FLOAT, INTEGER) TO postgres, anon, authenticated, service_role;
-- Create some sample data for testing (optional — uncomment to use).
-- INSERT INTO document_chunks (document_id, content, chunk_index, metadata)
-- VALUES
--     ('test-doc-1', 'This is a test chunk of content for vector search.', 1, '{"test": true}'),
--     ('test-doc-1', 'Another chunk of content from the same document.', 2, '{"test": true}');

-- Display table info so the operator can visually confirm the schema was created.
SELECT
    column_name,
    data_type,
    is_nullable,
    column_default
FROM information_schema.columns
WHERE table_name = 'document_chunks'
ORDER BY ordinal_position;