Files
cim_summary/backend/test-vector-search.js
Jon df079713c4 feat: Complete cloud-native CIM Document Processor with full BPCP template
🌐 Cloud-Native Architecture:
- Firebase Functions deployment (no Docker)
- Supabase database (replacing local PostgreSQL)
- Google Cloud Storage integration
- Document AI + Agentic RAG processing pipeline
- Claude-3.5-Sonnet LLM integration

📋 Full BPCP CIM Review Template (7 sections):
- Deal Overview
- Business Description
- Market & Industry Analysis
- Financial Summary (with historical financials table)
- Management Team Overview
- Preliminary Investment Thesis
- Key Questions & Next Steps

🔧 Cloud Migration Improvements:
- PostgreSQL → Supabase migration complete
- Local storage → Google Cloud Storage
- Docker deployment → Firebase Functions
- Schema mapping fixes (camelCase/snake_case)
- Enhanced error handling and logging
- Vector database with fallback mechanisms

📄 Complete End-to-End Cloud Workflow:
1. Upload PDF → Document AI extraction
2. Agentic RAG processing → Structured CIM data
3. Store in Supabase → Vector embeddings
4. Auto-generate PDF → Full BPCP template
5. Download complete CIM review

🚀 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-01 17:51:45 -04:00

129 lines
4.3 KiB
JavaScript

const { createClient } = require('@supabase/supabase-js');

// Let dotenv populate process.env before any credentials are read.
require('dotenv').config();

// Connection settings for the Supabase project, taken from the environment.
const {
  SUPABASE_URL: supabaseUrl,
  SUPABASE_SERVICE_KEY: supabaseServiceKey,
} = process.env;

// Client shared by every check in this script.
const supabase = createClient(supabaseUrl, supabaseServiceKey);
/**
 * Smoke-tests the `match_document_chunks` RPC and embedding inserts.
 *
 * Steps, in order:
 *   1. Calls the `match_document_chunks` RPC with a random 1536-dimension
 *      embedding (threshold 0.1, up to 5 matches) and logs the outcome. When
 *      the error code indicates the function is missing, prints the SQL
 *      needed to create it.
 *   2. Inserts one test row (with the same embedding) into `document_chunks`.
 *   3. If the insert succeeded, repeats the search (threshold 0.5) and then
 *      deletes every row whose `document_id` is the test document id. The
 *      delete runs even when the second search throws.
 *
 * Uses the module-level `supabase` client. Results are reported through
 * `console.log` only.
 *
 * @returns {Promise<void>} Resolves once all steps have been attempted;
 *   rejects only if a client call itself rejects or throws.
 */
async function testVectorSearch() {
  const EMBEDDING_DIMENSIONS = 1536;
  const TEST_DOCUMENT_ID = 'test-doc-with-embedding';
  // PostgreSQL reports an unknown function as 42883 (undefined_function);
  // PostgREST, which serves supabase.rpc(), reports it as PGRST202.
  const MISSING_FUNCTION_CODES = ['42883', 'PGRST202'];
  const CREATE_FUNCTION_SQL = `-- First enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Create vector similarity search function
CREATE OR REPLACE FUNCTION match_document_chunks(
query_embedding VECTOR(1536),
match_threshold FLOAT DEFAULT 0.7,
match_count INTEGER DEFAULT 10
)
RETURNS TABLE (
id UUID,
document_id TEXT,
content TEXT,
metadata JSONB,
chunk_index INTEGER,
similarity FLOAT
)
LANGUAGE SQL STABLE
AS $$
SELECT
document_chunks.id,
document_chunks.document_id,
document_chunks.content,
document_chunks.metadata,
document_chunks.chunk_index,
1 - (document_chunks.embedding <=> query_embedding) AS similarity
FROM document_chunks
WHERE document_chunks.embedding IS NOT NULL
AND 1 - (document_chunks.embedding <=> query_embedding) > match_threshold
ORDER BY document_chunks.embedding <=> query_embedding
LIMIT match_count;
$$;`;

  console.log('🔍 Testing vector search function...');

  // Create a test embedding (1536 dimensions with small random values).
  const testEmbedding = Array.from(
    { length: EMBEDDING_DIMENSIONS },
    () => Math.random() * 0.1
  );
  console.log('📊 Test embedding created with', testEmbedding.length, 'dimensions');

  // Step 1: call the vector search function.
  const { data, error } = await supabase.rpc('match_document_chunks', {
    query_embedding: testEmbedding,
    match_threshold: 0.1,
    match_count: 5
  });

  if (error) {
    console.log('❌ Vector search function error:', error);
    if (MISSING_FUNCTION_CODES.includes(error.code)) {
      console.log('📝 match_document_chunks function does not exist');
      console.log('');
      console.log('🛠️ Please create the function in Supabase SQL Editor:');
      console.log('');
      console.log(CREATE_FUNCTION_SQL);
    }
  } else {
    console.log('✅ Vector search function works!');
    console.log('📊 Search results:', data ? data.length : 0, 'matches found');
    if (data && data.length > 0) {
      console.log('First result:', data[0]);
    }
  }

  // Step 2: insert a row carrying the embedding. This runs even when the
  // search above failed, so the two capabilities are checked independently.
  console.log('🧪 Testing insert with embedding...');
  const testChunk = {
    document_id: TEST_DOCUMENT_ID,
    content: 'This is a test chunk with an embedding vector',
    chunk_index: 1,
    embedding: testEmbedding,
    metadata: { test: true, hasEmbedding: true }
  };
  const { data: insertData, error: insertError } = await supabase
    .from('document_chunks')
    .insert(testChunk)
    .select();

  if (insertError) {
    console.log('❌ Insert with embedding failed:', insertError);
    return;
  }

  try {
    console.log('✅ Insert with embedding successful!');
    // Guard against an empty result so a missing row is logged as undefined
    // instead of crashing before cleanup.
    const insertedRow =
      Array.isArray(insertData) && insertData.length > 0 ? insertData[0] : undefined;
    console.log('Inserted chunk ID:', insertedRow ? insertedRow.id : undefined);

    // Step 3: search again now that a row with this exact embedding exists.
    console.log('🔍 Testing search with actual data...');
    const { data: searchData, error: searchError } = await supabase.rpc('match_document_chunks', {
      query_embedding: testEmbedding,
      match_threshold: 0.5,
      match_count: 5
    });

    if (searchError) {
      console.log('❌ Search with data failed:', searchError);
    } else {
      console.log('✅ Search with data successful!');
      console.log('Found', searchData ? searchData.length : 0, 'results');
      if (searchData && searchData.length > 0) {
        console.log('Best match similarity:', searchData[0].similarity);
      }
    }
  } finally {
    // Always remove the test row, even if the search above threw, so repeated
    // runs do not accumulate test data.
    const { error: deleteError } = await supabase
      .from('document_chunks')
      .delete()
      .eq('document_id', TEST_DOCUMENT_ID);

    if (deleteError) {
      console.log('⚠️ Could not clean up test data:', deleteError.message);
    } else {
      console.log('🧹 Test data cleaned up');
    }
  }
}
// Run the smoke test. Any rejection is logged and reflected in a non-zero
// exit code instead of being left as an unhandled promise rejection.
testVectorSearch().catch((error) => {
  console.error('❌ Vector search test crashed:', error);
  process.exitCode = 1;
});