-- =============================================================================
-- Vector-search schema for document chunks (PostgreSQL + pgvector).
--
-- Every statement uses IF NOT EXISTS / OR REPLACE, so the script is written to
-- be re-runnable against a database that already holds these objects.
-- =============================================================================

-- Enable the pgvector extension (provides the `vector` type and `<=>` operator).
CREATE EXTENSION IF NOT EXISTS vector;

-- Create document_chunks table with vector support.
-- NOTE: the same definition is repeated inside create_document_chunks_table()
-- below; keep the two in sync when changing columns or indexes.
CREATE TABLE IF NOT EXISTS document_chunks (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id VARCHAR(255) NOT NULL,
    chunk_index INTEGER NOT NULL,
    content     TEXT NOT NULL,
    embedding   vector(1536),  -- OpenAI embeddings are 1536 dimensions
    metadata    JSONB DEFAULT '{}',
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- NOTE(review): nothing in this script refreshes updated_at on UPDATE;
    -- presumably writers set it themselves -- confirm.
    updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Create indexes for better performance.
-- Lookup of all chunks belonging to one document.
CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
    ON document_chunks (document_id);

-- Approximate nearest-neighbour index for cosine distance.
-- pgvector's documentation recommends building IVFFlat indexes once the table
-- already contains data; on a fresh install consider running
-- `REINDEX INDEX document_chunks_embedding_idx;` after the initial bulk load.
CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
    ON document_chunks
    USING ivfflat (embedding vector_cosine_ops);

-- -----------------------------------------------------------------------------
-- enable_pgvector(): installs the pgvector extension if it is missing.
-- Exposed as a function so it can be invoked through an RPC call.
-- Returns: nothing (VOID).
-- -----------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION enable_pgvector()
RETURNS VOID AS $$
BEGIN
    CREATE EXTENSION IF NOT EXISTS vector;
END;
$$ LANGUAGE plpgsql;

-- -----------------------------------------------------------------------------
-- create_document_chunks_table(): creates the document_chunks table and its two
-- indexes if they do not exist yet. Exposed as a function so it can be invoked
-- through an RPC call. Mirrors the top-level DDL above.
-- Returns: nothing (VOID).
-- -----------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION create_document_chunks_table()
RETURNS VOID AS $$
BEGIN
    CREATE TABLE IF NOT EXISTS document_chunks (
        id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
        document_id VARCHAR(255) NOT NULL,
        chunk_index INTEGER NOT NULL,
        content     TEXT NOT NULL,
        embedding   vector(1536),
        metadata    JSONB DEFAULT '{}',
        created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
        ON document_chunks (document_id);

    CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
        ON document_chunks
        USING ivfflat (embedding vector_cosine_ops);
END;
$$ LANGUAGE plpgsql;

-- -----------------------------------------------------------------------------
-- match_documents(): returns the chunks most similar to a query embedding.
--
-- Parameters:
--   query_embedding  1536-dimension vector to compare against.
--   match_threshold  minimum similarity (exclusive); default 0.7.
--   match_count      maximum number of rows returned; default 10.
--
-- Returns one row per matching chunk: id, content, metadata, document_id and
-- similarity, where similarity = 1 - cosine distance (`<=>`).
-- Rows are ordered from most to least similar. Chunks whose embedding is NULL
-- never match, because the comparison in WHERE evaluates to NULL for them.
--
-- Marked STABLE: the function only reads from document_chunks.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION match_documents(
    query_embedding vector(1536),
    match_threshold float DEFAULT 0.7,
    match_count     int DEFAULT 10
)
RETURNS TABLE(
    id          UUID,
    content     TEXT,
    metadata    JSONB,
    document_id VARCHAR(255),
    similarity  FLOAT
) AS $$
BEGIN
    -- Every column is qualified with the table alias because the RETURNS TABLE
    -- column names (id, content, ...) are also variables inside this function.
    RETURN QUERY
    SELECT
        dc.id,
        dc.content,
        dc.metadata,
        dc.document_id,
        1 - (dc.embedding <=> query_embedding) AS similarity
    FROM document_chunks AS dc
    WHERE 1 - (dc.embedding <=> query_embedding) > match_threshold
    -- Ascending cosine distance == descending similarity.
    ORDER BY dc.embedding <=> query_embedding
    LIMIT match_count;
END;
$$ LANGUAGE plpgsql STABLE;

-- Enable Row Level Security (RLS) if needed
-- ALTER TABLE document_chunks ENABLE ROW LEVEL SECURITY;

-- Create policies for RLS (adjust as needed for your auth requirements)
-- CREATE POLICY "Users can view all document chunks" ON document_chunks FOR SELECT USING (true);
-- CREATE POLICY "Users can insert document chunks" ON document_chunks FOR INSERT WITH CHECK (true);
-- CREATE POLICY "Users can update document chunks" ON document_chunks FOR UPDATE USING (true);
-- CREATE POLICY "Users can delete document chunks" ON document_chunks FOR DELETE USING (true);

-- Grant necessary permissions.
-- SECURITY NOTE(review): GRANT ALL on a table includes INSERT, UPDATE, DELETE
-- and TRUNCATE. With the RLS statements above left commented out, nothing in
-- this script restricts what the `anon` role can do to document_chunks.
-- Confirm this is intended before deploying; otherwise narrow the `anon` grant
-- and/or enable RLS with suitable policies.
GRANT ALL ON document_chunks TO authenticated;
GRANT ALL ON document_chunks TO anon;

-- The argument signature is spelled out so these grants keep working (and keep
-- targeting this exact function) even if another match_documents overload
-- exists in the database.
GRANT EXECUTE ON FUNCTION match_documents(vector, double precision, integer) TO authenticated;
GRANT EXECUTE ON FUNCTION match_documents(vector, double precision, integer) TO anon;