89 lines
3.2 KiB
PL/PgSQL
89 lines
3.2 KiB
PL/PgSQL
-- Make the pgvector extension available. It supplies the `vector` column type
-- and the `ivfflat` index method that the statements below rely on.
CREATE EXTENSION IF NOT EXISTS vector;

-- Storage for document chunks together with their embedding vectors.
-- `document_id` and `chunk_index` identify a chunk from the writer's point of
-- view; no uniqueness is enforced on that pair, only on the surrogate `id`.
CREATE TABLE IF NOT EXISTS document_chunks (
    id          UUID         DEFAULT gen_random_uuid(),
    document_id VARCHAR(255) NOT NULL,
    chunk_index INTEGER      NOT NULL,
    content     TEXT         NOT NULL,
    embedding   vector(1536),                 -- OpenAI embeddings are 1536 dimensions
    metadata    JSONB        DEFAULT '{}'::jsonb,
    created_at  TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT document_chunks_pkey PRIMARY KEY (id)
);

-- Lookup of all chunks that belong to one document.
CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
    ON document_chunks
    USING btree (document_id);

-- Approximate nearest-neighbour index over the embeddings, built with the
-- cosine operator class.
CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
    ON document_chunks
    USING ivfflat (embedding vector_cosine_ops);
|
|
|
|
-- RPC helper: installs the pgvector extension if it is not installed yet.
-- Takes no arguments and returns nothing. The function is not declared
-- SECURITY DEFINER, so the CREATE EXTENSION runs with the caller's privileges.
CREATE OR REPLACE FUNCTION enable_pgvector()
RETURNS VOID
LANGUAGE plpgsql
AS $enable_pgvector$
BEGIN
    CREATE EXTENSION IF NOT EXISTS vector;
END;
$enable_pgvector$;
|
|
|
|
-- RPC helper: creates the document_chunks table and its two indexes when they
-- do not exist yet. Takes no arguments and returns nothing.
-- The body does not install the pgvector extension itself, so the `vector`
-- type and the `ivfflat` index method must already be available when it runs.
CREATE OR REPLACE FUNCTION create_document_chunks_table()
RETURNS VOID
LANGUAGE plpgsql
AS $create_document_chunks_table$
BEGIN
    -- Chunk storage: one row per chunk, keyed by a generated UUID.
    CREATE TABLE IF NOT EXISTS document_chunks (
        id          UUID         DEFAULT gen_random_uuid(),
        document_id VARCHAR(255) NOT NULL,
        chunk_index INTEGER      NOT NULL,
        content     TEXT         NOT NULL,
        embedding   vector(1536),
        metadata    JSONB        DEFAULT '{}'::jsonb,
        created_at  TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
        updated_at  TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
        CONSTRAINT document_chunks_pkey PRIMARY KEY (id)
    );

    -- Lookup of all chunks that belong to one document.
    CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
        ON document_chunks
        USING btree (document_id);

    -- Approximate nearest-neighbour index using the cosine operator class.
    CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
        ON document_chunks
        USING ivfflat (embedding vector_cosine_ops);
END;
$create_document_chunks_table$;
|
|
|
|
-- Return the stored chunks most similar to a query embedding.
--
-- Parameters:
--   query_embedding  1536-dimension vector to compare against each chunk.
--   match_threshold  minimum similarity (exclusive) a chunk must exceed;
--                    defaults to 0.7. An explicit NULL falls back to 0.7.
--   match_count      maximum number of rows returned; defaults to 10.
--                    An explicit NULL falls back to 10 rather than removing
--                    the limit.
--
-- Returns one row per matching chunk (id, content, metadata, document_id)
-- plus `similarity`, computed as 1 - (embedding <=> query_embedding), ordered
-- from most to least similar. Chunks whose embedding is NULL never match,
-- because the similarity comparison evaluates to NULL for them.
--
-- Declared STABLE: the body only reads document_chunks and modifies nothing.
CREATE OR REPLACE FUNCTION match_documents(
    query_embedding vector(1536),
    match_threshold float DEFAULT 0.7,
    match_count int DEFAULT 10
)
RETURNS TABLE(
    id UUID,
    content TEXT,
    metadata JSONB,
    document_id VARCHAR(255),
    similarity FLOAT
)
LANGUAGE plpgsql
STABLE
AS $$
BEGIN
    RETURN QUERY
    SELECT
        dc.id,
        dc.content,
        dc.metadata,
        dc.document_id,
        1 - (dc.embedding <=> query_embedding) AS similarity
    FROM document_chunks AS dc
    -- Column references stay table-qualified: the RETURNS TABLE column names
    -- are also PL/pgSQL variables and would otherwise be ambiguous.
    WHERE 1 - (dc.embedding <=> query_embedding) > COALESCE(match_threshold, 0.7)
    -- Order by the raw distance expression so the vector index on
    -- document_chunks.embedding can serve the ordering.
    ORDER BY dc.embedding <=> query_embedding
    -- LIMIT NULL means "no limit"; keep the result bounded when NULL is passed.
    LIMIT COALESCE(match_count, 10);
END;
$$;
|
|
|
|
-- Enable Row Level Security (RLS) if needed
|
|
-- ALTER TABLE document_chunks ENABLE ROW LEVEL SECURITY;
|
|
|
|
-- Create policies for RLS (adjust as needed for your auth requirements)
|
|
-- CREATE POLICY "Users can view all document chunks" ON document_chunks FOR SELECT USING (true);
|
|
-- CREATE POLICY "Users can insert document chunks" ON document_chunks FOR INSERT WITH CHECK (true);
|
|
-- CREATE POLICY "Users can update document chunks" ON document_chunks FOR UPDATE USING (true);
|
|
-- CREATE POLICY "Users can delete document chunks" ON document_chunks FOR DELETE USING (true);
|
|
|
|
-- Grant necessary permissions
-- Table access is limited to the row-level operations (read/insert/update/
-- delete) instead of ALL, which would also hand out TRUNCATE, REFERENCES and
-- TRIGGER on the table.
-- NOTE(review): row level security is not enabled by this script, so the anon
-- grant lets any holder of the anon role modify every row -- confirm this is
-- intended, or enable RLS and add policies.
GRANT SELECT, INSERT, UPDATE, DELETE ON document_chunks TO authenticated;
GRANT SELECT, INSERT, UPDATE, DELETE ON document_chunks TO anon;

-- The argument types are spelled out so the grant keeps working if another
-- function named match_documents is added later.
GRANT EXECUTE ON FUNCTION match_documents(vector, double precision, integer) TO authenticated;
GRANT EXECUTE ON FUNCTION match_documents(vector, double precision, integer) TO anon;