Pre-cleanup commit: Current state before service layer consolidation
This commit is contained in:
@@ -1,89 +1,76 @@
|
||||
-- pgvector provides the `vector` column type and the ivfflat index access
-- method that the statements below rely on.
CREATE EXTENSION IF NOT EXISTS vector;

-- document_chunks: text chunks keyed by document_id, each with an optional
-- embedding and a free-form JSONB metadata bag.
CREATE TABLE IF NOT EXISTS document_chunks (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id CHARACTER VARYING(255) NOT NULL,
    chunk_index INT NOT NULL,
    content     TEXT NOT NULL,
    embedding   vector(1536),  -- OpenAI embeddings are 1536 dimensions; nullable
    metadata    JSONB DEFAULT '{}',
    -- Both timestamps are only defaulted at insert time; nothing in this
    -- section maintains updated_at on UPDATE.
    created_at  TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Lookup of all chunks belonging to one document.
CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
    ON document_chunks USING btree (document_id);

-- Approximate nearest-neighbour index for cosine-distance (<=>) searches.
CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
    ON document_chunks USING ivfflat (embedding vector_cosine_ops);
|
||||
|
||||
-- enable_pgvector(): callable wrapper (intended for RPC calls) that installs
-- the pgvector extension if it is not already present. Takes no arguments and
-- returns nothing; calling it when the extension exists is a no-op.
CREATE OR REPLACE FUNCTION enable_pgvector()
RETURNS void
LANGUAGE plpgsql
AS $enable_pgvector$
BEGIN
    CREATE EXTENSION IF NOT EXISTS vector;
    RETURN;
END;
$enable_pgvector$;
|
||||
|
||||
-- create_document_chunks_table(): callable wrapper (intended for RPC calls)
-- that creates the document_chunks table and its two indexes when they do not
-- exist yet. Takes no arguments and returns nothing. The `vector` type and the
-- ivfflat access method must already be available when this runs.
CREATE OR REPLACE FUNCTION create_document_chunks_table()
RETURNS void
LANGUAGE plpgsql
AS $create_document_chunks_table$
BEGIN
    CREATE TABLE IF NOT EXISTS document_chunks (
        id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
        document_id CHARACTER VARYING(255) NOT NULL,
        chunk_index INT NOT NULL,
        content     TEXT NOT NULL,
        embedding   vector(1536),
        metadata    JSONB DEFAULT '{}',
        created_at  TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
        updated_at  TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP
    );

    -- Per-document lookup index.
    CREATE INDEX IF NOT EXISTS document_chunks_document_id_idx
        ON document_chunks USING btree (document_id);

    -- Cosine-distance similarity index over the embeddings.
    CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
        ON document_chunks USING ivfflat (embedding vector_cosine_ops);

    RETURN;
END;
$create_document_chunks_table$;
|
||||
|
||||
-- match_documents(query_embedding, match_threshold, match_count)
--
-- Returns the chunks of document_chunks most similar to query_embedding.
--
-- Parameters:
--   query_embedding  1536-dimension vector to compare against.
--   match_threshold  only rows whose similarity is strictly greater than this
--                    value are returned (default 0.7).
--   match_count      maximum number of rows returned (default 10).
--
-- Returns one row per matching chunk: id, content, metadata, document_id and
-- similarity, where similarity = 1 - (embedding <=> query_embedding), i.e.
-- one minus the pgvector cosine distance. Rows are ordered from closest to
-- farthest. Chunks with a NULL embedding never match because the comparison
-- evaluates to NULL.
--
-- NOTE(review): the RETURNS TABLE list previously contained stray column
-- definitions from a table declaration (a second document_id, a NOT NULL
-- constraint, embedding/chunk_index/... columns) and the function had no
-- body, so the statement could not be parsed. The column list below keeps
-- the five columns that line up with the SELECT; confirm against the
-- deployed definition.
CREATE OR REPLACE FUNCTION match_documents(
    query_embedding vector(1536),
    match_threshold float DEFAULT 0.7,
    match_count int DEFAULT 10
)
RETURNS TABLE(
    id UUID,
    content TEXT,
    metadata JSONB,
    document_id VARCHAR(255),
    similarity FLOAT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        dc.id,
        dc.content,
        dc.metadata,
        dc.document_id,
        1 - (dc.embedding <=> query_embedding) AS similarity
    FROM document_chunks AS dc
    -- Every column is alias-qualified so it cannot collide with the
    -- identically named output columns of this function.
    WHERE 1 - (dc.embedding <=> query_embedding) > match_threshold
    ORDER BY dc.embedding <=> query_embedding
    LIMIT match_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- vector_similarity_searches: one row per recorded similarity search. Every
-- column except the generated id is nullable.
CREATE TABLE IF NOT EXISTS vector_similarity_searches (
    id                   UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id              UUID,
    query_text           TEXT,
    query_embedding      vector(1536),
    search_results       JSONB,
    filters              JSONB,
    limit_count          INT,
    similarity_threshold REAL,
    processing_time_ms   INT,
    created_at           TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- count_distinct_documents(): number of distinct document_id values present
-- in document_chunks, returned as INTEGER (0 when the table is empty).
CREATE OR REPLACE FUNCTION count_distinct_documents()
RETURNS INTEGER
LANGUAGE plpgsql
AS $count_distinct_documents$
DECLARE
    distinct_total INTEGER;
BEGIN
    -- COUNT yields BIGINT; the assignment narrows it to the declared INTEGER.
    SELECT COUNT(DISTINCT dc.document_id)
    INTO distinct_total
    FROM document_chunks AS dc;

    RETURN distinct_total;
END;
$count_distinct_documents$;
|
||||
|
||||
-- average_chunk_size(): mean character length of document_chunks.content,
-- converted to INTEGER. Returns NULL when the table has no rows, because AVG
-- over an empty set is NULL.
CREATE OR REPLACE FUNCTION average_chunk_size()
RETURNS INTEGER
LANGUAGE plpgsql
AS $average_chunk_size$
DECLARE
    mean_length INTEGER;
BEGIN
    -- AVG over integers yields NUMERIC; the assignment converts it to INTEGER.
    SELECT AVG(LENGTH(dc.content))
    INTO mean_length
    FROM document_chunks AS dc;

    RETURN mean_length;
END;
$average_chunk_size$;
|
||||
|
||||
-- get_search_analytics(user_id_param, days_param)
--
-- Returns the most frequent query texts one user has searched for recently.
--
-- Parameters:
--   user_id_param  user whose rows in vector_similarity_searches are counted.
--   days_param     size of the look-back window in days, measured back from
--                  NOW().
--
-- Returns up to 20 rows of (query_text, search_count), most frequent first;
-- ties are broken by query_text so the output order is deterministic.
--
-- NOTE(review): the body previously began with a complete, unrelated SELECT
-- over document_chunks (five columns, referencing query_embedding /
-- match_threshold / match_count, none of which exist in this function). That
-- left the real aggregate query as a dangling fragment and made the function
-- unparseable; the stray statement has been removed.
CREATE OR REPLACE FUNCTION get_search_analytics(user_id_param UUID, days_param INTEGER)
RETURNS TABLE(query_text TEXT, search_count BIGINT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        vs.query_text,
        COUNT(*) AS search_count
    FROM vector_similarity_searches AS vs
    WHERE vs.user_id = user_id_param
      AND vs.created_at >= NOW() - (days_param * INTERVAL '1 day')
    GROUP BY vs.query_text
    -- Sort on the aggregate expression rather than the bare name
    -- search_count, which is also an output column of this function.
    ORDER BY COUNT(*) DESC, vs.query_text
    LIMIT 20;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Enable Row Level Security (RLS) if needed
|
||||
-- ALTER TABLE document_chunks ENABLE ROW LEVEL SECURITY;
|
||||
|
||||
-- Create policies for RLS (adjust as needed for your auth requirements)
|
||||
-- CREATE POLICY "Users can view all document chunks" ON document_chunks FOR SELECT USING (true);
|
||||
-- CREATE POLICY "Users can insert document chunks" ON document_chunks FOR INSERT WITH CHECK (true);
|
||||
-- CREATE POLICY "Users can update document chunks" ON document_chunks FOR UPDATE USING (true);
|
||||
-- CREATE POLICY "Users can delete document chunks" ON document_chunks FOR DELETE USING (true);
|
||||
|
||||
-- Grant necessary permissions.
-- NOTE(review): `anon` receives ALL privileges on document_chunks (including
-- INSERT/UPDATE/DELETE/TRUNCATE) and row level security is commented out in
-- this file; confirm that this is intended.
GRANT ALL PRIVILEGES ON TABLE document_chunks TO authenticated, anon;

-- The function name is given without an argument list, so it must resolve to
-- exactly one match_documents function.
GRANT EXECUTE ON FUNCTION match_documents TO authenticated, anon;
|
||||
-- get_vector_database_stats()
--
-- Returns a single row of summary figures:
--   total_chunks        number of rows in document_chunks.
--   total_documents     number of distinct document_id values in
--                       document_chunks.
--   average_similarity  mean of document_similarities.similarity_score over
--                       rows where the score is greater than 0; NULL when no
--                       such rows exist.
--
-- NOTE(review): document_similarities is not defined in the visible part of
-- this file; it must exist before the function is called.
CREATE OR REPLACE FUNCTION get_vector_database_stats()
RETURNS TABLE(total_chunks BIGINT, total_documents BIGINT, average_similarity REAL) AS $$
BEGIN
    RETURN QUERY
    SELECT
        (SELECT COUNT(*) FROM document_chunks AS dc),
        (SELECT COUNT(DISTINCT dc.document_id) FROM document_chunks AS dc),
        -- AVG() returns double precision or numeric, never REAL. RETURN QUERY
        -- requires the query's column types to match the declared result
        -- types, so the average is cast explicitly.
        (SELECT CAST(AVG(ds.similarity_score) AS REAL)
         FROM document_similarities AS ds
         WHERE ds.similarity_score > 0);
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
Reference in New Issue
Block a user