-- Schema and reporting helpers for document chunks and similarity-search logs.
--
-- Prerequisites (not created here):
--   * the VECTOR column type must already be available (it is provided by the
--     pgvector extension -- TODO confirm it is installed by an earlier migration);
--   * gen_random_uuid() must be available on the target server.

-- One row per chunk of a source document, with its embedding.
CREATE TABLE IF NOT EXISTS document_chunks (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id UUID NOT NULL,
    content     TEXT,
    metadata    JSONB,
    embedding   VECTOR(1536),
    chunk_index INTEGER,
    section     TEXT,
    page_number INTEGER,
    created_at  TIMESTAMPTZ DEFAULT NOW(),
    updated_at  TIMESTAMPTZ DEFAULT NOW()
);

-- One row per similarity search that was executed.
CREATE TABLE IF NOT EXISTS vector_similarity_searches (
    id                   UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id              UUID,
    query_text           TEXT,
    query_embedding      VECTOR(1536),
    search_results       JSONB,
    filters              JSONB,
    limit_count          INTEGER,
    similarity_threshold REAL,
    processing_time_ms   INTEGER,
    created_at           TIMESTAMPTZ DEFAULT NOW()
);

-- count_distinct_documents()
-- Returns the number of distinct document_id values in document_chunks
-- (0 when the table is empty).
-- STABLE: the function only reads data.
CREATE OR REPLACE FUNCTION count_distinct_documents()
RETURNS INTEGER AS $$
BEGIN
    RETURN (
        SELECT COUNT(DISTINCT dc.document_id)
        FROM document_chunks AS dc
    );
END;
$$ LANGUAGE plpgsql STABLE;

-- average_chunk_size()
-- Returns the mean LENGTH(content) over document_chunks, converted to INTEGER
-- by the RETURN assignment. Rows with NULL content are ignored by AVG; the
-- result is NULL when there are no rows with non-NULL content.
-- STABLE: the function only reads data.
CREATE OR REPLACE FUNCTION average_chunk_size()
RETURNS INTEGER AS $$
BEGIN
    RETURN (
        SELECT AVG(LENGTH(dc.content))
        FROM document_chunks AS dc
    );
END;
$$ LANGUAGE plpgsql STABLE;

-- get_search_analytics(user_id_param, days_param)
-- Returns up to 20 (query_text, search_count) rows for the given user, counting
-- searches whose created_at falls within the last days_param days, most
-- frequent first.
--   user_id_param  user whose searches are counted
--   days_param     size of the look-back window, in days
CREATE OR REPLACE FUNCTION get_search_analytics(user_id_param UUID, days_param INTEGER)
RETURNS TABLE(query_text TEXT, search_count BIGINT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        vs.query_text,
        COUNT(*) AS search_count
    FROM vector_similarity_searches AS vs
    WHERE vs.user_id = user_id_param
      AND vs.created_at >= NOW() - (days_param * INTERVAL '1 day')
    GROUP BY vs.query_text
    -- Order by the aggregate itself rather than the "search_count" alias: that
    -- name is also an OUT column of this function, so repeating the expression
    -- sidesteps any potential ambiguity. The query_text tie-breaker makes the
    -- LIMIT deterministic when several queries share the same count.
    ORDER BY COUNT(*) DESC, vs.query_text
    LIMIT 20;
END;
$$ LANGUAGE plpgsql STABLE;

-- get_vector_database_stats()
-- Returns a single row:
--   total_chunks        number of rows in document_chunks
--   total_documents     number of distinct document_id values in document_chunks
--   average_similarity  mean of the positive similarity_score values in
--                       document_similarities (NULL when there are none)
-- NOTE(review): document_similarities is not defined in this file; it is
-- presumably created elsewhere -- confirm, otherwise this function fails when
-- called.
CREATE OR REPLACE FUNCTION get_vector_database_stats()
RETURNS TABLE(total_chunks BIGINT, total_documents BIGINT, average_similarity REAL) AS $$
BEGIN
    RETURN QUERY
    SELECT
        (SELECT COUNT(*) FROM document_chunks),
        (SELECT COUNT(DISTINCT dc.document_id) FROM document_chunks AS dc),
        (
            -- AVG yields DOUBLE PRECISION or NUMERIC, never REAL, and
            -- RETURN QUERY does not coerce column types, so cast explicitly
            -- to match the declared result column.
            SELECT AVG(ds.similarity_score)::REAL
            FROM document_similarities AS ds
            WHERE ds.similarity_score > 0
        );
END;
$$ LANGUAGE plpgsql STABLE;