77 lines
2.1 KiB
PL/PgSQL
77 lines
2.1 KiB
PL/PgSQL
-- Create the document_chunks table
|
|
-- Stores one row per chunk of a document, together with its embedding.
CREATE TABLE IF NOT EXISTS document_chunks (
    -- Surrogate key; generated server-side when the writer omits it.
    id           UUID         DEFAULT gen_random_uuid(),
    -- Identifier of the owning document. No foreign key is declared here.
    document_id  UUID         NOT NULL,
    content      TEXT,
    metadata     JSONB,
    -- NOTE(review): VECTOR is not a built-in PostgreSQL type; presumably it is
    -- supplied by the pgvector extension -- confirm it is installed before
    -- this statement runs.
    embedding    VECTOR(1536),
    chunk_index  INTEGER,
    section      TEXT,
    page_number  INTEGER,
    created_at   TIMESTAMPTZ  DEFAULT NOW(),
    -- Only defaulted at insert time; nothing in this statement refreshes it
    -- on UPDATE.
    updated_at   TIMESTAMPTZ  DEFAULT NOW(),
    -- Same name PostgreSQL would generate for a column-level PRIMARY KEY.
    CONSTRAINT document_chunks_pkey PRIMARY KEY (id)
);
|
|
|
|
-- Create the vector_similarity_searches table
|
|
-- Log of similarity searches: one row per executed query.
CREATE TABLE IF NOT EXISTS vector_similarity_searches (
    -- Surrogate key; generated server-side when the writer omits it.
    id                    UUID         DEFAULT gen_random_uuid(),
    -- Nullable, and no foreign key is declared here.
    user_id               UUID,
    query_text            TEXT,
    -- NOTE(review): VECTOR is not a built-in PostgreSQL type; presumably it is
    -- supplied by the pgvector extension -- confirm it is installed before
    -- this statement runs.
    query_embedding       VECTOR(1536),
    search_results        JSONB,
    filters               JSONB,
    limit_count           INTEGER,
    similarity_threshold  REAL,
    -- Unit is milliseconds, per the column name.
    processing_time_ms    INTEGER,
    created_at            TIMESTAMPTZ  DEFAULT NOW(),
    -- Same name PostgreSQL would generate for a column-level PRIMARY KEY.
    CONSTRAINT vector_similarity_searches_pkey PRIMARY KEY (id)
);
|
|
|
|
-- Create the function to count distinct documents
|
|
-- Returns the number of distinct document_id values in document_chunks.
-- Yields 0 when the table is empty.
CREATE OR REPLACE FUNCTION count_distinct_documents()
RETURNS INTEGER AS $$
DECLARE
    -- COUNT() produces BIGINT; PL/pgSQL coerces it to the declared INTEGER
    -- result type on RETURN.
    document_total BIGINT;
BEGIN
    SELECT COUNT(DISTINCT dc.document_id)
    INTO document_total
    FROM document_chunks AS dc;

    RETURN document_total;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Create the function to get the average chunk size
|
|
-- Returns the mean character length of document_chunks.content, rounded to
-- the nearest integer. Rows whose content is NULL are ignored by AVG();
-- the result is NULL when there is no non-NULL content at all.
CREATE OR REPLACE FUNCTION average_chunk_size()
RETURNS INTEGER AS $$
DECLARE
    -- AVG() over an INTEGER expression yields NUMERIC.
    mean_length NUMERIC;
BEGIN
    SELECT AVG(LENGTH(dc.content))
    INTO mean_length
    FROM document_chunks AS dc;

    -- NUMERIC -> INTEGER conversion rounds to the nearest whole number.
    RETURN CAST(mean_length AS INTEGER);
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Create the function to get search analytics
|
|
-- Returns the most frequent search queries issued by one user in a recent
-- time window.
--
-- Parameters:
--   user_id_param  user whose searches are analysed (matched against
--                  vector_similarity_searches.user_id).
--   days_param     size of the look-back window in days, counted back from
--                  NOW(). A NULL value makes the time predicate unknown, so
--                  no rows are returned.
--
-- Returns: up to 20 rows of (query_text, search_count), most frequent first.
-- Ties on search_count are broken by query_text ascending, so the rows that
-- make the cut and their order are deterministic.
CREATE OR REPLACE FUNCTION get_search_analytics(user_id_param UUID, days_param INTEGER)
RETURNS TABLE(query_text TEXT, search_count BIGINT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        vs.query_text,
        COUNT(*) AS search_count
    FROM vector_similarity_searches AS vs
    WHERE vs.user_id = user_id_param
      AND vs.created_at >= NOW() - (days_param * INTERVAL '1 day')
    GROUP BY vs.query_text
    -- Sort on the aggregate itself rather than the "search_count" alias,
    -- which shares its name with an output parameter of this function.
    -- The second key makes the top-20 cut stable when counts tie.
    ORDER BY
        COUNT(*) DESC,
        vs.query_text ASC
    LIMIT 20;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Create the function to get vector database stats
|
|
-- Returns a single row of summary statistics:
--   total_chunks        number of rows in document_chunks.
--   total_documents     number of distinct document_id values in
--                       document_chunks.
--   average_similarity  mean of the strictly positive similarity_score
--                       values in document_similarities; NULL when there
--                       are none.
--
-- NOTE(review): document_similarities is not created in the visible part of
-- this file -- confirm it exists before this function is called.
CREATE OR REPLACE FUNCTION get_vector_database_stats()
RETURNS TABLE(total_chunks BIGINT, total_documents BIGINT, average_similarity REAL) AS $$
BEGIN
    RETURN QUERY
    SELECT
        chunk_totals.chunk_count,
        chunk_totals.document_count,
        (
            -- AVG() yields DOUBLE PRECISION or NUMERIC, never REAL, and
            -- RETURN QUERY requires the column type to match the declared
            -- result type exactly, so the cast is mandatory.
            SELECT CAST(AVG(ds.similarity_score) AS REAL)
            FROM document_similarities AS ds
            WHERE ds.similarity_score > 0
        )
    FROM (
        -- An aggregate without GROUP BY always yields exactly one row, so
        -- both counts come from a single scan of document_chunks.
        SELECT
            COUNT(*) AS chunk_count,
            COUNT(DISTINCT dc.document_id) AS document_count
        FROM document_chunks AS dc
    ) AS chunk_totals;
END;
$$ LANGUAGE plpgsql;
|