-- Fix vector search timeout by pre-filtering on document_id BEFORE vector search.
-- When document_id is provided, this avoids the full IVFFlat index scan (26K+ rows)
-- and instead computes distances on only ~80 chunks per document.

-- Drop old function signatures so that only the four-argument version remains.
DROP FUNCTION IF EXISTS match_document_chunks(vector(1536), float, int);
DROP FUNCTION IF EXISTS match_document_chunks(vector(1536), float, int, text);

-- match_document_chunks: similarity search over document_chunks.
--
-- Parameters:
--   query_embedding    - embedding to compare each chunk against.
--   match_threshold    - only rows whose similarity (1 - distance) is strictly
--                        greater than this value are returned.
--   match_count        - maximum number of rows returned (applied via LIMIT).
--   filter_document_id - optional; when not NULL, only chunks of that document
--                        are considered. Defaults to NULL (all documents).
--
-- Returns: one row per matching chunk (id, document_id, content, metadata,
--   chunk_index, similarity), ordered by ascending distance, i.e. most similar
--   first. Chunks with a NULL embedding are never returned.
--
-- The function branches on filter_document_id so that each case gets its own
-- query shape instead of a single "IS NULL OR ..." predicate.
CREATE OR REPLACE FUNCTION match_document_chunks (
    query_embedding vector(1536),
    match_threshold float,
    match_count int,
    filter_document_id text DEFAULT NULL
)
RETURNS TABLE (
    id UUID,
    document_id VARCHAR(255),
    content text,
    metadata JSONB,
    chunk_index INT,
    similarity float
)
LANGUAGE plpgsql
STABLE
AS $$
BEGIN
    IF filter_document_id IS NOT NULL THEN
        -- FAST PATH: restrict to one document first, then rank only those rows.
        -- The CTE is MATERIALIZED and has no ORDER BY, so the distance-ordered
        -- IVFFlat scan cannot be chosen for it: the document_id filter always
        -- runs before any distance is computed, and the ranking is exact.
        -- Assumes a btree index on document_chunks.document_id exists -- verify.
        -- Every column reference is table-qualified on purpose: the RETURNS
        -- TABLE column names are also PL/pgSQL variables, and an unqualified
        -- "id" or "document_id" would be ambiguous.
        RETURN QUERY
        WITH scoped AS MATERIALIZED (
            SELECT
                dc.id,
                dc.document_id,
                dc.content,
                dc.metadata,
                dc.chunk_index,
                dc.embedding <=> query_embedding AS distance
            FROM document_chunks dc
            WHERE dc.document_id = filter_document_id
              AND dc.embedding IS NOT NULL
        )
        SELECT
            scoped.id,
            scoped.document_id,
            scoped.content,
            scoped.metadata,
            scoped.chunk_index,
            1 - scoped.distance AS similarity
        FROM scoped
        WHERE 1 - scoped.distance > match_threshold
        ORDER BY scoped.distance
        LIMIT match_count;
    ELSE
        -- SLOW PATH: search across all documents using the IVFFlat index.
        -- Only used when no document_id filter is provided.
        -- ORDER BY must stay on the raw "embedding <=> query" expression;
        -- that is the form a vector index scan can satisfy.
        RETURN QUERY
        SELECT
            dc.id,
            dc.document_id,
            dc.content,
            dc.metadata,
            dc.chunk_index,
            1 - (dc.embedding <=> query_embedding) AS similarity
        FROM document_chunks dc
        WHERE dc.embedding IS NOT NULL
          AND 1 - (dc.embedding <=> query_embedding) > match_threshold
        ORDER BY dc.embedding <=> query_embedding
        LIMIT match_count;
    END IF;
END;
$$;

-- The argument list is spelled out so this statement keeps working even if
-- another overload of match_document_chunks is created later.
COMMENT ON FUNCTION match_document_chunks(vector(1536), float, int, text) IS 'Vector search with fast document-scoped path. 
When filter_document_id is provided, uses btree index to pre-filter (~80 rows) instead of scanning the full IVFFlat index (26K+ rows).';