From 5219f74615a5672a591ffe0fd4e9e5eca484fd49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Azade=20=F0=9F=90=90?= <azade@hey.com>
Date: Fri, 13 Feb 2026 12:47:53 +0000
Subject: [PATCH] fix(memory): use QAT variant of embedding model for better
 quality

Switch the default local embedding model from embeddinggemma-300M to
embeddinggemma-300m-qat (Quantization-Aware Training). QAT models are
trained with quantization in mind, yielding better embedding quality
at the same quantization level (Q8_0).
---
 src/memory/embeddings.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/memory/embeddings.ts b/src/memory/embeddings.ts
index a81f5fbab..77d7eb8c4 100644
--- a/src/memory/embeddings.ts
+++ b/src/memory/embeddings.ts
@@ -56,7 +56,8 @@ export type EmbeddingProviderOptions = {
   };
 };
 
-const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
+const DEFAULT_LOCAL_MODEL =
+  "hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf";
 
 function canAutoSelectLocal(options: EmbeddingProviderOptions): boolean {
   const modelPath = options.local?.modelPath?.trim();