Memory/QMD: harden multi-collection search and embed scheduling
This commit is contained in:
@@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Security/Agents: restrict local MEDIA tool attachments to core tools and the OpenClaw temp root to prevent untrusted MCP tool file exfiltration. Thanks @NucleiAv and @thewilloftheshadow.
|
||||
- macOS/Build: default release packaging to `BUNDLE_ID=ai.openclaw.mac` in `scripts/package-mac-dist.sh`, so Sparkle feed URL is retained and auto-update no longer fails with an empty appcast feed. (#19750) Thanks @loganprit.
|
||||
- Gateway/Pairing: clear persisted paired-device state when the gateway client closes with `device token mismatch` (`1008`) so reconnect flows can cleanly re-enter pairing. (#22071) Thanks @mbelinky.
|
||||
- Memory/QMD: respect per-agent `memorySearch.enabled=false` during gateway QMD startup initialization, split multi-collection QMD searches into per-collection queries (`search`/`vsearch`/`query`) to avoid sparse-term drops, prefer collection-hinted doc resolution to avoid stale-hash collisions, retry boot updates on transient lock/timeout failures, skip `qmd embed` in BM25-only `search` mode (including `memory index --force`), and serialize embed runs globally with failure backoff to prevent CPU storms on multi-agent hosts. (#20581, #21590, #20513, #20001, #21266, #21583, #20346, #19493) Thanks @danielrevivo, @zanderkrause, @sunyan034-cmd, @tilleulenspiegel, @dae-oss, @adamlongcreativellc, @jonathanadams96, and @kiliansitel.
|
||||
- Signal/Outbound: preserve case for Base64 group IDs during outbound target normalization so cross-context routing and policy checks no longer break when group IDs include uppercase characters. (#5578) Thanks @heyhudson.
|
||||
- Providers/Copilot: drop persisted assistant `thinking` blocks for Claude models (while preserving turn structure/tool blocks) so follow-up requests no longer fail on invalid `thinkingSignature` payloads. (#19459) Thanks @jackheuberger.
|
||||
- Providers/Copilot: add `claude-sonnet-4.6` and `claude-sonnet-4.5` to the default GitHub Copilot model catalog and add coverage for model-list/definition helpers. (#20270, fixes #20091) Thanks @Clawborn.
|
||||
|
||||
@@ -73,4 +73,28 @@ describe("startGatewayMemoryBackend", () => {
|
||||
'qmd memory startup initialization armed for agent "ops"',
|
||||
);
|
||||
});
|
||||
|
||||
// Verifies that gateway startup only initializes the QMD backend for agents whose
// memory search is enabled: "ops" opts out via its per-agent override, so only
// "main" should reach getMemorySearchManager.
it("skips agents with memory search disabled", async () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: { memorySearch: { enabled: true } },
|
||||
list: [
|
||||
{ id: "main", default: true },
|
||||
// Per-agent override that disables memory search for "ops" only.
{ id: "ops", memorySearch: { enabled: false } },
|
||||
],
|
||||
},
|
||||
memory: { backend: "qmd", qmd: {} },
|
||||
} as OpenClawConfig;
|
||||
const log = { info: vi.fn(), warn: vi.fn() };
|
||||
getMemorySearchManagerMock.mockResolvedValue({ manager: { search: vi.fn() } });
|
||||
|
||||
await startGatewayMemoryBackend({ cfg, log });
|
||||
|
||||
// Exactly one manager lookup, and it must be for "main".
expect(getMemorySearchManagerMock).toHaveBeenCalledTimes(1);
|
||||
expect(getMemorySearchManagerMock).toHaveBeenCalledWith({ cfg, agentId: "main" });
|
||||
expect(log.info).toHaveBeenCalledWith(
|
||||
'qmd memory startup initialization armed for agent "main"',
|
||||
);
|
||||
// Skipping a disabled agent is expected to be silent (no warning logged).
expect(log.warn).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { listAgentIds } from "../agents/agent-scope.js";
|
||||
import { resolveMemorySearchConfig } from "../agents/memory-search.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { resolveMemoryBackendConfig } from "../memory/backend-config.js";
|
||||
import { getMemorySearchManager } from "../memory/index.js";
|
||||
@@ -9,6 +10,9 @@ export async function startGatewayMemoryBackend(params: {
|
||||
}): Promise<void> {
|
||||
const agentIds = listAgentIds(params.cfg);
|
||||
for (const agentId of agentIds) {
|
||||
if (!resolveMemorySearchConfig(params.cfg, agentId)) {
|
||||
continue;
|
||||
}
|
||||
const resolved = resolveMemoryBackendConfig({ cfg: params.cfg, agentId });
|
||||
if (resolved.backend !== "qmd" || !resolved.qmd) {
|
||||
continue;
|
||||
|
||||
@@ -156,17 +156,17 @@ describe("QmdMemoryManager", () => {
|
||||
const baselineCalls = spawnMock.mock.calls.length;
|
||||
|
||||
await manager.sync({ reason: "manual" });
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
|
||||
|
||||
await manager.sync({ reason: "manual-again" });
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
|
||||
|
||||
(manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt =
|
||||
Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10;
|
||||
|
||||
await manager.sync({ reason: "after-wait" });
|
||||
// By default we refresh embeddings less frequently than index updates.
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3);
|
||||
// `search` mode does not require qmd embed side effects.
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
|
||||
await manager.close();
|
||||
});
|
||||
@@ -359,7 +359,7 @@ describe("QmdMemoryManager", () => {
|
||||
expect(addSessions?.[2]).toBe(path.join(stateDir, "agents", devAgentId, "qmd", "sessions"));
|
||||
});
|
||||
|
||||
it("rebinds sessions collection when qmd only reports collection names", async () => {
|
||||
it("rebinds managed collections when qmd only reports collection names", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
@@ -396,6 +396,11 @@ describe("QmdMemoryManager", () => {
|
||||
args[0] === "collection" && args[1] === "remove" && args[2] === sessionCollectionName,
|
||||
);
|
||||
expect(removeSessions).toBeDefined();
|
||||
const removeWorkspace = commands.find(
|
||||
(args) =>
|
||||
args[0] === "collection" && args[1] === "remove" && args[2] === `workspace-${agentId}`,
|
||||
);
|
||||
expect(removeWorkspace).toBeDefined();
|
||||
|
||||
const addSessions = commands.find((args) => {
|
||||
if (args[0] !== "collection" || args[1] !== "add") {
|
||||
@@ -415,6 +420,7 @@ describe("QmdMemoryManager", () => {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
searchMode: "query",
|
||||
update: {
|
||||
interval: "0s",
|
||||
debounceMs: 0,
|
||||
@@ -792,23 +798,16 @@ describe("QmdMemoryManager", () => {
|
||||
const { manager, resolved } = await createManager();
|
||||
|
||||
await manager.search("test", { sessionKey: "agent:main:slack:dm:u123" });
|
||||
const searchCall = spawnMock.mock.calls.find(
|
||||
(call: unknown[]) => (call[1] as string[])?.[0] === "search",
|
||||
);
|
||||
const maxResults = resolved.qmd?.limits.maxResults;
|
||||
if (!maxResults) {
|
||||
throw new Error("qmd maxResults missing");
|
||||
}
|
||||
expect(searchCall?.[1]).toEqual([
|
||||
"search",
|
||||
"test",
|
||||
"--json",
|
||||
"-n",
|
||||
String(maxResults),
|
||||
"-c",
|
||||
"workspace-main",
|
||||
"-c",
|
||||
"notes-main",
|
||||
const searchCalls = spawnMock.mock.calls
|
||||
.map((call: unknown[]) => call[1] as string[])
|
||||
.filter((args: string[]) => args[0] === "search");
|
||||
expect(searchCalls).toEqual([
|
||||
["search", "test", "--json", "-n", String(maxResults), "-c", "workspace-main"],
|
||||
["search", "test", "--json", "-n", String(maxResults), "-c", "notes-main"],
|
||||
]);
|
||||
await manager.close();
|
||||
});
|
||||
@@ -904,17 +903,7 @@ describe("QmdMemoryManager", () => {
|
||||
.map((call: unknown[]) => call[1] as string[])
|
||||
.filter((args: string[]) => args[0] === "search" || args[0] === "query");
|
||||
expect(searchAndQueryCalls).toEqual([
|
||||
[
|
||||
"search",
|
||||
"test",
|
||||
"--json",
|
||||
"-n",
|
||||
String(maxResults),
|
||||
"-c",
|
||||
"workspace-main",
|
||||
"-c",
|
||||
"notes-main",
|
||||
],
|
||||
["search", "test", "--json", "-n", String(maxResults), "-c", "workspace-main"],
|
||||
["query", "test", "--json", "-n", String(maxResults), "-c", "workspace-main"],
|
||||
["query", "test", "--json", "-n", String(maxResults), "-c", "notes-main"],
|
||||
]);
|
||||
@@ -984,6 +973,70 @@ describe("QmdMemoryManager", () => {
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
// Verifies that a forced sync in `searchMode: "search"` spawns `qmd update` but never
// `qmd embed`.
it("skips qmd embed in search mode even for forced sync", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
searchMode: "search",
|
||||
update: { interval: "0s", debounceMs: 0, onBoot: false },
|
||||
paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }],
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
const { manager } = await createManager({ mode: "status" });
|
||||
await manager.sync({ reason: "manual", force: true });
|
||||
|
||||
// Keep only the spawned commands whose first argument is `update` or `embed`.
const commandCalls = spawnMock.mock.calls
|
||||
.map((call: unknown[]) => call[1] as string[])
|
||||
.filter((args: string[]) => args[0] === "update" || args[0] === "embed");
|
||||
// A single `update` and no `embed`, despite `force: true`.
expect(commandCalls).toEqual([["update"]]);
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
// Verifies that the boot-time `qmd update` is attempted a second time when the first
// attempt fails with a SQLITE_BUSY message on stderr.
it("retries boot update when qmd reports a retryable lock error", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
searchMode: "search",
|
||||
update: {
|
||||
interval: "0s",
|
||||
debounceMs: 60_000,
|
||||
onBoot: true,
|
||||
// NOTE(review): presumably makes createManager wait for the boot sync, which the
// assertion right after createManager relies on — confirm against the manager.
waitForBootSync: true,
|
||||
},
|
||||
paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }],
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
let updateCalls = 0;
|
||||
spawnMock.mockImplementation((_cmd: string, args: string[]) => {
|
||||
if (args[0] === "update") {
|
||||
updateCalls += 1;
|
||||
const child = createMockChild({ autoClose: false });
|
||||
if (updateCalls === 1) {
|
||||
// First update attempt: emit a lock error on stderr (last argument is presumably
// the exit code — confirm against emitAndClose).
emitAndClose(child, "stderr", "SQLITE_BUSY: database is locked", 2);
|
||||
} else {
|
||||
// Later attempts: empty stdout, presumably a clean exit.
emitAndClose(child, "stdout", "", 0);
|
||||
}
|
||||
return child;
|
||||
}
|
||||
return createMockChild();
|
||||
});
|
||||
|
||||
const { manager } = await createManager({ mode: "full" });
|
||||
|
||||
// One failed attempt plus one retry.
expect(updateCalls).toBe(2);
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
it("scopes by channel for agent-prefixed session keys", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
@@ -1170,7 +1223,7 @@ describe("QmdMemoryManager", () => {
|
||||
};
|
||||
inner.db = {
|
||||
prepare: () => ({
|
||||
get: () => {
|
||||
all: () => {
|
||||
throw new Error("SQLITE_BUSY: database is locked");
|
||||
},
|
||||
}),
|
||||
@@ -1198,11 +1251,11 @@ describe("QmdMemoryManager", () => {
|
||||
|
||||
const { manager } = await createManager();
|
||||
const inner = manager as unknown as {
|
||||
db: { prepare: () => { get: () => never }; close: () => void } | null;
|
||||
db: { prepare: () => { all: () => never }; close: () => void } | null;
|
||||
};
|
||||
inner.db = {
|
||||
prepare: () => ({
|
||||
get: () => {
|
||||
all: () => {
|
||||
throw new Error("SQLITE_BUSY: database is locked");
|
||||
},
|
||||
}),
|
||||
@@ -1235,19 +1288,19 @@ describe("QmdMemoryManager", () => {
|
||||
const { manager } = await createManager();
|
||||
|
||||
const inner = manager as unknown as {
|
||||
db: { prepare: (query: string) => { get: (arg: unknown) => unknown }; close: () => void };
|
||||
db: { prepare: (query: string) => { all: (arg: unknown) => unknown }; close: () => void };
|
||||
};
|
||||
inner.db = {
|
||||
prepare: (query: string) => {
|
||||
prepareCalls.push(query);
|
||||
return {
|
||||
get: (arg: unknown) => {
|
||||
all: (arg: unknown) => {
|
||||
if (query.includes("hash = ?")) {
|
||||
return undefined;
|
||||
return [];
|
||||
}
|
||||
if (query.includes("hash LIKE ?")) {
|
||||
expect(arg).toBe(`${exactDocid}%`);
|
||||
return { collection: "workspace-main", path: "notes/welcome.md" };
|
||||
return [{ collection: "workspace-main", path: "notes/welcome.md" }];
|
||||
}
|
||||
throw new Error(`unexpected sqlite query: ${query}`);
|
||||
},
|
||||
@@ -1274,6 +1327,76 @@ describe("QmdMemoryManager", () => {
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
// Verifies doc resolution when the sqlite lookup returns two rows for one docid: the
// first row belongs to a collection named "stale-workspace", the second to
// "workspace-main". The search must still yield the single expected result.
it("prefers collection hint when resolving duplicate qmd document hashes", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
update: { interval: "0s", debounceMs: 60_000, onBoot: false },
|
||||
paths: [
|
||||
{ path: workspaceDir, pattern: "**/*.md", name: "workspace" },
|
||||
{ path: path.join(workspaceDir, "notes"), pattern: "**/*.md", name: "notes" },
|
||||
],
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
const duplicateDocid = "dup-123";
|
||||
// Only the `search` call that names "workspace-main" returns a hit; the one that
// names "notes-main" returns an empty JSON array.
spawnMock.mockImplementation((_cmd: string, args: string[]) => {
|
||||
if (args[0] === "search" && args.includes("workspace-main")) {
|
||||
const child = createMockChild({ autoClose: false });
|
||||
emitAndClose(
|
||||
child,
|
||||
"stdout",
|
||||
JSON.stringify([
|
||||
{ docid: duplicateDocid, score: 0.9, snippet: "@@ -3,1\nworkspace hit" },
|
||||
]),
|
||||
);
|
||||
return child;
|
||||
}
|
||||
if (args[0] === "search" && args.includes("notes-main")) {
|
||||
const child = createMockChild({ autoClose: false });
|
||||
emitAndClose(child, "stdout", "[]");
|
||||
return child;
|
||||
}
|
||||
return createMockChild();
|
||||
});
|
||||
|
||||
const { manager } = await createManager();
|
||||
// Swap the manager's private sqlite handle for a stub exposing prepare().all().
const inner = manager as unknown as {
|
||||
db: { prepare: (query: string) => { all: (arg: unknown) => unknown }; close: () => void };
|
||||
};
|
||||
inner.db = {
|
||||
prepare: (_query: string) => ({
|
||||
all: (arg: unknown) => {
|
||||
if (typeof arg === "string" && arg.startsWith(duplicateDocid)) {
|
||||
// Two rows for the same docid; the non-matching collection is listed first so a
// "take the first row" strategy would pick it.
return [
|
||||
{ collection: "stale-workspace", path: "notes/welcome.md" },
|
||||
{ collection: "workspace-main", path: "notes/welcome.md" },
|
||||
];
|
||||
}
|
||||
return [];
|
||||
},
|
||||
}),
|
||||
close: () => {},
|
||||
};
|
||||
|
||||
const results = await manager.search("workspace", { sessionKey: "agent:main:slack:dm:u123" });
|
||||
// startLine/endLine 3 correspond to the "@@ -3,1" header in the stubbed snippet.
expect(results).toEqual([
|
||||
{
|
||||
path: "notes/welcome.md",
|
||||
startLine: 3,
|
||||
endLine: 3,
|
||||
score: 0.9,
|
||||
snippet: "@@ -3,1\nworkspace hit",
|
||||
source: "memory",
|
||||
},
|
||||
]);
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
it("errors when qmd output exceeds command output safety cap", async () => {
|
||||
const noisyPayload = "x".repeat(240_000);
|
||||
spawnMock.mockImplementation((_cmd: string, args: string[]) => {
|
||||
|
||||
@@ -34,6 +34,24 @@ const SNIPPET_HEADER_RE = /@@\s*-([0-9]+),([0-9]+)/;
|
||||
const SEARCH_PENDING_UPDATE_WAIT_MS = 500;
|
||||
const MAX_QMD_OUTPUT_CHARS = 200_000;
|
||||
const NUL_MARKER_RE = /(?:\^@|\\0|\\x00|\\u0000|null\s*byte|nul\s*byte)/i;
|
||||
const QMD_EMBED_BACKOFF_BASE_MS = 60_000;
|
||||
const QMD_EMBED_BACKOFF_MAX_MS = 60 * 60 * 1000;
|
||||
|
||||
// Tail of the embed queue. Each runWithQmdEmbedLock call replaces it with a promise
// that resolves once that call's task has finished (successfully or not).
let qmdEmbedQueueTail: Promise<void> = Promise.resolve();
|
||||
|
||||
/**
 * Runs `task` only after the task queued by the previous call (if any) has finished,
 * so tasks passed through this function execute one at a time, in call order.
 *
 * @param task - Async work to run once the previous queue entry has settled.
 * @returns The value `task` resolves to; a rejection from `task` propagates to the caller.
 */
async function runWithQmdEmbedLock<T>(task: () => Promise<T>): Promise<T> {
|
||||
const previous = qmdEmbedQueueTail;
|
||||
let release: (() => void) | undefined;
|
||||
// Publish the new tail synchronously (before the first await) so that later callers
// queue behind this call.
qmdEmbedQueueTail = new Promise<void>((resolve) => {
|
||||
release = resolve;
|
||||
});
|
||||
// Wait for the predecessor; a rejected predecessor must not block this task.
await previous.catch(() => undefined);
|
||||
try {
|
||||
return await task();
|
||||
} finally {
|
||||
// Always release, even when the task throws, so the queue keeps draining.
release?.();
|
||||
}
|
||||
}
|
||||
|
||||
type CollectionRoot = {
|
||||
path: string;
|
||||
@@ -104,6 +122,8 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
private db: SqliteDatabase | null = null;
|
||||
private lastUpdateAt: number | null = null;
|
||||
private lastEmbedAt: number | null = null;
|
||||
private embedBackoffUntil: number | null = null;
|
||||
private embedFailureCount = 0;
|
||||
private attemptedNullByteCollectionRepair = false;
|
||||
|
||||
private constructor(params: {
|
||||
@@ -318,8 +338,8 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
): boolean {
|
||||
if (!listed.path) {
|
||||
// Older qmd versions may only return names from `collection list --json`.
|
||||
// Force sessions collections to rebind so per-agent session export paths stay isolated.
|
||||
return collection.kind === "sessions";
|
||||
// Rebind managed collections so stale path bindings cannot survive upgrades.
|
||||
return true;
|
||||
}
|
||||
if (!this.pathsMatch(listed.path, collection.path)) {
|
||||
return true;
|
||||
@@ -407,8 +427,13 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
const qmdSearchCommand = this.qmd.searchMode;
|
||||
let parsed: QmdQueryResult[];
|
||||
try {
|
||||
if (qmdSearchCommand === "query" && collectionNames.length > 1) {
|
||||
parsed = await this.runQueryAcrossCollections(trimmed, limit, collectionNames);
|
||||
if (collectionNames.length > 1) {
|
||||
parsed = await this.runQueryAcrossCollections(
|
||||
trimmed,
|
||||
limit,
|
||||
collectionNames,
|
||||
qmdSearchCommand,
|
||||
);
|
||||
} else {
|
||||
const args = this.buildSearchArgs(qmdSearchCommand, trimmed, limit);
|
||||
args.push(...this.buildCollectionFilterArgs(collectionNames));
|
||||
@@ -424,7 +449,7 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
);
|
||||
try {
|
||||
if (collectionNames.length > 1) {
|
||||
parsed = await this.runQueryAcrossCollections(trimmed, limit, collectionNames);
|
||||
parsed = await this.runQueryAcrossCollections(trimmed, limit, collectionNames, "query");
|
||||
} else {
|
||||
const fallbackArgs = this.buildSearchArgs("query", trimmed, limit);
|
||||
fallbackArgs.push(...this.buildCollectionFilterArgs(collectionNames));
|
||||
@@ -444,7 +469,10 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
}
|
||||
const results: MemorySearchResult[] = [];
|
||||
for (const entry of parsed) {
|
||||
const doc = await this.resolveDocLocation(entry.docid);
|
||||
const doc = await this.resolveDocLocation(entry.docid, {
|
||||
preferredCollection: entry.collection,
|
||||
preferredFile: entry.file,
|
||||
});
|
||||
if (!doc) {
|
||||
continue;
|
||||
}
|
||||
@@ -605,25 +633,17 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
if (this.sessionExporter) {
|
||||
await this.exportSessions();
|
||||
}
|
||||
try {
|
||||
await this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs });
|
||||
} catch (err) {
|
||||
if (!(await this.tryRepairNullByteCollections(err, reason))) {
|
||||
throw err;
|
||||
}
|
||||
await this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs });
|
||||
}
|
||||
const embedIntervalMs = this.qmd.update.embedIntervalMs;
|
||||
const shouldEmbed =
|
||||
Boolean(force) ||
|
||||
this.lastEmbedAt === null ||
|
||||
(embedIntervalMs > 0 && Date.now() - this.lastEmbedAt > embedIntervalMs);
|
||||
if (shouldEmbed) {
|
||||
await this.runQmdUpdateWithRetry(reason);
|
||||
if (this.shouldRunEmbed(force)) {
|
||||
try {
|
||||
await this.runQmd(["embed"], { timeoutMs: this.qmd.update.embedTimeoutMs });
|
||||
await runWithQmdEmbedLock(async () => {
|
||||
await this.runQmd(["embed"], { timeoutMs: this.qmd.update.embedTimeoutMs });
|
||||
});
|
||||
this.lastEmbedAt = Date.now();
|
||||
this.embedBackoffUntil = null;
|
||||
this.embedFailureCount = 0;
|
||||
} catch (err) {
|
||||
log.warn(`qmd embed failed (${reason}): ${String(err)}`);
|
||||
this.noteEmbedFailure(reason, err);
|
||||
}
|
||||
}
|
||||
this.lastUpdateAt = Date.now();
|
||||
@@ -635,6 +655,74 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
await this.pendingUpdate;
|
||||
}
|
||||
|
||||
/**
 * Runs the qmd update step, retrying it for boot-time runs.
 *
 * A run counts as a boot run when `reason` is exactly "boot" or starts with "boot:".
 * Boot runs get up to 3 attempts; every other reason gets a single attempt. A retry
 * happens only when `isRetryableUpdateError` accepts the error, after a delay of
 * 500 ms that doubles with each attempt (500 ms, then 1000 ms).
 *
 * @param reason - Label for the sync trigger; used for boot detection and log output.
 * @throws The last error when attempts are exhausted, or immediately when the error
 *   is not retryable.
 */
private async runQmdUpdateWithRetry(reason: string): Promise<void> {
|
||||
const isBootRun = reason === "boot" || reason.startsWith("boot:");
|
||||
const maxAttempts = isBootRun ? 3 : 1;
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
||||
try {
|
||||
await this.runQmdUpdateOnce(reason);
|
||||
return;
|
||||
} catch (err) {
|
||||
if (attempt >= maxAttempts || !this.isRetryableUpdateError(err)) {
|
||||
throw err;
|
||||
}
|
||||
// Exponential backoff: 500 ms after the first failure, 1000 ms after the second.
const delayMs = 500 * 2 ** (attempt - 1);
|
||||
// The denominator is the number of retries available, not the attempt count.
log.warn(
|
||||
`qmd update retry ${attempt}/${maxAttempts - 1} after failure (${reason}): ${String(err)}`,
|
||||
);
|
||||
await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs `qmd update` with the configured update timeout. If it fails, hands the error
 * to `tryRepairNullByteCollections`; when that reports success, `qmd update` is run
 * one more time, otherwise the original error is rethrown.
 *
 * @param reason - Label for the sync trigger, forwarded to the repair helper.
 * @throws The original update error when no repair was applied, or the error from the
 *   second update run.
 */
private async runQmdUpdateOnce(reason: string): Promise<void> {
|
||||
try {
|
||||
await this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs });
|
||||
} catch (err) {
|
||||
// NOTE(review): presumably resolves truthy only after it repaired something —
// confirm against tryRepairNullByteCollections.
if (!(await this.tryRepairNullByteCollections(err, reason))) {
|
||||
throw err;
|
||||
}
|
||||
// Second and final run after the repair; its failure is not caught here.
await this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs });
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decides whether a failed qmd update is worth retrying.
 *
 * @param err - The value thrown by the update run.
 * @returns `true` when `isSqliteBusyError` accepts the error, or when its message
 *   (case-insensitively) contains "timed out"; otherwise `false`.
 */
private isRetryableUpdateError(err: unknown): boolean {
|
||||
if (this.isSqliteBusyError(err)) {
|
||||
return true;
|
||||
}
|
||||
// Non-Error values are stringified so the substring check still applies.
const message = err instanceof Error ? err.message : String(err);
|
||||
const normalized = message.toLowerCase();
|
||||
return normalized.includes("timed out");
|
||||
}
|
||||
|
||||
/**
 * Decides whether the current sync should run `qmd embed`.
 *
 * Checks are applied in this order:
 * 1. Never when the configured search mode is "search".
 * 2. Never while the failure backoff window (`embedBackoffUntil`) is still open.
 * 3. Otherwise yes when `force` is truthy, when no embed has completed yet, or when
 *    `embedIntervalMs` is positive and more than that long has passed since the last
 *    embed.
 *
 * Because of the ordering, `force` does not override the search-mode or backoff checks.
 *
 * @param force - Whether the caller requested a forced sync.
 * @returns `true` when embed should run now.
 */
private shouldRunEmbed(force?: boolean): boolean {
|
||||
if (this.qmd.searchMode === "search") {
|
||||
return false;
|
||||
}
|
||||
const now = Date.now();
|
||||
if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) {
|
||||
return false;
|
||||
}
|
||||
const embedIntervalMs = this.qmd.update.embedIntervalMs;
|
||||
return (
|
||||
Boolean(force) ||
|
||||
this.lastEmbedAt === null ||
|
||||
(embedIntervalMs > 0 && now - this.lastEmbedAt > embedIntervalMs)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Records a failed embed run and opens (or extends) the backoff window.
 *
 * The delay is `QMD_EMBED_BACKOFF_BASE_MS` doubled for each consecutive failure after
 * the first, capped at `QMD_EMBED_BACKOFF_MAX_MS`. The failure is logged as a warning
 * together with the chosen delay in whole seconds (rounded up).
 *
 * @param reason - Label for the sync trigger, included in the log line.
 * @param err - The value thrown by the embed run, stringified into the log line.
 */
private noteEmbedFailure(reason: string, err: unknown): void {
|
||||
this.embedFailureCount += 1;
|
||||
// First failure uses the base delay (exponent 0); later failures double it up to the cap.
const delayMs = Math.min(
|
||||
QMD_EMBED_BACKOFF_MAX_MS,
|
||||
QMD_EMBED_BACKOFF_BASE_MS * 2 ** Math.max(0, this.embedFailureCount - 1),
|
||||
);
|
||||
this.embedBackoffUntil = Date.now() + delayMs;
|
||||
log.warn(
|
||||
`qmd embed failed (${reason}): ${String(err)}; backing off for ${Math.ceil(delayMs / 1000)}s`,
|
||||
);
|
||||
}
|
||||
|
||||
private enqueueForcedUpdate(reason: string): Promise<void> {
|
||||
this.queuedForcedRuns += 1;
|
||||
if (!this.queuedForcedUpdate) {
|
||||
@@ -916,6 +1004,7 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
|
||||
private async resolveDocLocation(
|
||||
docid?: string,
|
||||
hints?: { preferredCollection?: string; preferredFile?: string },
|
||||
): Promise<{ rel: string; abs: string; source: MemorySource } | null> {
|
||||
if (!docid) {
|
||||
return null;
|
||||
@@ -924,23 +1013,21 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
if (!normalized) {
|
||||
return null;
|
||||
}
|
||||
const cached = this.docPathCache.get(normalized);
|
||||
const cacheKey = `${hints?.preferredCollection ?? "*"}:${normalized}`;
|
||||
const cached = this.docPathCache.get(cacheKey);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
const db = this.ensureDb();
|
||||
let row: { collection: string; path: string } | undefined;
|
||||
let rows: Array<{ collection: string; path: string }> = [];
|
||||
try {
|
||||
const exact = db
|
||||
.prepare("SELECT collection, path FROM documents WHERE hash = ? AND active = 1 LIMIT 1")
|
||||
.get(normalized) as { collection: string; path: string } | undefined;
|
||||
row = exact;
|
||||
if (!row) {
|
||||
row = db
|
||||
.prepare(
|
||||
"SELECT collection, path FROM documents WHERE hash LIKE ? AND active = 1 LIMIT 1",
|
||||
)
|
||||
.get(`${normalized}%`) as { collection: string; path: string } | undefined;
|
||||
rows = db
|
||||
.prepare("SELECT collection, path FROM documents WHERE hash = ? AND active = 1")
|
||||
.all(normalized) as Array<{ collection: string; path: string }>;
|
||||
if (rows.length === 0) {
|
||||
rows = db
|
||||
.prepare("SELECT collection, path FROM documents WHERE hash LIKE ? AND active = 1")
|
||||
.all(`${normalized}%`) as Array<{ collection: string; path: string }>;
|
||||
}
|
||||
} catch (err) {
|
||||
if (this.isSqliteBusyError(err)) {
|
||||
@@ -949,17 +1036,54 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
if (!row) {
|
||||
if (rows.length === 0) {
|
||||
return null;
|
||||
}
|
||||
const location = this.toDocLocation(row.collection, row.path);
|
||||
const location = this.pickDocLocation(rows, hints);
|
||||
if (!location) {
|
||||
return null;
|
||||
}
|
||||
this.docPathCache.set(normalized, location);
|
||||
this.docPathCache.set(cacheKey, location);
|
||||
return location;
|
||||
}
|
||||
|
||||
/**
 * Chooses one document location from the candidate rows found for a docid.
 *
 * Candidates are tried in three passes, and the first row that `toDocLocation` turns
 * into a truthy location wins:
 * 1. Rows whose `collection` equals `hints.preferredCollection`.
 * 2. Rows whose normalized `path` equals the normalized `hints.preferredFile`, or ends
 *    with a path separator followed by it.
 * 3. Any row, in the order given.
 *
 * @param rows - Candidate `{ collection, path }` rows for the docid.
 * @param hints - Optional collection and file hints for the search hit.
 * @returns The chosen location, or `null` when no row yields one.
 */
private pickDocLocation(
|
||||
rows: Array<{ collection: string; path: string }>,
|
||||
hints?: { preferredCollection?: string; preferredFile?: string },
|
||||
): { rel: string; abs: string; source: MemorySource } | null {
|
||||
// Pass 1: exact collection-name match.
if (hints?.preferredCollection) {
|
||||
for (const row of rows) {
|
||||
if (row.collection !== hints.preferredCollection) {
|
||||
continue;
|
||||
}
|
||||
const location = this.toDocLocation(row.collection, row.path);
|
||||
if (location) {
|
||||
return location;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pass 2: file-path match (exact, or as a trailing path suffix).
if (hints?.preferredFile) {
|
||||
const preferred = path.normalize(hints.preferredFile);
|
||||
for (const row of rows) {
|
||||
const rowPath = path.normalize(row.path);
|
||||
if (rowPath !== preferred && !rowPath.endsWith(path.sep + preferred)) {
|
||||
continue;
|
||||
}
|
||||
const location = this.toDocLocation(row.collection, row.path);
|
||||
if (location) {
|
||||
return location;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pass 3: fall back to the first row that resolves at all.
for (const row of rows) {
|
||||
const location = this.toDocLocation(row.collection, row.path);
|
||||
if (location) {
|
||||
return location;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private extractSnippetLines(snippet: string): { startLine: number; endLine: number } {
|
||||
const match = SNIPPET_HEADER_RE.exec(snippet);
|
||||
if (match) {
|
||||
@@ -1199,25 +1323,38 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
query: string,
|
||||
limit: number,
|
||||
collectionNames: string[],
|
||||
command: "query" | "search" | "vsearch",
|
||||
): Promise<QmdQueryResult[]> {
|
||||
log.debug(
|
||||
`qmd query multi-collection workaround active (${collectionNames.length} collections)`,
|
||||
`qmd ${command} multi-collection workaround active (${collectionNames.length} collections)`,
|
||||
);
|
||||
const bestByDocId = new Map<string, QmdQueryResult>();
|
||||
for (const collectionName of collectionNames) {
|
||||
const args = this.buildSearchArgs("query", query, limit);
|
||||
const args = this.buildSearchArgs(command, query, limit);
|
||||
args.push("-c", collectionName);
|
||||
const result = await this.runQmd(args, { timeoutMs: this.qmd.limits.timeoutMs });
|
||||
const parsed = parseQmdQueryJson(result.stdout, result.stderr);
|
||||
for (const entry of parsed) {
|
||||
if (typeof entry.docid !== "string" || !entry.docid.trim()) {
|
||||
const normalizedDocId =
|
||||
typeof entry.docid === "string" && entry.docid.trim().length > 0
|
||||
? entry.docid
|
||||
: undefined;
|
||||
if (!normalizedDocId) {
|
||||
continue;
|
||||
}
|
||||
const prev = bestByDocId.get(entry.docid);
|
||||
const withCollection = {
|
||||
...entry,
|
||||
docid: normalizedDocId,
|
||||
collection: entry.collection ?? collectionName,
|
||||
} satisfies QmdQueryResult;
|
||||
const prev = bestByDocId.get(normalizedDocId);
|
||||
const prevScore = typeof prev?.score === "number" ? prev.score : Number.NEGATIVE_INFINITY;
|
||||
const nextScore = typeof entry.score === "number" ? entry.score : Number.NEGATIVE_INFINITY;
|
||||
const nextScore =
|
||||
typeof withCollection.score === "number"
|
||||
? withCollection.score
|
||||
: Number.NEGATIVE_INFINITY;
|
||||
if (!prev || nextScore > prevScore) {
|
||||
bestByDocId.set(entry.docid, entry);
|
||||
bestByDocId.set(normalizedDocId, withCollection);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ const log = createSubsystemLogger("memory");
|
||||
export type QmdQueryResult = {
|
||||
docid?: string;
|
||||
score?: number;
|
||||
collection?: string;
|
||||
file?: string;
|
||||
snippet?: string;
|
||||
body?: string;
|
||||
|
||||
Reference in New Issue
Block a user