Files
Moltbot/src/memory/qmd-manager.ts
2026-02-22 14:40:04 -08:00

1901 lines
62 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import readline from "node:readline";
import { StringDecoder } from "node:string_decoder";
import { resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
import type { OpenClawConfig } from "../config/config.js";
import { resolveStateDir } from "../config/paths.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { isFileMissingError, statRegularFile } from "./fs-utils.js";
import { deriveQmdScopeChannel, deriveQmdScopeChatType, isQmdScopeAllowed } from "./qmd-scope.js";
import {
  listSessionFilesForAgent,
  buildSessionEntry,
  type SessionFileEntry,
} from "./session-files.js";
import { requireNodeSqlite } from "./sqlite.js";
import type {
  MemoryEmbeddingProbeResult,
  MemoryProviderStatus,
  MemorySearchManager,
  MemorySearchResult,
  MemorySource,
  MemorySyncProgressUpdate,
} from "./types.js";
type SqliteDatabase = import("node:sqlite").DatabaseSync;
import type {
  ResolvedMemoryBackendConfig,
  ResolvedQmdConfig,
  ResolvedQmdMcporterConfig,
} from "./backend-config.js";
import { parseQmdQueryJson, type QmdQueryResult } from "./qmd-query-parser.js";
import { extractKeywords } from "./query-expansion.js";
// Logger for this module, registered under the "memory" subsystem.
const log = createSubsystemLogger("memory");
// Matches a `@@ -<digits>,<digits>` header and captures both numbers.
// NOTE(review): presumably consumed by extractSnippetLines, which is outside this view.
const SNIPPET_HEADER_RE = /@@\s*-([0-9]+),([0-9]+)/;
// Milliseconds. NOTE(review): by its name, the wait applied to a pending update before a
// search runs; the consumer is outside this view, confirm there.
const SEARCH_PENDING_UPDATE_WAIT_MS = 500;
// Per-stream cap (in characters) on captured qmd stdout/stderr; runQmd rejects once it is exceeded.
const MAX_QMD_OUTPUT_CHARS = 200_000;
// Matches textual renderings of a NUL byte ("^@", "\0", "\x00", "\u0000", "null byte", "nul byte"),
// case-insensitively. Used to recognise error messages that mention a NUL byte.
const NUL_MARKER_RE = /(?:\^@|\\0|\\x00|\\u0000|null\s*byte|nul\s*byte)/i;
// Embed back-off after a failure: starts at one minute, doubles per consecutive failure,
// and is capped at one hour (see noteEmbedFailure).
const QMD_EMBED_BACKOFF_BASE_MS = 60_000;
const QMD_EMBED_BACKOFF_MAX_MS = 60 * 60 * 1000;
// Matches a single code point in the range U+3400..U+9FFF.
const HAN_SCRIPT_RE = /[\u3400-\u9fff]/u;
// Maximum number of keywords normalizeHanBm25Query keeps in the rewritten query.
const QMD_BM25_HAN_KEYWORD_LIMIT = 12;
// Tail of the promise chain that runWithQmdEmbedLock uses to run tasks one at a time.
let qmdEmbedQueueTail: Promise<void> = Promise.resolve();
/**
 * On Windows, rewrites a bare `qmd` / `mcporter` command to its `.cmd` shim.
 *
 * The command is returned untouched on every other platform, when it is blank,
 * when it already carries a `.cmd`, `.exe` or `.bat` extension, or when its base
 * name is neither `qmd` nor `mcporter`.
 *
 * @param command Command name or path as configured.
 * @returns The command to hand to `spawn`.
 */
function resolveWindowsCommandShim(command: string): string {
  const candidate = command.trim();
  if (process.platform !== "win32" || candidate.length === 0) {
    return command;
  }
  const extension = path.extname(candidate).toLowerCase();
  if ([".cmd", ".exe", ".bat"].includes(extension)) {
    return command;
  }
  const baseName = path.basename(candidate).toLowerCase();
  const needsShim = ["qmd", "mcporter"].includes(baseName);
  // The shim is appended to the trimmed form; every other path returns the input verbatim.
  return needsShim ? `${candidate}.cmd` : command;
}
/** Reports whether `value` contains at least one character matched by `HAN_SCRIPT_RE`. */
function hasHanScript(value: string): boolean {
  return value.search(HAN_SCRIPT_RE) !== -1;
}
/**
 * Rewrites a query containing Han characters into a space-separated keyword list.
 *
 * Queries without Han characters (and blank queries) are returned trimmed and
 * otherwise unchanged. For Han queries, keywords from `extractKeywords` are
 * trimmed, de-duplicated, filtered to at least two characters, and limited to
 * `QMD_BM25_HAN_KEYWORD_LIMIT` entries. When no keyword survives, the trimmed
 * query is returned as-is.
 *
 * @param query Raw user query.
 * @returns The normalized query string.
 */
function normalizeHanBm25Query(query: string): string {
  const trimmed = query.trim();
  if (trimmed.length === 0 || !hasHanScript(trimmed)) {
    return trimmed;
  }
  // A Set keeps insertion order, so it serves as both the de-dup index and the result list.
  const kept = new Set<string>();
  for (const rawKeyword of extractKeywords(trimmed)) {
    const token = rawKeyword.trim();
    if (token.length === 0 || kept.has(token)) {
      continue;
    }
    // Han tokens are measured in code points: single Han characters are usually too broad
    // for BM25 and can drown out the signal. Other tokens are measured in UTF-16 units.
    const length = hasHanScript(token) ? Array.from(token).length : token.length;
    if (length < 2) {
      continue;
    }
    kept.add(token);
    if (kept.size >= QMD_BM25_HAN_KEYWORD_LIMIT) {
      break;
    }
  }
  return kept.size > 0 ? Array.from(kept).join(" ") : trimmed;
}
/**
 * Runs `task` after every previously queued task has finished, so that only one
 * task holding this lock executes at a time within the module.
 *
 * The lock is released whether `task` resolves or rejects; the task's own
 * result or rejection is passed through to the caller.
 *
 * @param task Work to run while holding the lock.
 * @returns Whatever `task` resolves to.
 */
async function runWithQmdEmbedLock<T>(task: () => Promise<T>): Promise<T> {
  const predecessor = qmdEmbedQueueTail;
  // The Promise executor runs synchronously, so `unlock` is wired up before it can be called.
  let unlock: () => void = () => undefined;
  qmdEmbedQueueTail = new Promise<void>((resolve) => {
    unlock = resolve;
  });
  await predecessor.catch(() => undefined);
  try {
    return await task();
  } finally {
    unlock();
  }
}
// Root path and source kind recorded for one configured collection.
// Stored in `collectionRoots`, keyed by collection name.
type CollectionRoot = {
// Collection path exactly as it appears in the resolved qmd config.
path: string;
// "sessions" for the sessions collection, "memory" for everything else (see bootstrapCollections).
kind: MemorySource;
};
// Settings for the session export; built in the constructor when sessions are enabled.
type SessionExporterConfig = {
// Directory used as the path of the sessions collection.
dir: string;
// Retention in milliseconds, derived from `retentionDays`; undefined when that value is unset or 0.
retentionMs?: number;
// Name of the qmd collection registered for `dir`.
collectionName: string;
};
// Details of a collection as reported by `qmd collection list`.
// Both fields are optional because some outputs carry only the collection name.
type ListedCollection = {
path?: string;
// Filled from either the `pattern` or the `mask` field of the listing.
pattern?: string;
};
// A collection this manager expects qmd to have: its name, path, file pattern and kind.
type ManagedCollection = {
name: string;
path: string;
// Passed to `qmd collection add` as the `--mask` argument.
pattern: string;
kind: "memory" | "custom" | "sessions";
};
// "status" only populates the in-memory collection maps; "full" also creates directories,
// registers collections with qmd and schedules updates (see initialize).
type QmdManagerMode = "full" | "status";
export class QmdMemoryManager implements MemorySearchManager {
/**
 * Builds and initializes a manager for one agent.
 *
 * @param params.cfg Application config.
 * @param params.agentId Agent the manager is scoped to.
 * @param params.resolved Resolved memory backend config; its `qmd` section is required.
 * @param params.mode Initialization mode, `"full"` when omitted.
 * @returns The initialized manager, or `null` when `resolved.qmd` is absent.
 */
static async create(params: {
  cfg: OpenClawConfig;
  agentId: string;
  resolved: ResolvedMemoryBackendConfig;
  mode?: QmdManagerMode;
}): Promise<QmdMemoryManager | null> {
  const { cfg, agentId } = params;
  const qmdConfig = params.resolved.qmd;
  if (!qmdConfig) {
    return null;
  }
  const instance = new QmdMemoryManager({ cfg, agentId, resolved: qmdConfig });
  await instance.initialize(params.mode ?? "full");
  return instance;
}
// Application config and the agent this manager is scoped to.
private readonly cfg: OpenClawConfig;
private readonly agentId: string;
// Resolved qmd settings (collections, limits, update schedule, search mode, mcporter bridge).
private readonly qmd: ResolvedQmdConfig;
// Agent workspace; used as cwd for qmd child processes and as the base for relative collection paths.
private readonly workspaceDir: string;
// State layout: <stateDir>/agents/<agentId>/qmd holds everything qmd-related for this agent.
private readonly stateDir: string;
private readonly agentStateDir: string;
private readonly qmdDir: string;
// Per-agent XDG directories exported to qmd through `env`.
private readonly xdgConfigHome: string;
private readonly xdgCacheHome: string;
// <xdgCacheHome>/qmd/index.sqlite
private readonly indexPath: string;
// Environment for spawned qmd processes: process.env plus the XDG overrides and NO_COLOR=1.
private readonly env: NodeJS.ProcessEnv;
// Collection name -> root path and source kind; rebuilt by bootstrapCollections.
private readonly collectionRoots = new Map<string, CollectionRoot>();
// Distinct source kinds across the configured collections.
private readonly sources = new Set<MemorySource>();
// Cache of resolved document locations; cleared after every completed update.
// NOTE(review): populated outside this view (presumably by resolveDocLocation).
private readonly docPathCache = new Map<
string,
{ rel: string; abs: string; source: MemorySource }
>();
// NOTE(review): not touched in the visible part of the class; presumably maintained by
// exportSessions to track what has already been exported. Confirm there.
private readonly exportedSessionState = new Map<
string,
{
hash: string;
mtimeMs: number;
target: string;
}
>();
// Per-stream output cap applied by runQmd.
private readonly maxQmdOutputChars = MAX_QMD_OUTPUT_CHARS;
// Null when session export is disabled in the config.
private readonly sessionExporter: SessionExporterConfig | null;
// Interval handle for periodic updates; set in initialize, cleared in close.
private updateTimer: NodeJS.Timeout | null = null;
// The update currently running, or null when idle.
private pendingUpdate: Promise<void> | null = null;
// Drain loop for forced updates requested while another update was in flight, and the
// number of forced runs still owed.
private queuedForcedUpdate: Promise<void> | null = null;
private queuedForcedRuns = 0;
// Set by close(); runUpdate returns immediately once this is true.
private closed = false;
// Closed in close(). NOTE(review): opened outside this view.
private db: SqliteDatabase | null = null;
// Timestamps (Date.now()) of the last completed update and the last successful embed.
private lastUpdateAt: number | null = null;
private lastEmbedAt: number | null = null;
// Embed back-off state: no embed runs before `embedBackoffUntil`; the failure count drives
// the exponential delay and resets on success.
private embedBackoffUntil: number | null = null;
private embedFailureCount = 0;
// Guards tryRepairNullByteCollections so the repair is attempted at most once per instance.
private attemptedNullByteCollectionRepair = false;
/**
 * Derives all paths and the child-process environment for one agent and, when
 * session export is enabled, registers the sessions directory as an additional
 * managed collection.
 *
 * The resolved config is shallow-copied before the sessions collection is
 * appended, so the object supplied by the caller is never modified. Without the
 * copy, building a second manager from the same resolved config would start
 * from a collection list that already contains the first manager's sessions
 * entry.
 *
 * @param params.cfg Application config.
 * @param params.agentId Agent the manager is scoped to.
 * @param params.resolved Resolved qmd settings; left untouched.
 */
private constructor(params: {
  cfg: OpenClawConfig;
  agentId: string;
  resolved: ResolvedQmdConfig;
}) {
  this.cfg = params.cfg;
  this.agentId = params.agentId;
  // Own copy: `collections` is reassigned below and that must stay local to this instance.
  this.qmd = { ...params.resolved };
  this.workspaceDir = resolveAgentWorkspaceDir(params.cfg, params.agentId);
  this.stateDir = resolveStateDir(process.env, os.homedir);
  this.agentStateDir = path.join(this.stateDir, "agents", this.agentId);
  this.qmdDir = path.join(this.agentStateDir, "qmd");
  // QMD uses XDG base dirs for its internal state.
  // Collections are managed via `qmd collection add` and stored inside the index DB.
  // - config: $XDG_CONFIG_HOME (contexts, etc.)
  // - cache: $XDG_CACHE_HOME/qmd/index.sqlite
  this.xdgConfigHome = path.join(this.qmdDir, "xdg-config");
  this.xdgCacheHome = path.join(this.qmdDir, "xdg-cache");
  this.indexPath = path.join(this.xdgCacheHome, "qmd", "index.sqlite");
  this.env = {
    ...process.env,
    XDG_CONFIG_HOME: this.xdgConfigHome,
    XDG_CACHE_HOME: this.xdgCacheHome,
    NO_COLOR: "1",
  };
  this.sessionExporter = this.qmd.sessions.enabled
    ? {
        dir: this.qmd.sessions.exportDir ?? path.join(this.qmdDir, "sessions"),
        // An unset or zero `retentionDays` leaves retention undefined.
        retentionMs: this.qmd.sessions.retentionDays
          ? this.qmd.sessions.retentionDays * 24 * 60 * 60 * 1000
          : undefined,
        collectionName: this.pickSessionCollectionName(),
      }
    : null;
  if (this.sessionExporter) {
    // A fresh array is assigned on the local copy; the caller's array is not appended to.
    this.qmd.collections = [
      ...this.qmd.collections,
      {
        name: this.sessionExporter.collectionName,
        path: this.sessionExporter.dir,
        pattern: "**/*.md",
        kind: "sessions",
      },
    ];
  }
}
/**
 * Prepares the manager for use.
 *
 * In `"status"` mode only the in-memory collection maps are populated. In
 * `"full"` mode the XDG directories (and the session export directory, when
 * enabled) are created, shared models are linked, collections are registered
 * with qmd, an optional boot update is started, and the periodic update timer
 * is installed.
 *
 * @param mode Initialization mode.
 */
private async initialize(mode: QmdManagerMode): Promise<void> {
  this.bootstrapCollections();
  if (mode === "status") {
    return;
  }

  const requiredDirs = [this.xdgConfigHome, this.xdgCacheHome, path.dirname(this.indexPath)];
  if (this.sessionExporter) {
    requiredDirs.push(this.sessionExporter.dir);
  }
  for (const dir of requiredDirs) {
    await fs.mkdir(dir, { recursive: true });
  }

  // XDG_CACHE_HOME is overridden to keep the index per-agent, which also hides
  // models installed in the default cache location from qmd. Linking the default
  // models directory into the per-agent cache keeps the index isolated while the
  // models stay shared, so they are not downloaded again.
  await this.symlinkSharedModels();
  await this.ensureCollections();

  if (this.qmd.update.onBoot) {
    // Boot update failures are logged, never thrown, whether or not they are awaited.
    const guardedBootRun = this.runUpdate("boot", true).catch((err) => {
      log.warn(`qmd boot update failed: ${String(err)}`);
    });
    if (this.qmd.update.waitForBootSync) {
      await guardedBootRun;
    }
  }

  const intervalMs = this.qmd.update.intervalMs;
  if (intervalMs > 0) {
    const tick = (): void => {
      void this.runUpdate("interval").catch((err) => {
        log.warn(`qmd update failed (${String(err)})`);
      });
    };
    this.updateTimer = setInterval(tick, intervalMs);
  }
}
/**
 * Rebuilds `collectionRoots` and `sources` from the configured collections.
 * Collections of kind "sessions" map to the "sessions" source; every other kind
 * maps to "memory".
 */
private bootstrapCollections(): void {
  this.collectionRoots.clear();
  this.sources.clear();
  this.qmd.collections.forEach((entry) => {
    const kind: MemorySource = entry.kind === "sessions" ? "sessions" : "memory";
    this.sources.add(kind);
    this.collectionRoots.set(entry.name, { path: entry.path, kind });
  });
}
/**
 * Makes sure every configured collection is registered with qmd and bound to
 * the configured path and pattern.
 *
 * Collections live inside the qmd index database and can only be created
 * through the CLI. Existing collections are listed first when the installed qmd
 * supports it; a collection that is listed and still matches is left alone, a
 * listed collection that needs rebinding is removed and re-added, and an
 * unlisted one is added. Failures are logged and never thrown.
 */
private async ensureCollections(): Promise<void> {
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let existing = new Map<string, ListedCollection>();
  try {
    const { stdout } = await this.runQmd(["collection", "list", "--json"], {
      timeoutMs: this.qmd.update.commandTimeoutMs,
    });
    existing = new Map(this.parseListedCollections(stdout));
  } catch {
    // Older qmd versions might not support `list --json`; fall back to
    // best-effort idempotent `collection add` below.
  }

  await this.migrateLegacyUnscopedCollections(existing);

  for (const collection of this.qmd.collections) {
    const listed = existing.get(collection.name);
    if (listed !== undefined) {
      if (!this.shouldRebindCollection(collection, listed)) {
        continue;
      }
      try {
        await this.removeCollection(collection.name);
      } catch (err) {
        const reason = describe(err);
        if (!this.isCollectionMissingError(reason)) {
          log.warn(`qmd collection remove failed for ${collection.name}: ${reason}`);
        }
      }
    }
    try {
      await this.ensureCollectionPath(collection);
      await this.addCollection(collection.path, collection.name, collection.pattern);
    } catch (err) {
      const reason = describe(err);
      // "Already exists" is the expected outcome of an idempotent add; stay quiet.
      if (!this.isCollectionAlreadyExistsError(reason)) {
        log.warn(`qmd collection add failed for ${collection.name}: ${reason}`);
      }
    }
  }
}
/**
 * Removes legacy collections (names without the agent suffix) whose scoped
 * replacement is configured but not yet registered.
 *
 * A legacy collection is only removed when its listed path and pattern are
 * compatible with the configured collection. Successfully removed names are
 * also deleted from `existing`.
 *
 * @param existing Collections currently reported by qmd; mutated in place.
 */
private async migrateLegacyUnscopedCollections(
  existing: Map<string, ListedCollection>,
): Promise<void> {
  for (const collection of this.qmd.collections) {
    if (existing.has(collection.name)) {
      continue;
    }
    const legacyName = this.deriveLegacyCollectionName(collection.name);
    const listedLegacy = legacyName ? existing.get(legacyName) : undefined;
    if (!legacyName || !listedLegacy) {
      continue;
    }
    if (!this.canMigrateLegacyCollection(collection, listedLegacy)) {
      log.debug(
        `qmd legacy collection migration skipped for ${legacyName} (path/pattern mismatch)`,
      );
      continue;
    }
    try {
      await this.removeCollection(legacyName);
      existing.delete(legacyName);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      if (!this.isCollectionMissingError(reason)) {
        log.warn(`qmd collection remove failed for ${legacyName}: ${reason}`);
      }
    }
  }
}
/**
 * Strips the `-<sanitized agent id>` suffix from a scoped collection name.
 *
 * @param scopedName Agent-scoped collection name.
 * @returns The name without the suffix, or `null` when the suffix is absent or
 *          nothing but whitespace remains.
 */
private deriveLegacyCollectionName(scopedName: string): string | null {
  const suffix = `-${this.sanitizeCollectionNameSegment(this.agentId)}`;
  if (!scopedName.endsWith(suffix)) {
    return null;
  }
  const base = scopedName.substring(0, scopedName.length - suffix.length).trim();
  return base.length > 0 ? base : null;
}
/**
 * Decides whether a listed legacy collection is compatible with the configured
 * one. A listed path must match and a listed pattern must be identical; fields
 * missing from the listing are not treated as a conflict.
 */
private canMigrateLegacyCollection(
  collection: ManagedCollection,
  listedLegacy: ListedCollection,
): boolean {
  const { path: legacyPath, pattern: legacyPattern } = listedLegacy;
  return (
    (!legacyPath || this.pathsMatch(legacyPath, collection.path)) &&
    (typeof legacyPattern !== "string" || legacyPattern === collection.pattern)
  );
}
/**
 * Creates the collection directory (recursively) when the collection's pattern
 * is a glob. Nothing is created for patterns without glob characters.
 */
private async ensureCollectionPath(collection: {
  path: string;
  pattern: string;
  kind: "memory" | "custom" | "sessions";
}): Promise<void> {
  if (this.isDirectoryGlobPattern(collection.pattern)) {
    await fs.mkdir(collection.path, { recursive: true });
  }
}
/** Returns true when `pattern` contains any of the glob characters `*`, `?` or `[`. */
private isDirectoryGlobPattern(pattern: string): boolean {
  return ["*", "?", "["].some((marker) => pattern.includes(marker));
}
/**
 * Returns true when an error message (compared case-insensitively) indicates
 * that the collection already exists.
 */
private isCollectionAlreadyExistsError(message: string): boolean {
  const normalized = message.toLowerCase();
  return ["already exists", "exists"].some((needle) => normalized.includes(needle));
}
/**
 * Returns true when an error message (compared case-insensitively) contains
 * "not found", "does not exist" or "missing".
 */
private isCollectionMissingError(message: string): boolean {
  const normalized = message.toLowerCase();
  return ["not found", "does not exist", "missing"].some((needle) =>
    normalized.includes(needle),
  );
}
/**
 * Returns true when a search failure looks like a missing collection: the
 * message matches `isCollectionMissingError` and also mentions "collection".
 */
private isMissingCollectionSearchError(err: unknown): boolean {
  const text = err instanceof Error ? err.message : String(err);
  if (!this.isCollectionMissingError(text)) {
    return false;
  }
  return text.toLowerCase().includes("collection");
}
/**
 * Re-registers the managed collections when `err` is a missing-collection
 * search failure.
 *
 * @returns `true` when a repair was performed and the search should be retried,
 *          `false` when the error is of a different kind.
 */
private async tryRepairMissingCollectionSearch(err: unknown): Promise<boolean> {
  const repairable = this.isMissingCollectionSearchError(err);
  if (repairable) {
    log.warn(
      "qmd search failed because a managed collection is missing; repairing collections and retrying once",
    );
    await this.ensureCollections();
  }
  return repairable;
}
/**
 * Registers a collection via `qmd collection add <path> --name <name> --mask <pattern>`.
 * Rejects when the command fails or exceeds the configured command timeout.
 */
private async addCollection(pathArg: string, name: string, pattern: string): Promise<void> {
  const argv = ["collection", "add", pathArg, "--name", name, "--mask", pattern];
  const timeoutMs = this.qmd.update.commandTimeoutMs;
  await this.runQmd(argv, { timeoutMs });
}
/**
 * Unregisters a collection via `qmd collection remove <name>`.
 * Rejects when the command fails or exceeds the configured command timeout.
 */
private async removeCollection(name: string): Promise<void> {
  const argv = ["collection", "remove", name];
  const timeoutMs = this.qmd.update.commandTimeoutMs;
  await this.runQmd(argv, { timeoutMs });
}
/**
 * Parses the output of `qmd collection list --json`.
 *
 * Two formats are accepted:
 * - a JSON array whose entries are collection names or objects with `name` and
 *   optional `path` / `pattern` / `mask` fields;
 * - plain-text table output, for qmd builds that ignore `--json`: a name line
 *   (optionally followed by `(qmd://...)`) opens a collection and subsequent
 *   `pattern:` / `mask:` / `path:` lines fill in its details until a blank line.
 *
 * @param output Raw stdout of the list command.
 * @returns Collection name -> listed details; empty when nothing could be parsed.
 */
private parseListedCollections(output: string): Map<string, ListedCollection> {
  const listed = new Map<string, ListedCollection>();
  const trimmed = output.trim();
  if (trimmed.length === 0) {
    return listed;
  }

  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  try {
    const parsed: unknown = JSON.parse(trimmed);
    if (Array.isArray(parsed)) {
      for (const entry of parsed) {
        if (typeof entry === "string") {
          listed.set(entry, {});
          continue;
        }
        if (!entry || typeof entry !== "object") {
          continue;
        }
        const record = entry as {
          name?: unknown;
          path?: unknown;
          pattern?: unknown;
          mask?: unknown;
        };
        if (typeof record.name !== "string") {
          continue;
        }
        listed.set(record.name, {
          path: asString(record.path),
          // `pattern` wins over `mask` when both are strings.
          pattern: asString(record.pattern) ?? asString(record.mask),
        });
      }
      return listed;
    }
  } catch {
    // Some qmd builds ignore `--json` and still print table output.
  }

  const scopedNameLine = /^\s*([a-z0-9._-]+)\s+\(qmd:\/\/[^)]+\)\s*$/i;
  const headingLine = /^\s*collections\b/i;
  const bareNameLine = /^\s*([a-z0-9._-]+)\s*$/i;
  const patternLine = /^\s*(?:pattern|mask)\s*:\s*(.+?)\s*$/i;
  const pathLine = /^\s*path\s*:\s*(.+?)\s*$/i;

  // Registers `name` if it is new and returns it, so it can become the current collection.
  const open = (name: string): string => {
    if (!listed.has(name)) {
      listed.set(name, {});
    }
    return name;
  };
  // Merges `fields` into the entry for `name`, creating the entry when needed.
  const patch = (name: string, fields: ListedCollection): void => {
    const entry = listed.get(name) ?? {};
    Object.assign(entry, fields);
    listed.set(name, entry);
  };

  let current: string | null = null;
  for (const rawLine of output.split(/\r?\n/)) {
    const line = rawLine.trimEnd();
    if (line.trim().length === 0) {
      // A blank line ends the current collection's detail block.
      current = null;
      continue;
    }
    const scoped = scopedNameLine.exec(line);
    if (scoped) {
      current = open(scoped[1]);
      continue;
    }
    if (headingLine.test(line)) {
      continue;
    }
    const bare = bareNameLine.exec(line);
    if (bare && !line.includes(":")) {
      current = open(bare[1]);
      continue;
    }
    if (!current) {
      continue;
    }
    const patternMatch = patternLine.exec(line);
    if (patternMatch) {
      patch(current, { pattern: patternMatch[1].trim() });
      continue;
    }
    const pathMatch = pathLine.exec(line);
    if (pathMatch) {
      patch(current, { path: pathMatch[1].trim() });
    }
  }
  return listed;
}
/**
 * Decides whether a listed collection must be removed and re-added.
 *
 * @returns `true` when the listing has no path, when the listed path differs
 *          from the configured one, or when a listed pattern differs from the
 *          configured pattern.
 */
private shouldRebindCollection(collection: ManagedCollection, listed: ListedCollection): boolean {
  const { path: listedPath, pattern: listedPattern } = listed;
  if (!listedPath) {
    // Older qmd versions may only return names from `collection list --json`.
    // Rebinding in that case keeps stale path bindings from surviving upgrades.
    return true;
  }
  const pathChanged = !this.pathsMatch(listedPath, collection.path);
  const patternChanged = typeof listedPattern === "string" && listedPattern !== collection.pattern;
  return pathChanged || patternChanged;
}
/**
 * Compares two paths after resolving them to normalized absolute form.
 * Relative paths are resolved against the agent workspace; on Windows the
 * comparison ignores case.
 */
private pathsMatch(left: string, right: string): boolean {
  const ignoreCase = process.platform === "win32";
  const canonical = (value: string): string => {
    const absolute = path.isAbsolute(value)
      ? path.resolve(value)
      : path.resolve(this.workspaceDir, value);
    const normalized = path.normalize(absolute);
    return ignoreCase ? normalized.toLowerCase() : normalized;
  };
  return canonical(left) === canonical(right);
}
/**
 * Returns true when an update failure mentions both a "not a directory"
 * condition (`ENOTDIR` or the phrase itself) and a NUL byte marker.
 */
private shouldRepairNullByteCollectionError(err: unknown): boolean {
  const text = err instanceof Error ? err.message : String(err);
  const lowered = text.toLowerCase();
  const notADirectory = ["enotdir", "not a directory"].some((needle) =>
    lowered.includes(needle),
  );
  return notADirectory && NUL_MARKER_RE.test(text);
}
/**
 * Rebuilds every managed collection (remove, then add) when an update failed
 * with what looks like NUL-byte-corrupted collection metadata.
 *
 * The repair is attempted at most once per manager instance. Individual
 * remove/add failures are logged and do not abort the rebuild.
 *
 * @param err The update failure.
 * @param reason Update reason, used for logging only.
 * @returns `true` when the rebuild ran and the update should be retried.
 */
private async tryRepairNullByteCollections(err: unknown, reason: string): Promise<boolean> {
  if (this.attemptedNullByteCollectionRepair || !this.shouldRepairNullByteCollectionError(err)) {
    return false;
  }
  this.attemptedNullByteCollectionRepair = true;
  log.warn(
    `qmd update failed with suspected null-byte collection metadata (${reason}); rebuilding managed collections and retrying once`,
  );
  const describe = (cause: unknown): string =>
    cause instanceof Error ? cause.message : String(cause);
  for (const { name, path: collectionPath, pattern } of this.qmd.collections) {
    try {
      await this.removeCollection(name);
    } catch (removeErr) {
      const removeMessage = describe(removeErr);
      if (!this.isCollectionMissingError(removeMessage)) {
        log.warn(`qmd collection remove failed for ${name}: ${removeMessage}`);
      }
    }
    try {
      await this.addCollection(collectionPath, name, pattern);
    } catch (addErr) {
      const addMessage = describe(addErr);
      if (!this.isCollectionAlreadyExistsError(addMessage)) {
        log.warn(`qmd collection add failed for ${name}: ${addMessage}`);
      }
    }
  }
  return true;
}
/**
 * Searches the managed collections and maps qmd hits to memory search results.
 *
 * Returns an empty list when the session scope is not allowed, the query is
 * blank, or no managed collections are configured. The search runs either
 * through the mcporter bridge or the qmd CLI, depending on the config. A
 * missing-collection failure triggers one collection repair followed by one
 * retry; any other failure is logged and rethrown as an Error.
 *
 * @param query Search text.
 * @param opts.maxResults Upper bound requested by the caller; capped at the configured maximum.
 * @param opts.minScore Hits scoring below this value are dropped (default 0).
 * @param opts.sessionKey Session used for the scope check.
 * @returns Matching results after diversification and the injected-chars clamp.
 */
async search(
query: string,
opts?: { maxResults?: number; minScore?: number; sessionKey?: string },
): Promise<MemorySearchResult[]> {
if (!this.isScopeAllowed(opts?.sessionKey)) {
this.logScopeDenied(opts?.sessionKey);
return [];
}
const trimmed = query.trim();
if (!trimmed) {
return [];
}
await this.waitForPendingUpdateBeforeSearch();
// The caller may lower the result limit but never raise it above the configured maximum.
const limit = Math.min(
this.qmd.limits.maxResults,
opts?.maxResults ?? this.qmd.limits.maxResults,
);
const collectionNames = this.listManagedCollectionNames();
if (collectionNames.length === 0) {
log.warn("qmd query skipped: no managed collections configured");
return [];
}
const qmdSearchCommand = this.qmd.searchMode;
const mcporterEnabled = this.qmd.mcporter.enabled;
// One search attempt. With `allowMissingCollectionRepair` set, a missing-collection error is
// rethrown untouched (no logging, no fallback) so the caller can repair and retry.
const runSearchAttempt = async (
allowMissingCollectionRepair: boolean,
): Promise<QmdQueryResult[]> => {
try {
if (mcporterEnabled) {
// Map the configured qmd search mode onto the corresponding mcporter tool name.
const tool: "search" | "vector_search" | "deep_search" =
qmdSearchCommand === "search"
? "search"
: qmdSearchCommand === "vsearch"
? "vector_search"
: "deep_search";
const minScore = opts?.minScore ?? 0;
if (collectionNames.length > 1) {
return await this.runMcporterAcrossCollections({
tool,
query: trimmed,
limit,
minScore,
collectionNames,
});
}
return await this.runQmdSearchViaMcporter({
mcporter: this.qmd.mcporter,
tool,
query: trimmed,
limit,
minScore,
collection: collectionNames[0],
timeoutMs: this.qmd.limits.timeoutMs,
});
}
if (collectionNames.length > 1) {
return await this.runQueryAcrossCollections(
trimmed,
limit,
collectionNames,
qmdSearchCommand,
);
}
const args = this.buildSearchArgs(qmdSearchCommand, trimmed, limit);
args.push(...this.buildCollectionFilterArgs(collectionNames));
// Always scope to managed collections (default + custom). Even for `search`/`vsearch`,
// pass collection filters; if a given QMD build rejects these flags, we fall back to `query`.
const result = await this.runQmd(args, { timeoutMs: this.qmd.limits.timeoutMs });
return parseQmdQueryJson(result.stdout, result.stderr);
} catch (err) {
if (allowMissingCollectionRepair && this.isMissingCollectionSearchError(err)) {
throw err;
}
// CLI only: when the configured mode rejects the flags, retry the same search with `qmd query`.
if (
!mcporterEnabled &&
qmdSearchCommand !== "query" &&
this.isUnsupportedQmdOptionError(err)
) {
log.warn(
`qmd ${qmdSearchCommand} does not support configured flags; retrying search with qmd query`,
);
try {
if (collectionNames.length > 1) {
return await this.runQueryAcrossCollections(trimmed, limit, collectionNames, "query");
}
const fallbackArgs = this.buildSearchArgs("query", trimmed, limit);
fallbackArgs.push(...this.buildCollectionFilterArgs(collectionNames));
const fallback = await this.runQmd(fallbackArgs, {
timeoutMs: this.qmd.limits.timeoutMs,
});
return parseQmdQueryJson(fallback.stdout, fallback.stderr);
} catch (fallbackErr) {
log.warn(`qmd query fallback failed: ${String(fallbackErr)}`);
throw fallbackErr instanceof Error ? fallbackErr : new Error(String(fallbackErr));
}
}
const label = mcporterEnabled ? "mcporter/qmd" : `qmd ${qmdSearchCommand}`;
log.warn(`${label} failed: ${String(err)}`);
throw err instanceof Error ? err : new Error(String(err));
}
};
let parsed: QmdQueryResult[];
try {
parsed = await runSearchAttempt(true);
} catch (err) {
// Only a missing-collection failure is repaired; the second attempt runs with repair disabled.
if (!(await this.tryRepairMissingCollectionSearch(err))) {
throw err instanceof Error ? err : new Error(String(err));
}
parsed = await runSearchAttempt(false);
}
const results: MemorySearchResult[] = [];
for (const entry of parsed) {
const doc = await this.resolveDocLocation(entry.docid, {
preferredCollection: entry.collection,
preferredFile: entry.file,
});
// Hits whose document location cannot be resolved are dropped.
if (!doc) {
continue;
}
const snippet = entry.snippet?.slice(0, this.qmd.limits.maxSnippetChars) ?? "";
const lines = this.extractSnippetLines(snippet);
// A hit without a numeric score is treated as score 0.
const score = typeof entry.score === "number" ? entry.score : 0;
const minScore = opts?.minScore ?? 0;
if (score < minScore) {
continue;
}
results.push({
path: doc.rel,
startLine: lines.startLine,
endLine: lines.endLine,
score,
snippet,
source: doc.source,
});
}
return this.clampResultsByInjectedChars(this.diversifyResultsBySource(results, limit));
}
/**
 * Runs an index update and reports coarse progress (0 of 1, then 1 of 1).
 *
 * @param params.reason Label for the update; `"manual"` when omitted.
 * @param params.force Forwarded to the update as its force flag.
 * @param params.progress Optional progress callback.
 */
async sync(params?: {
  reason?: string;
  force?: boolean;
  progress?: (update: MemorySyncProgressUpdate) => void;
}): Promise<void> {
  params?.progress?.({ completed: 0, total: 1, label: "Updating QMD index…" });
  await this.runUpdate(params?.reason ?? "manual", params?.force);
  params?.progress?.({ completed: 1, total: 1, label: "QMD index updated" });
}
/**
 * Reads a Markdown file addressed by a relative path.
 *
 * When `from` or `lines` is supplied, the windowed read is delegated to
 * `readPartialText`; otherwise the whole file is returned. A file that is
 * missing (at stat time or at read time) yields empty text instead of an error.
 *
 * @param params.relPath Path to read; surrounding whitespace is ignored.
 * @param params.from Forwarded to `readPartialText` as the start of the window.
 * @param params.lines Forwarded to `readPartialText` as the size of the window.
 * @returns The text read and the trimmed `relPath` that was requested.
 * @throws Error("path required") when `relPath` is blank or the resolved path
 *         does not end in `.md`.
 */
async readFile(params: {
  relPath: string;
  from?: number;
  lines?: number;
}): Promise<{ text: string; path: string }> {
  const relPath = params.relPath?.trim();
  if (!relPath) {
    throw new Error("path required");
  }
  const absPath = this.resolveReadPath(relPath);
  if (!absPath.endsWith(".md")) {
    throw new Error("path required");
  }
  const statResult = await statRegularFile(absPath);
  if (statResult.missing) {
    return { text: "", path: relPath };
  }
  if (params.from !== undefined || params.lines !== undefined) {
    const partial = await this.readPartialText(absPath, params.from, params.lines);
    if (partial.missing) {
      return { text: "", path: relPath };
    }
    return { text: partial.text, path: relPath };
  }
  // Neither `from` nor `lines` was given, so the whole file is the answer. The file may
  // have disappeared between the stat and the read; that is reported as empty text too.
  const full = await this.readFullText(absPath);
  if (full.missing) {
    return { text: "", path: relPath };
  }
  return { text: full.text, path: relPath };
}
/**
 * Builds the provider status snapshot for the qmd backend.
 *
 * Document counts come from `readCounts()`; `files` and `chunks` both report
 * the total document count. Vector search is reported as enabled and available,
 * batch processing as disabled.
 */
status(): MemoryProviderStatus {
  const { totalDocuments, sourceCounts } = this.readCounts();
  // Batch embedding is not used by this backend, so every batch field is zeroed.
  const batch = {
    enabled: false,
    failures: 0,
    limit: 0,
    wait: false,
    concurrency: 0,
    pollIntervalMs: 0,
    timeoutMs: 0,
  };
  return {
    backend: "qmd",
    provider: "qmd",
    model: "qmd",
    requestedProvider: "qmd",
    files: totalDocuments,
    chunks: totalDocuments,
    dirty: false,
    workspaceDir: this.workspaceDir,
    dbPath: this.indexPath,
    sources: [...this.sources],
    sourceCounts,
    vector: { enabled: true, available: true },
    batch,
    custom: {
      qmd: {
        collections: this.qmd.collections.length,
        lastUpdateAt: this.lastUpdateAt,
      },
    },
  };
}
/** Always reports embeddings as available; no check is performed. */
async probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult> {
return { ok: true };
}
/** Always reports vector search as available; no check is performed. */
async probeVectorAvailability(): Promise<boolean> {
return true;
}
/**
 * Shuts the manager down: stops the periodic timer, cancels forced runs that
 * have not started, waits for in-flight updates to settle (ignoring their
 * failures), and closes the database handle. Calling it again is a no-op.
 */
async close(): Promise<void> {
  if (this.closed) {
    return;
  }
  this.closed = true;

  const timer = this.updateTimer;
  if (timer !== null) {
    clearInterval(timer);
    this.updateTimer = null;
  }

  // Dropping the owed count makes the forced-update drain loop exit after its current run.
  this.queuedForcedRuns = 0;
  const ignoreFailure = (): undefined => undefined;
  await this.pendingUpdate?.catch(ignoreFailure);
  // Read only after the first await: the queue may have changed in the meantime.
  await this.queuedForcedUpdate?.catch(ignoreFailure);

  if (this.db !== null) {
    this.db.close();
    this.db = null;
  }
}
/**
 * Runs one index update: optional session export, `qmd update`, and an embed
 * pass when one is due.
 *
 * Only one update runs at a time. A non-forced call made while an update (or a
 * forced-update queue) is active simply waits for that work; a forced call is
 * queued to run afterwards. Embed failures are recorded for back-off and do not
 * fail the update.
 *
 * @param reason Label used in logs and for retry policy.
 * @param force Bypasses the skip check, forces the embed pass and queues behind active work.
 * @param opts.fromForcedQueue Set by the forced-update drain loop so it does not wait on itself.
 */
private async runUpdate(
  reason: string,
  force?: boolean,
  opts?: { fromForcedQueue?: boolean },
): Promise<void> {
  if (this.closed) {
    return;
  }
  // Work already under way: the running update, or (unless this call comes from the
  // drain loop itself) the forced-update queue.
  const active = this.pendingUpdate ?? (opts?.fromForcedQueue ? null : this.queuedForcedUpdate);
  if (active) {
    return force ? this.enqueueForcedUpdate(reason) : active;
  }
  if (this.shouldSkipUpdate(force)) {
    return;
  }

  const performUpdate = async (): Promise<void> => {
    if (this.sessionExporter) {
      await this.exportSessions();
    }
    await this.runQmdUpdateWithRetry(reason);
    if (this.shouldRunEmbed(force)) {
      try {
        await runWithQmdEmbedLock(async () => {
          await this.runQmd(["embed"], { timeoutMs: this.qmd.update.embedTimeoutMs });
        });
        this.lastEmbedAt = Date.now();
        this.embedBackoffUntil = null;
        this.embedFailureCount = 0;
      } catch (err) {
        this.noteEmbedFailure(reason, err);
      }
    }
    this.lastUpdateAt = Date.now();
    // Cached document locations may be stale once the index has changed.
    this.docPathCache.clear();
  };

  const tracked = performUpdate().finally(() => {
    this.pendingUpdate = null;
  });
  this.pendingUpdate = tracked;
  await tracked;
}
/**
 * Runs `qmd update`, retrying boot runs on transient failures.
 *
 * Boot runs (reason `"boot"` or prefixed `"boot:"`) get up to three attempts
 * with delays of 500 ms and 1000 ms between them; every other reason gets a
 * single attempt. Only errors accepted by `isRetryableUpdateError` are retried.
 *
 * @throws The last failure once attempts are exhausted or the error is not retryable.
 */
private async runQmdUpdateWithRetry(reason: string): Promise<void> {
  const isBootRun = reason === "boot" || reason.startsWith("boot:");
  const maxAttempts = isBootRun ? 3 : 1;
  let attempt = 1;
  while (attempt <= maxAttempts) {
    try {
      await this.runQmdUpdateOnce(reason);
      return;
    } catch (err) {
      const exhausted = attempt >= maxAttempts;
      if (exhausted || !this.isRetryableUpdateError(err)) {
        throw err;
      }
      // Exponential back-off: 500 ms, 1000 ms, ...
      const delayMs = 500 * 2 ** (attempt - 1);
      log.warn(
        `qmd update retry ${attempt}/${maxAttempts - 1} after failure (${reason}): ${String(err)}`,
      );
      await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }
    attempt += 1;
  }
}
/**
 * Runs `qmd update` once. When it fails with the NUL-byte collection symptom
 * and the one-time collection rebuild is performed, the update is run a second
 * time; any other failure is rethrown.
 */
private async runQmdUpdateOnce(reason: string): Promise<void> {
  const runUpdateCommand = (): Promise<{ stdout: string; stderr: string }> =>
    this.runQmd(["update"], { timeoutMs: this.qmd.update.updateTimeoutMs });
  try {
    await runUpdateCommand();
    return;
  } catch (err) {
    const repaired = await this.tryRepairNullByteCollections(err, reason);
    if (!repaired) {
      throw err;
    }
  }
  await runUpdateCommand();
}
/**
 * Returns true for update failures worth retrying: SQLite busy errors and
 * messages containing "timed out" (case-insensitive).
 */
private isRetryableUpdateError(err: unknown): boolean {
  if (this.isSqliteBusyError(err)) {
    return true;
  }
  const text = err instanceof Error ? err.message : String(err);
  return text.toLowerCase().includes("timed out");
}
/**
 * Decides whether the current update should include an embed pass.
 *
 * Never in `"search"` mode and never while the failure back-off is active.
 * Otherwise an embed runs when forced, when none has succeeded yet, or when the
 * configured embed interval (if positive) has elapsed since the last success.
 */
private shouldRunEmbed(force?: boolean): boolean {
  if (this.qmd.searchMode === "search") {
    return false;
  }
  const now = Date.now();
  const backoffUntil = this.embedBackoffUntil;
  if (backoffUntil !== null && now < backoffUntil) {
    return false;
  }
  const lastEmbedAt = this.lastEmbedAt;
  if (force || lastEmbedAt === null) {
    return true;
  }
  const embedIntervalMs = this.qmd.update.embedIntervalMs;
  return embedIntervalMs > 0 && now - lastEmbedAt > embedIntervalMs;
}
/**
 * Records an embed failure and schedules the back-off window: the base delay
 * doubles with each consecutive failure and is capped at the maximum.
 */
private noteEmbedFailure(reason: string, err: unknown): void {
  this.embedFailureCount += 1;
  const doublings = Math.max(0, this.embedFailureCount - 1);
  const uncappedMs = QMD_EMBED_BACKOFF_BASE_MS * 2 ** doublings;
  const delayMs = Math.min(QMD_EMBED_BACKOFF_MAX_MS, uncappedMs);
  this.embedBackoffUntil = Date.now() + delayMs;
  const delaySeconds = Math.ceil(delayMs / 1000);
  log.warn(`qmd embed failed (${reason}): ${String(err)}; backing off for ${delaySeconds}s`);
}
/**
 * Registers one more forced update to run after the current work and returns
 * the promise of the drain loop that will run it. The drain loop is started on
 * the first request and shared by later ones.
 */
private enqueueForcedUpdate(reason: string): Promise<void> {
  this.queuedForcedRuns += 1;
  const running = this.queuedForcedUpdate;
  if (running) {
    return running;
  }
  const drain = this.drainForcedUpdates(reason).finally(() => {
    this.queuedForcedUpdate = null;
  });
  this.queuedForcedUpdate = drain;
  return drain;
}
/**
 * Waits for the running update to settle, then runs the owed forced updates one
 * after another until none remain or the manager is closed.
 */
private async drainForcedUpdates(reason: string): Promise<void> {
  await this.pendingUpdate?.catch(() => undefined);
  for (;;) {
    if (this.closed || this.queuedForcedRuns <= 0) {
      break;
    }
    this.queuedForcedRuns -= 1;
    await this.runUpdate(`${reason}:queued`, true, { fromForcedQueue: true });
  }
}
/**
 * Symlink the default QMD models directory into our custom XDG_CACHE_HOME so
 * that the pre-installed ML models (~/.cache/qmd/models/) are reused rather
 * than re-downloaded for every agent. If the default models directory does
 * not exist, or a models directory/symlink already exists in the target, this
 * is a no-op.
 *
 * Never throws: any failure is logged as a warning and qmd is left to download
 * the models itself.
 */
private async symlinkSharedModels(): Promise<void> {
  // process.env is never modified; only this.env (passed to child_process
  // spawn) overrides XDG_CACHE_HOME. So reading it here gives us the
  // user's original value, which is where `qmd` downloaded its models.
  //
  // On Windows, well-behaved apps (including Rust `dirs` / Go os.UserCacheDir)
  // store caches under %LOCALAPPDATA% rather than ~/.cache. Fall back to
  // LOCALAPPDATA when XDG_CACHE_HOME is not set on Windows.
  const defaultCacheHome =
    process.env.XDG_CACHE_HOME ||
    (process.platform === "win32" ? process.env.LOCALAPPDATA : undefined) ||
    path.join(os.homedir(), ".cache");
  const defaultModelsDir = path.join(defaultCacheHome, "qmd", "models");
  const targetModelsDir = path.join(this.xdgCacheHome, "qmd", "models");
  try {
    // Check if the default models directory exists.
    // Missing path is normal on first run and should be silent.
    const stat = await fs.stat(defaultModelsDir).catch((err: unknown) => {
      if ((err as NodeJS.ErrnoException).code === "ENOENT") {
        return null;
      }
      throw err;
    });
    if (!stat?.isDirectory()) {
      return;
    }
    // Check if something already exists at the target path.
    try {
      await fs.lstat(targetModelsDir);
      // Already exists (directory, symlink, or file): leave it alone.
      return;
    } catch {
      // Does not exist: proceed to create the symlink.
    }
    // On Windows, creating directory symlinks requires either Administrator
    // privileges or Developer Mode. Fall back to a directory junction which
    // works without elevated privileges (junctions are always absolute-path,
    // which is fine here since both paths are already absolute).
    try {
      await fs.symlink(defaultModelsDir, targetModelsDir, "dir");
    } catch (symlinkErr: unknown) {
      const code = (symlinkErr as NodeJS.ErrnoException).code;
      if (process.platform === "win32" && (code === "EPERM" || code === "ENOTSUP")) {
        await fs.symlink(defaultModelsDir, targetModelsDir, "junction");
      } else {
        throw symlinkErr;
      }
    }
    // Separate the two paths so the log line reads as "source -> link location".
    log.debug(`symlinked qmd models: ${defaultModelsDir} -> ${targetModelsDir}`);
  } catch (err) {
    // Non-fatal: if we can't symlink, qmd will fall back to downloading.
    log.warn(`failed to symlink qmd models directory: ${String(err)}`);
  }
}
/**
 * Runs the qmd CLI with the agent-scoped environment and collects its output.
 *
 * stdout and stderr are decoded as UTF-8 with one stateful decoder per stream,
 * so a multi-byte character that arrives split across two chunks is decoded
 * correctly instead of being turned into replacement characters. Each stream is
 * capped at `maxQmdOutputChars`.
 *
 * @param args Arguments passed to the qmd command.
 * @param opts.timeoutMs When set (and non-zero), the process is killed with
 *        SIGKILL after this many milliseconds.
 * @returns The captured stdout and stderr when qmd exits with code 0.
 * @throws Error when the process cannot be spawned, times out, produces more
 *         output than the cap allows, or exits with a non-zero code.
 */
private async runQmd(
  args: string[],
  opts?: { timeoutMs?: number },
): Promise<{ stdout: string; stderr: string }> {
  return await new Promise((resolve, reject) => {
    const child = spawn(resolveWindowsCommandShim(this.qmd.command), args, {
      env: this.env,
      cwd: this.workspaceDir,
    });

    // Builds a capped, UTF-8-safe accumulator for one output stream.
    const createCollector = () => {
      const decoder = new StringDecoder("utf8");
      const state = { text: "", truncated: false };
      const append = (piece: string): void => {
        if (piece.length === 0) {
          // The decoder returns "" while it is still waiting for the rest of a character.
          return;
        }
        const next = appendOutputWithCap(state.text, piece, this.maxQmdOutputChars);
        state.text = next.text;
        state.truncated = state.truncated || next.truncated;
      };
      return {
        state,
        // Chunks are Buffers by default; strings are accepted as-is.
        write: (chunk: Buffer | string): void =>
          append(typeof chunk === "string" ? chunk : decoder.write(chunk)),
        // Emits whatever bytes are still buffered once the stream has ended.
        flush: (): void => append(decoder.end()),
      };
    };
    const out = createCollector();
    const errOut = createCollector();

    const timeoutMs = opts?.timeoutMs;
    const timer = timeoutMs
      ? setTimeout(() => {
          child.kill("SIGKILL");
          reject(new Error(`qmd ${args.join(" ")} timed out after ${timeoutMs}ms`));
        }, timeoutMs)
      : null;

    child.stdout.on("data", out.write);
    child.stderr.on("data", errOut.write);
    child.on("error", (err) => {
      if (timer) {
        clearTimeout(timer);
      }
      reject(err);
    });
    child.on("close", (code) => {
      if (timer) {
        clearTimeout(timer);
      }
      out.flush();
      errOut.flush();
      const stdout = out.state.text;
      const stderr = errOut.state.text;
      if (out.state.truncated || errOut.state.truncated) {
        reject(
          new Error(
            `qmd ${args.join(" ")} produced too much output (limit ${this.maxQmdOutputChars} chars)`,
          ),
        );
        return;
      }
      if (code === 0) {
        resolve({ stdout, stderr });
      } else {
        reject(new Error(`qmd ${args.join(" ")} failed (code ${code}): ${stderr || stdout}`));
      }
    });
  });
}
/**
 * Makes sure `mcporter daemon start` has been attempted before a search.
 *
 * State is kept on `globalThis` so it is shared by every caller in the
 * process: one flag for the "startDaemon=false" warning (logged once) and one
 * promise for the in-flight/completed daemon start.
 *
 * A failed start is logged and never rejects; the shared promise is removed so
 * a later call can try again.
 *
 * @param mcporter - Resolved mcporter settings (`enabled`, `startDaemon`).
 */
private async ensureMcporterDaemonStarted(mcporter: ResolvedQmdMcporterConfig): Promise<void> {
  if (!mcporter.enabled) {
    return;
  }
  type McporterSharedState = typeof globalThis & {
    __openclawMcporterColdStartWarned?: boolean;
    __openclawMcporterDaemonStart?: Promise<void>;
  };
  const shared: McporterSharedState = globalThis;
  if (!mcporter.startDaemon) {
    if (shared.__openclawMcporterColdStartWarned) {
      return;
    }
    shared.__openclawMcporterColdStartWarned = true;
    log.warn(
      "mcporter qmd bridge enabled but startDaemon=false; each query may cold-start QMD MCP. Consider setting memory.qmd.mcporter.startDaemon=true to keep it warm.",
    );
    return;
  }
  const startDaemon = async (): Promise<void> => {
    try {
      await this.runMcporter(["daemon", "start"], { timeoutMs: 10_000 });
    } catch (err) {
      log.warn(`mcporter daemon start failed: ${String(err)}`);
      // Allow future searches to retry daemon start on transient failures.
      delete shared.__openclawMcporterDaemonStart;
    }
  };
  if (!shared.__openclawMcporterDaemonStart) {
    shared.__openclawMcporterDaemonStart = startDaemon();
  }
  await shared.__openclawMcporterDaemonStart;
}
/**
 * Spawns `mcporter` with `args` and buffers its output.
 *
 * Uses the same environment and working directory as direct qmd invocations,
 * and the same per-stream output cap (`this.maxQmdOutputChars`).
 *
 * @param args - Arguments passed verbatim to mcporter.
 * @param opts - Optional `timeoutMs`; when truthy the child is sent SIGKILL
 *   and the promise rejects once that many milliseconds have elapsed.
 * @returns The captured stdout/stderr when the process exits with code 0.
 * @throws Rejects on spawn error, timeout, output over the cap, or a
 *   non-zero exit code.
 */
private async runMcporter(
  args: string[],
  opts?: { timeoutMs?: number },
): Promise<{ stdout: string; stderr: string }> {
  return await new Promise((resolve, reject) => {
    const proc = spawn(resolveWindowsCommandShim("mcporter"), args, {
      // Keep mcporter and direct qmd commands on the same agent-scoped XDG state.
      env: this.env,
      cwd: this.workspaceDir,
    });
    const captured = { out: "", err: "" };
    // Set as soon as either stream exceeds the cap; checked on close.
    let overflowed = false;
    const killTimer = opts?.timeoutMs
      ? setTimeout(() => {
          proc.kill("SIGKILL");
          reject(new Error(`mcporter ${args.join(" ")} timed out after ${opts.timeoutMs}ms`));
        }, opts.timeoutMs)
      : null;
    const cancelKillTimer = (): void => {
      if (killTimer) {
        clearTimeout(killTimer);
      }
    };
    proc.stdout.on("data", (data) => {
      const { text, truncated } = appendOutputWithCap(
        captured.out,
        data.toString("utf8"),
        this.maxQmdOutputChars,
      );
      captured.out = text;
      overflowed = overflowed || truncated;
    });
    proc.stderr.on("data", (data) => {
      const { text, truncated } = appendOutputWithCap(
        captured.err,
        data.toString("utf8"),
        this.maxQmdOutputChars,
      );
      captured.err = text;
      overflowed = overflowed || truncated;
    });
    proc.on("error", (err) => {
      cancelKillTimer();
      reject(err);
    });
    proc.on("close", (code) => {
      cancelKillTimer();
      if (overflowed) {
        reject(
          new Error(
            `mcporter ${args.join(" ")} produced too much output (limit ${this.maxQmdOutputChars} chars)`,
          ),
        );
        return;
      }
      if (code !== 0) {
        reject(
          new Error(
            `mcporter ${args.join(" ")} failed (code ${code}): ${captured.err || captured.out}`,
          ),
        );
        return;
      }
      resolve({ stdout: captured.out, stderr: captured.err });
    });
  });
}
/**
 * Runs one QMD search tool through `mcporter call` and normalizes the hits.
 *
 * The JSON printed on stdout may carry the hits under
 * `structuredContent.results`, under a top-level `results`, or be a bare
 * array; anything else yields no hits. Items without a usable string `docid`
 * are dropped, a leading `#` is stripped from docids, and non-finite scores
 * become 0.
 *
 * @param params - Tool selector pieces, query settings, optional collection
 *   filter and the timeout forwarded to mcporter (the outer process timeout is
 *   slightly longer, with a 5s floor).
 * @returns Normalized `{ docid, score, snippet }` results.
 * @throws If mcporter fails or its stdout is not valid JSON.
 */
private async runQmdSearchViaMcporter(params: {
  mcporter: ResolvedQmdMcporterConfig;
  tool: "search" | "vector_search" | "deep_search";
  query: string;
  limit: number;
  minScore: number;
  collection?: string;
  timeoutMs: number;
}): Promise<QmdQueryResult[]> {
  await this.ensureMcporterDaemonStarted(params.mcporter);
  const toolArgs: Record<string, unknown> = {
    query: params.query,
    limit: params.limit,
    minScore: params.minScore,
    ...(params.collection ? { collection: params.collection } : {}),
  };
  const cliArgs = [
    "call",
    `${params.mcporter.serverName}.${params.tool}`,
    "--args",
    JSON.stringify(toolArgs),
    "--output",
    "json",
    "--timeout",
    String(Math.max(0, params.timeoutMs)),
  ];
  const { stdout } = await this.runMcporter(cliArgs, {
    timeoutMs: Math.max(params.timeoutMs + 2_000, 5_000),
  });
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);
  const parsed: unknown = JSON.parse(stdout);
  // Prefer the MCP `structuredContent` envelope when present.
  const payload =
    isRecord(parsed) && isRecord(parsed.structuredContent) ? parsed.structuredContent : parsed;
  let rawHits: unknown[] = [];
  if (isRecord(payload) && Array.isArray(payload.results)) {
    rawHits = payload.results as unknown[];
  } else if (Array.isArray(payload)) {
    rawHits = payload;
  }
  const hits: QmdQueryResult[] = [];
  for (const raw of rawHits) {
    if (!isRecord(raw)) {
      continue;
    }
    const docid = typeof raw.docid === "string" ? raw.docid.replace(/^#/, "").trim() : "";
    if (!docid) {
      continue;
    }
    const numericScore = typeof raw.score === "number" ? raw.score : Number(raw.score);
    hits.push({
      docid,
      score: Number.isFinite(numericScore) ? numericScore : 0,
      snippet: typeof raw.snippet === "string" ? raw.snippet : "",
    });
  }
  return hits;
}
/**
 * Reads a window of lines from a UTF-8 text file without loading all of it.
 *
 * @param absPath - File to read.
 * @param from - 1-based first line to include; values below 1 (or omitted)
 *   start at the first line.
 * @param lines - Maximum number of lines to return; values below 1 are raised
 *   to 1, omitted means "to end of file".
 * @returns `{ missing: true }` when `isFileMissingError` recognizes the open
 *   failure, otherwise the selected lines joined with `\n`.
 * @throws Any other error raised while opening or reading the file.
 */
private async readPartialText(
  absPath: string,
  from?: number,
  lines?: number,
): Promise<{ missing: true } | { missing: false; text: string }> {
  const firstLine = Math.max(1, from ?? 1);
  const maxLines = Math.max(1, lines ?? Number.POSITIVE_INFINITY);
  const handle = await fs.open(absPath).catch((err: unknown) => {
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  });
  if (!handle) {
    return { missing: true };
  }
  const reader = readline.createInterface({
    input: handle.createReadStream({ encoding: "utf-8" }),
    crlfDelay: Infinity,
  });
  const picked: string[] = [];
  let lineNumber = 0;
  try {
    for await (const line of reader) {
      lineNumber += 1;
      if (lineNumber < firstLine) {
        // Still before the requested window.
        continue;
      }
      if (picked.length >= maxLines) {
        break;
      }
      picked.push(line);
    }
  } finally {
    reader.close();
    await handle.close();
  }
  return { missing: false, text: picked.slice(0, maxLines).join("\n") };
}
/**
 * Reads an entire file as UTF-8 text.
 *
 * @param absPath - File to read.
 * @returns `{ missing: true }` when `isFileMissingError` recognizes the
 *   failure, otherwise the file contents.
 * @throws Any other read error.
 */
private async readFullText(
  absPath: string,
): Promise<{ missing: true } | { missing: false; text: string }> {
  let text: string;
  try {
    text = await fs.readFile(absPath, "utf-8");
  } catch (err) {
    if (!isFileMissingError(err)) {
      throw err;
    }
    return { missing: true };
  }
  return { missing: false, text };
}
/**
 * Returns the cached read-only SQLite handle for the index at
 * `this.indexPath`, opening it on first use.
 *
 * @returns The shared database handle.
 */
private ensureDb(): SqliteDatabase {
  const existing = this.db;
  if (existing) {
    return existing;
  }
  const sqlite = requireNodeSqlite();
  const opened = new sqlite.DatabaseSync(this.indexPath, { readOnly: true });
  this.db = opened;
  // Keep QMD recall responsive when the updater holds a write lock.
  opened.exec("PRAGMA busy_timeout = 1");
  return opened;
}
/**
 * Mirrors this agent's session files into the exporter directory as Markdown.
 *
 * For every session file that yields an entry and is not older than the
 * retention cutoff, `<basename>.md` is (re)written when the entry's hash or
 * mtime changed, or when the previously recorded target path differs from the
 * current one. Afterwards any `.md` file in the export directory that was not
 * produced by this pass is removed, and bookkeeping for sessions that were not
 * exported this pass is dropped.
 *
 * Does nothing when no session exporter is configured.
 */
private async exportSessions(): Promise<void> {
  if (!this.sessionExporter) {
    return;
  }
  const exportDir = this.sessionExporter.dir;
  await fs.mkdir(exportDir, { recursive: true });
  const files = await listSessionFilesForAgent(this.agentId);
  const keep = new Set<string>();
  const tracked = new Set<string>();
  // A falsy retentionMs disables age-based filtering.
  const cutoff = this.sessionExporter.retentionMs
    ? Date.now() - this.sessionExporter.retentionMs
    : null;
  for (const sessionFile of files) {
    const entry = await buildSessionEntry(sessionFile);
    if (!entry) {
      continue;
    }
    if (cutoff && entry.mtimeMs < cutoff) {
      continue;
    }
    const target = path.join(exportDir, `${path.basename(sessionFile, ".jsonl")}.md`);
    tracked.add(sessionFile);
    const state = this.exportedSessionState.get(sessionFile);
    // Also rewrite when the recorded target differs: the cached hash/mtime
    // then describe a file at another path, not the one we are about to keep.
    const needsWrite =
      !state ||
      state.hash !== entry.hash ||
      state.mtimeMs !== entry.mtimeMs ||
      state.target !== target;
    if (needsWrite) {
      await fs.writeFile(target, this.renderSessionMarkdown(entry), "utf-8");
    }
    this.exportedSessionState.set(sessionFile, {
      hash: entry.hash,
      mtimeMs: entry.mtimeMs,
      target,
    });
    keep.add(target);
  }
  // Best effort: an unreadable export directory is treated as empty.
  const exported = await fs.readdir(exportDir).catch(() => []);
  for (const name of exported) {
    if (!name.endsWith(".md")) {
      continue;
    }
    const full = path.join(exportDir, name);
    if (!keep.has(full)) {
      await fs.rm(full, { force: true });
    }
  }
  for (const [sessionFile, state] of this.exportedSessionState) {
    if (!tracked.has(sessionFile) || !state.target.startsWith(exportDir + path.sep)) {
      this.exportedSessionState.delete(sessionFile);
    }
  }
}
/**
 * Renders a session entry as a small Markdown document: a `# Session <name>`
 * heading (file name without extension) followed by the trimmed content, or
 * `(empty)` when there is no non-whitespace content.
 */
private renderSessionMarkdown(entry: SessionFileEntry): string {
  const sessionName = path.basename(entry.absPath, path.extname(entry.absPath));
  const trimmedContent = entry.content?.trim() ?? "";
  const body = trimmedContent.length > 0 ? trimmedContent : "(empty)";
  return `# Session ${sessionName}\n\n${body}\n`;
}
/**
 * Chooses a collection name for exported sessions that does not clash with
 * any configured collection: `sessions-<agent>`, then `sessions-<agent>-2`,
 * `-3`, … until an unused name is found.
 */
private pickSessionCollectionName(): string {
  const taken = new Set(this.qmd.collections.map((collection) => collection.name));
  const base = `sessions-${this.sanitizeCollectionNameSegment(this.agentId)}`;
  // Attempt 1 is the bare base name; later attempts append the attempt number.
  for (let attempt = 1; ; attempt += 1) {
    const candidate = attempt === 1 ? base : `${base}-${attempt}`;
    if (!taken.has(candidate)) {
      return candidate;
    }
  }
}
/**
 * Converts arbitrary text into a collection-name segment: lowercased, every
 * run of characters outside `a-z`, `0-9` and `-` replaced by a single `-`,
 * and leading/trailing dashes removed. Falls back to `"agent"` when nothing
 * is left.
 */
private sanitizeCollectionNameSegment(input: string): string {
  const slug = input
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, "-")
    .replace(/^-+|-+$/g, "");
  return slug.length > 0 ? slug : "agent";
}
/**
 * Resolves a qmd document id to a location on disk using the index database.
 *
 * The id (with an optional leading `#` removed) is matched against the
 * `hash` column of active documents, first exactly and then as a prefix.
 * When several rows match, `hints` decide which one wins (see
 * `pickDocLocation`). Successful lookups are cached.
 *
 * @param docid - Document id as reported by qmd; empty/undefined yields null.
 * @param hints - Optional preferred collection and/or file used to choose
 *   between multiple matching rows.
 * @returns The resolved location, or null when nothing matches or no matching
 *   row can be mapped to a location.
 * @throws A "qmd index busy" error when SQLite reports the database is
 *   busy/locked; any other database error is rethrown unchanged.
 */
private async resolveDocLocation(
  docid?: string,
  hints?: { preferredCollection?: string; preferredFile?: string },
): Promise<{ rel: string; abs: string; source: MemorySource } | null> {
  if (!docid) {
    return null;
  }
  const normalized = docid.startsWith("#") ? docid.slice(1) : docid;
  if (!normalized) {
    return null;
  }
  // Both hints influence which row is picked, so both must be part of the
  // cache key; otherwise a location cached for one preferred file would be
  // returned for a lookup that prefers a different file.
  const cacheKey = `${hints?.preferredCollection ?? "*"}:${hints?.preferredFile ?? "*"}:${normalized}`;
  const cached = this.docPathCache.get(cacheKey);
  if (cached) {
    return cached;
  }
  const db = this.ensureDb();
  let rows: Array<{ collection: string; path: string }> = [];
  try {
    rows = db
      .prepare("SELECT collection, path FROM documents WHERE hash = ? AND active = 1")
      .all(normalized) as Array<{ collection: string; path: string }>;
    if (rows.length === 0) {
      // Fall back to treating the id as a hash prefix.
      rows = db
        .prepare("SELECT collection, path FROM documents WHERE hash LIKE ? AND active = 1")
        .all(`${normalized}%`) as Array<{ collection: string; path: string }>;
    }
  } catch (err) {
    if (this.isSqliteBusyError(err)) {
      log.debug(`qmd index is busy while resolving doc path: ${String(err)}`);
      throw this.createQmdBusyError(err);
    }
    throw err;
  }
  if (rows.length === 0) {
    return null;
  }
  const location = this.pickDocLocation(rows, hints);
  if (!location) {
    return null;
  }
  this.docPathCache.set(cacheKey, location);
  return location;
}
/**
 * Chooses one location from the index rows matching a document.
 *
 * Preference order: a row in `hints.preferredCollection`, then a row whose
 * normalized path equals or ends with `hints.preferredFile`, then any row.
 * Within each tier the first row that `toDocLocation` can map wins.
 *
 * @returns The chosen location, or null when no row can be mapped.
 */
private pickDocLocation(
  rows: Array<{ collection: string; path: string }>,
  hints?: { preferredCollection?: string; preferredFile?: string },
): { rel: string; abs: string; source: MemorySource } | null {
  type Row = { collection: string; path: string };
  // First row accepted by `accept` that also maps to a known location.
  const firstMappable = (accept: (row: Row) => boolean) => {
    for (const row of rows) {
      if (!accept(row)) {
        continue;
      }
      const mapped = this.toDocLocation(row.collection, row.path);
      if (mapped) {
        return mapped;
      }
    }
    return null;
  };

  const wantedCollection = hints?.preferredCollection;
  if (wantedCollection) {
    const byCollection = firstMappable((row) => row.collection === wantedCollection);
    if (byCollection) {
      return byCollection;
    }
  }

  const wantedFile = hints?.preferredFile;
  if (wantedFile) {
    const preferred = path.normalize(wantedFile);
    const byFile = firstMappable((row) => {
      const rowPath = path.normalize(row.path);
      return rowPath === preferred || rowPath.endsWith(path.sep + preferred);
    });
    if (byFile) {
      return byFile;
    }
  }

  return firstMappable(() => true);
}
/**
 * Derives a line range for a snippet.
 *
 * When the snippet contains a header matched by `SNIPPET_HEADER_RE`
 * (`@@ -<start>,<count>`), the range is `start … start + count - 1`.
 * Otherwise the range covers line 1 through the number of lines in the
 * snippet text itself.
 */
private extractSnippetLines(snippet: string): { startLine: number; endLine: number } {
  const header = SNIPPET_HEADER_RE.exec(snippet);
  if (header) {
    const startLine = Number(header[1]);
    const lineCount = Number(header[2]);
    if (Number.isFinite(startLine) && Number.isFinite(lineCount)) {
      return { startLine, endLine: startLine + lineCount - 1 };
    }
  }
  return { startLine: 1, endLine: snippet.split("\n").length };
}
/**
 * Summarizes the active documents in the index per memory source.
 *
 * Every configured source starts at zero; each collection's active-document
 * count is added to the source of its known root (collections without a known
 * root count as `"memory"`). The same count is reported for both `files` and
 * `chunks`.
 *
 * Never throws: on any failure a warning is logged and all-zero counts for
 * the configured sources are returned.
 */
private readCounts(): {
  totalDocuments: number;
  sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>;
} {
  try {
    const rows = this.ensureDb()
      .prepare(
        "SELECT collection, COUNT(*) as c FROM documents WHERE active = 1 GROUP BY collection",
      )
      .all() as Array<{ collection: string; c: number }>;
    const tallies = new Map<MemorySource, { files: number; chunks: number }>();
    for (const source of this.sources) {
      tallies.set(source, { files: 0, chunks: 0 });
    }
    let totalDocuments = 0;
    for (const { collection, c } of rows) {
      const docs = c ?? 0;
      const source = this.collectionRoots.get(collection)?.kind ?? "memory";
      const tally = tallies.get(source) ?? { files: 0, chunks: 0 };
      tallies.set(source, { files: tally.files + docs, chunks: tally.chunks + docs });
      totalDocuments += docs;
    }
    const sourceCounts = [...tallies].map(([source, { files, chunks }]) => ({
      source,
      files,
      chunks,
    }));
    return { totalDocuments, sourceCounts };
  } catch (err) {
    log.warn(`failed to read qmd index stats: ${String(err)}`);
    const sourceCounts = Array.from(this.sources).map((source) => ({
      source,
      files: 0,
      chunks: 0,
    }));
    return { totalDocuments: 0, sourceCounts };
  }
}
/**
 * Logs a warning that a search was rejected by the scope rules, including
 * the channel and chat type derived from the session key (or `unknown`) and
 * the trimmed session key (or `<none>`).
 */
private logScopeDenied(sessionKey?: string): void {
  const details = {
    channel: deriveQmdScopeChannel(sessionKey) ?? "unknown",
    chatType: deriveQmdScopeChatType(sessionKey) ?? "unknown",
    session: sessionKey?.trim() || "<none>",
  };
  log.warn(
    `qmd search denied by scope (channel=${details.channel}, chatType=${details.chatType}, session=${details.session})`,
  );
}
/** Checks the session key against the configured qmd scope rules. */
private isScopeAllowed(sessionKey?: string): boolean {
  const { scope } = this.qmd;
  return isQmdScopeAllowed(scope, sessionKey);
}
/**
 * Maps an index row (collection + collection-relative path) to a location.
 *
 * @returns `abs` (normalized absolute path under the collection root), `rel`
 *   (the search-facing path from `buildSearchPath`) and the root's source
 *   kind; null when the collection has no known root.
 */
private toDocLocation(
  collection: string,
  collectionRelativePath: string,
): { rel: string; abs: string; source: MemorySource } | null {
  const root = this.collectionRoots.get(collection);
  if (!root) {
    return null;
  }
  const abs = path.normalize(path.resolve(root.path, collectionRelativePath));
  const rel = this.buildSearchPath(
    collection,
    // Search paths always use forward slashes.
    collectionRelativePath.replace(/\\/g, "/"),
    path.relative(this.workspaceDir, abs),
    abs,
  );
  return { rel, abs, source: root.kind };
}
/**
 * Builds the path reported in search results for a document.
 *
 * Documents inside the workspace are reported by their workspace-relative
 * path with forward slashes (or the file's base name when that relative path
 * is empty). Documents outside the workspace are reported as
 * `qmd/<collection>/<collection-relative path>` with leading slashes removed.
 */
private buildSearchPath(
  collection: string,
  collectionRelativePath: string,
  relativeToWorkspace: string,
  absPath: string,
): string {
  if (!this.isInsideWorkspace(relativeToWorkspace)) {
    const withoutLeadingSlashes = collectionRelativePath.replace(/^\/+/, "");
    return `qmd/${collection}/${withoutLeadingSlashes}`;
  }
  const posixRelative = relativeToWorkspace.replace(/\\/g, "/");
  return posixRelative || path.basename(absPath);
}
/**
 * Decides whether a workspace-relative path stays inside the workspace.
 *
 * @param relativePath - A path relative to the workspace directory, using the
 *   platform separator.
 * @returns true for the empty path (the workspace itself) and for any
 *   relative path that does not climb out through a leading `..` segment;
 *   false for `..`, `..<sep>…` and absolute paths.
 */
private isInsideWorkspace(relativePath: string): boolean {
  if (!relativePath) {
    return true;
  }
  // Only a whole leading ".." segment escapes the workspace. A plain prefix
  // test would also reject legitimate names such as "..notes" or "...md".
  if (relativePath === ".." || relativePath.startsWith(`..${path.sep}`)) {
    return false;
  }
  return !path.isAbsolute(relativePath);
}
/**
 * Turns a search-facing path back into an absolute path that is safe to read.
 *
 * Paths of the form `qmd/<collection>/<rest>` are resolved against that
 * collection's root; everything else is resolved against the workspace.
 *
 * @throws If a `qmd/` path is malformed, names an unknown collection, or
 *   resolves outside its collection root; or if a workspace path resolves
 *   outside the workspace.
 */
private resolveReadPath(relPath: string): string {
  if (!relPath.startsWith("qmd/")) {
    const workspacePath = path.resolve(this.workspaceDir, relPath);
    if (!this.isWithinWorkspace(workspacePath)) {
      throw new Error("path escapes workspace");
    }
    return workspacePath;
  }
  // segments[0] is the literal "qmd" prefix.
  const segments = relPath.split("/");
  const collection = segments[1];
  const remainder = segments.slice(2);
  if (!collection || remainder.length === 0) {
    throw new Error("invalid qmd path");
  }
  const root = this.collectionRoots.get(collection);
  if (!root) {
    throw new Error(`unknown qmd collection: ${collection}`);
  }
  const collectionPath = path.resolve(root.path, remainder.join("/"));
  if (!this.isWithinRoot(root.path, collectionPath)) {
    throw new Error("qmd path escapes collection");
  }
  return collectionPath;
}
/**
 * Reports whether `absPath` is the workspace directory itself or lies below
 * it. Both sides are compared with a trailing separator so a sibling such as
 * `<workspace>-old` is not mistaken for a child.
 */
private isWithinWorkspace(absPath: string): boolean {
  if (absPath === this.workspaceDir) {
    return true;
  }
  const withTrailingSep = (value: string): string =>
    value.endsWith(path.sep) ? value : `${value}${path.sep}`;
  return withTrailingSep(absPath).startsWith(withTrailingSep(this.workspaceDir));
}
/**
 * Reports whether `candidate` is `root` itself or lies below it. Both sides
 * are compared with a trailing separator so a sibling sharing the root's name
 * as a prefix is not mistaken for a child.
 */
private isWithinRoot(root: string, candidate: string): boolean {
  if (candidate === root) {
    return true;
  }
  const withTrailingSep = (value: string): string =>
    value.endsWith(path.sep) ? value : `${value}${path.sep}`;
  return withTrailingSep(candidate).startsWith(withTrailingSep(root));
}
/**
 * Limits the total snippet length of a result list to
 * `qmd.limits.maxInjectedChars`.
 *
 * Results are kept in order while their snippets fit; the first snippet that
 * does not fit is cut to the remaining budget and ends the list. A missing or
 * non-positive budget returns the input unchanged.
 */
private clampResultsByInjectedChars(results: MemorySearchResult[]): MemorySearchResult[] {
  const budget = this.qmd.limits.maxInjectedChars;
  if (!budget || budget <= 0) {
    return results;
  }
  const kept: MemorySearchResult[] = [];
  let charsLeft = budget;
  for (const result of results) {
    if (charsLeft <= 0) {
      break;
    }
    const text = result.snippet ?? "";
    if (text.length > charsLeft) {
      // Partial fit: keep what the budget allows and stop.
      kept.push({ ...result, snippet: text.slice(0, Math.max(0, charsLeft)) });
      break;
    }
    kept.push(result);
    charsLeft -= text.length;
  }
  return kept;
}
/**
 * Picks up to `limit` results, interleaving sources when both `"sessions"`
 * and `"memory"` results are present.
 *
 * Results are grouped by source (keeping their incoming order); the groups
 * are ordered by the score of their first entry, highest first, and then
 * drained round-robin. When either of the two sources is absent, or there is
 * at most one result, the first `limit` results are returned as-is.
 */
private diversifyResultsBySource(
  results: MemorySearchResult[],
  limit: number,
): MemorySearchResult[] {
  const wanted = Math.max(0, limit);
  if (wanted <= 0) {
    return [];
  }
  if (results.length <= 1) {
    return results.slice(0, wanted);
  }
  const groups = new Map<MemorySource, MemorySearchResult[]>();
  for (const result of results) {
    const bucket = groups.get(result.source);
    if (bucket) {
      bucket.push(result);
    } else {
      groups.set(result.source, [result]);
    }
  }
  if (!(groups.has("sessions") && groups.has("memory"))) {
    return results.slice(0, wanted);
  }
  const queues = [...groups.values()].toSorted(
    (a, b) => (b[0]?.score ?? 0) - (a[0]?.score ?? 0),
  );
  const interleaved: MemorySearchResult[] = [];
  // Round k takes the k-th entry of every queue that still has one.
  for (let round = 0; interleaved.length < wanted; round += 1) {
    let tookAny = false;
    for (const queue of queues) {
      const next = queue[round];
      if (!next) {
        continue;
      }
      interleaved.push(next);
      tookAny = true;
      if (interleaved.length >= wanted) {
        break;
      }
    }
    if (!tookAny) {
      break;
    }
  }
  return interleaved;
}
/**
 * Decides whether an index update can be skipped because the previous one
 * happened less than `qmd.update.debounceMs` ago.
 *
 * Never skips when `force` is set, when debouncing is disabled (non-positive
 * interval) or when no previous update time is recorded.
 */
private shouldSkipUpdate(force?: boolean): boolean {
  if (force) {
    return false;
  }
  const { debounceMs } = this.qmd.update;
  if (debounceMs <= 0 || !this.lastUpdateAt) {
    return false;
  }
  const elapsedMs = Date.now() - this.lastUpdateAt;
  return elapsedMs < debounceMs;
}
/**
 * Recognizes SQLite busy/locked failures by their message text
 * (case-insensitive `sqlite_busy` or `database is locked`).
 */
private isSqliteBusyError(err: unknown): boolean {
  const text = (err instanceof Error ? err.message : String(err)).toLowerCase();
  const busyMarkers = ["sqlite_busy", "database is locked"];
  return busyMarkers.some((marker) => text.includes(marker));
}
/**
 * Recognizes errors whose message (case-insensitive) contains one of the
 * known "unknown/unsupported command-line option" phrases.
 */
private isUnsupportedQmdOptionError(err: unknown): boolean {
  const text = (err instanceof Error ? err.message : String(err)).toLowerCase();
  const unsupportedOptionMarkers = [
    "unknown flag",
    "unknown option",
    "unrecognized option",
    "flag provided but not defined",
    "unexpected argument",
  ];
  return unsupportedOptionMarkers.some((marker) => text.includes(marker));
}
/** Wraps an underlying failure in a uniform "qmd index busy" error. */
private createQmdBusyError(err: unknown): Error {
  const detail = err instanceof Error ? err.message : String(err);
  return new Error(`qmd index busy while reading results: ${detail}`);
}
/**
 * Gives an in-flight index update a short grace period before searching.
 *
 * Waits until the pending update settles (failures are ignored here) or
 * `SEARCH_PENDING_UPDATE_WAIT_MS` elapses, whichever comes first. Returns
 * immediately when no update is pending. Never rejects.
 */
private async waitForPendingUpdateBeforeSearch(): Promise<void> {
  const pending = this.pendingUpdate;
  if (!pending) {
    return;
  }
  let graceTimer: ReturnType<typeof setTimeout> | undefined;
  const graceElapsed = new Promise<void>((resolve) => {
    graceTimer = setTimeout(resolve, SEARCH_PENDING_UPDATE_WAIT_MS);
  });
  try {
    await Promise.race([pending.catch(() => undefined), graceElapsed]);
  } finally {
    // Release the timer when the update wins the race so it does not linger
    // for the remainder of the grace period.
    clearTimeout(graceTimer);
  }
}
/**
 * Runs the same qmd command once per collection and merges the results.
 *
 * Collections are queried one after another with `-c <name>`. Hits without a
 * non-blank string docid are ignored; each remaining hit is tagged with its
 * collection (unless the parser already supplied one). When a docid appears
 * more than once, the hit with the strictly higher numeric score is kept.
 *
 * @returns Merged hits sorted by score, highest first.
 * @throws Whatever `runQmd` rejects with for any of the collections.
 */
private async runQueryAcrossCollections(
  query: string,
  limit: number,
  collectionNames: string[],
  command: "query" | "search" | "vsearch",
): Promise<QmdQueryResult[]> {
  log.debug(
    `qmd ${command} multi-collection workaround active (${collectionNames.length} collections)`,
  );
  // Non-numeric scores rank below every numeric score.
  const rankOf = (hit: QmdQueryResult | undefined): number =>
    typeof hit?.score === "number" ? hit.score : Number.NEGATIVE_INFINITY;
  const best = new Map<string, QmdQueryResult>();
  for (const collectionName of collectionNames) {
    const argv = [...this.buildSearchArgs(command, query, limit), "-c", collectionName];
    const { stdout, stderr } = await this.runQmd(argv, {
      timeoutMs: this.qmd.limits.timeoutMs,
    });
    for (const hit of parseQmdQueryJson(stdout, stderr)) {
      const docid = hit.docid;
      if (typeof docid !== "string" || docid.trim().length === 0) {
        continue;
      }
      const tagged = {
        ...hit,
        docid,
        collection: hit.collection ?? collectionName,
      } satisfies QmdQueryResult;
      const current = best.get(docid);
      if (!current || rankOf(tagged) > rankOf(current)) {
        best.set(docid, tagged);
      }
    }
  }
  return Array.from(best.values()).toSorted((a, b) => (b.score ?? 0) - (a.score ?? 0));
}
/**
 * Runs the same mcporter search tool once per collection and merges the
 * results.
 *
 * Collections are queried one after another. Hits without a non-blank string
 * docid are ignored; each remaining hit is tagged with the collection it was
 * queried from (unless it already carries one), matching what
 * `runQueryAcrossCollections` does for the CLI path. When a docid appears
 * more than once, the hit with the strictly higher numeric score is kept.
 *
 * @param params - Tool name, query settings and the collections to search.
 * @returns Merged hits sorted by score, highest first.
 * @throws Whatever `runQmdSearchViaMcporter` rejects with for any collection.
 */
private async runMcporterAcrossCollections(params: {
  tool: "search" | "vector_search" | "deep_search";
  query: string;
  limit: number;
  minScore: number;
  collectionNames: string[];
}): Promise<QmdQueryResult[]> {
  const bestByDocId = new Map<string, QmdQueryResult>();
  for (const collectionName of params.collectionNames) {
    const parsed = await this.runQmdSearchViaMcporter({
      mcporter: this.qmd.mcporter,
      tool: params.tool,
      query: params.query,
      limit: params.limit,
      minScore: params.minScore,
      collection: collectionName,
      timeoutMs: this.qmd.limits.timeoutMs,
    });
    for (const entry of parsed) {
      if (typeof entry.docid !== "string" || !entry.docid.trim()) {
        continue;
      }
      // Remember which collection produced the hit so later path resolution
      // can prefer it; without this the origin is lost after merging.
      const withCollection = {
        ...entry,
        collection: entry.collection ?? collectionName,
      } satisfies QmdQueryResult;
      const prev = bestByDocId.get(entry.docid);
      const prevScore = typeof prev?.score === "number" ? prev.score : Number.NEGATIVE_INFINITY;
      const nextScore =
        typeof withCollection.score === "number"
          ? withCollection.score
          : Number.NEGATIVE_INFINITY;
      if (!prev || nextScore > prevScore) {
        bestByDocId.set(entry.docid, withCollection);
      }
    }
  }
  return [...bestByDocId.values()].toSorted((a, b) => (b.score ?? 0) - (a.score ?? 0));
}
/**
 * Lists the configured collection names: trimmed, blanks dropped, duplicates
 * removed, in configuration order.
 */
private listManagedCollectionNames(): string[] {
  // A Set iterates in insertion order, so first occurrences keep their place.
  const unique = new Set<string>();
  for (const { name } of this.qmd.collections) {
    const trimmed = name?.trim();
    if (trimmed) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
/**
 * Builds the `-c <name>` argument pairs for the given collections, skipping
 * empty names. Returns an empty array when there is nothing to filter by.
 */
private buildCollectionFilterArgs(collectionNames: string[]): string[] {
  const flags: string[] = [];
  for (const name of collectionNames) {
    if (name) {
      flags.push("-c", name);
    }
  }
  return flags;
}
/**
 * Builds the base argv for a qmd search-style command:
 * `<command> <query> --json -n <limit>`.
 *
 * For the `search` command the query is first passed through
 * `normalizeHanBm25Query`; other commands receive the query unchanged.
 */
private buildSearchArgs(
  command: "query" | "search" | "vsearch",
  query: string,
  limit: number,
): string[] {
  const effectiveQuery = command === "search" ? normalizeHanBm25Query(query) : query;
  return [command, effectiveQuery, "--json", "-n", String(limit)];
}
}
/**
 * Appends `chunk` to `current`, keeping at most `maxChars` characters.
 *
 * When the combined text exceeds the cap, only its trailing `maxChars`
 * characters are kept (fractional caps are rounded down) and `truncated` is
 * reported as true. A cap that is zero, negative or NaN keeps no text at all.
 *
 * @param current - Text accumulated so far.
 * @param chunk - Newly received text.
 * @param maxChars - Maximum number of characters to retain.
 * @returns The retained text and whether anything was cut off.
 */
function appendOutputWithCap(
  current: string,
  chunk: string,
  maxChars: number,
): { text: string; truncated: boolean } {
  const appended = current + chunk;
  if (appended.length <= maxChars) {
    return { text: appended, truncated: false };
  }
  // `slice(-0)` is `slice(0)` and would return the whole string, so a cap
  // below one character has to be handled explicitly.
  const keep = maxChars > 0 ? Math.floor(maxChars) : 0;
  return { text: keep > 0 ? appended.slice(-keep) : "", truncated: true };
}