* fix: prune stale session entries, cap entry count, and rotate sessions.json
The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m)
triggers multiple full rewrites, and session keys from groups, threads, and DMs
accumulate indefinitely with large embedded objects (skillsSnapshot,
systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop,
causing Telegram webhook timeouts and effectively taking the bot down.
Three mitigations, all running inside saveSessionStoreUnlocked() on every write:
1. Prune stale entries: remove entries with updatedAt older than 30 days
(configurable via session.maintenance.pruneDays in openclaw.json)
2. Cap entry count: keep only the 500 most recently updated entries
(configurable via session.maintenance.maxEntries). Entries without updatedAt
are evicted first.
3. File rotation: if the existing sessions.json exceeds 10MB before a write,
rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent
backups (configurable via session.maintenance.rotateBytes).
All three thresholds are configurable under session.maintenance in openclaw.json
with Zod validation. No env vars.
Existing tests updated to use Date.now() instead of epoch-relative timestamps
(1, 2, 3) that would be incorrectly pruned as stale.
27 new tests covering pruning, capping, rotation, and integration scenarios.
* feat: auto-prune expired cron run sessions (#12289)
Add TTL-based reaper for isolated cron run sessions that accumulate
indefinitely in sessions.json.
New config option:
cron.sessionRetention: string | false (default: '24h')
The reaper runs piggy-backed on the cron timer tick, self-throttled
to sweep at most every 5 minutes. It removes session entries matching
the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now.
Design follows the Kubernetes ttlSecondsAfterFinished pattern:
- Sessions are persisted normally (observability/debugging)
- A periodic reaper prunes expired entries
- Configurable retention with sensible default
- Set to false to disable pruning entirely
Files changed:
- src/config/types.cron.ts: Add sessionRetention to CronConfig
- src/config/zod-schema.ts: Add Zod validation for sessionRetention
- src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions)
- src/cron/session-reaper.test.ts: 12 tests covering all paths
- src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps
- src/cron/service/timer.ts: Wire reaper into onTimer tick
- src/gateway/server-cron.ts: Pass config and session store path to deps
Closes #12289
* fix: sweep cron session stores per agent
* docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg)
* fix: add warn-only session maintenance mode
* fix: warn-only maintenance defaults to active session
* fix: deliver maintenance warnings to active session
* docs: add session maintenance examples
* fix: accept duration and size maintenance thresholds
* refactor: share cron run session key check
* fix: format issues and replace defaultRuntime.warn with console.warn
---------
Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com>
Co-authored-by: Glucksberg <markuscontasul@gmail.com>
Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com>
Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
188 lines
5.9 KiB
TypeScript
188 lines
5.9 KiB
TypeScript
import type { ChatType } from "../channels/chat-type.js";
|
||
|
||
/** Reply mode selector: either `"text"` or `"command"`. */
export type ReplyMode =
  | "text"
  | "command";
|
||
/** Typing mode selector: one of `"never"`, `"instant"`, `"thinking"`, or `"message"`. */
export type TypingMode =
  | "never"
  | "instant"
  | "thinking"
  | "message";
|
||
/** Session scope selector: `"per-sender"` or `"global"`. */
export type SessionScope =
  | "per-sender"
  | "global";
|
||
/**
 * DM session scoping selector: `"main"`, `"per-peer"`, `"per-channel-peer"`,
 * or `"per-account-channel-peer"`.
 */
export type DmScope =
  | "main"
  | "per-peer"
  | "per-channel-peer"
  | "per-account-channel-peer";
|
||
/** Reply-to mode selector: `"off"`, `"first"`, or `"all"`. */
export type ReplyToMode =
  | "off"
  | "first"
  | "all";
|
||
/** Group policy selector: `"open"`, `"disabled"`, or `"allowlist"`. */
export type GroupPolicy =
  | "open"
  | "disabled"
  | "allowlist";
|
||
/** DM policy selector: `"pairing"`, `"allowlist"`, `"open"`, or `"disabled"`. */
export type DmPolicy =
  | "pairing"
  | "allowlist"
  | "open"
  | "disabled";
|
||
|
||
/**
 * Retry tuning for outbound requests.
 *
 * Every field is optional. The defaults quoted per field are documentation
 * only; this type does not apply them.
 */
export type OutboundRetryConfig = {
  /** Max retry attempts for outbound requests (default: 3). */
  attempts?: number;
  /** Minimum retry delay in ms (default: 300-500ms depending on provider). */
  minDelayMs?: number;
  /** Maximum retry delay cap in ms (default: 30000). */
  maxDelayMs?: number;
  /** Jitter factor (0-1) applied to delays (default: 0.1). */
  jitter?: number;
};
|
||
|
||
/**
 * Optional thresholds for block-streaming coalescing.
 *
 * NOTE(review): how each threshold is interpreted is decided by the consumer
 * and is not visible from this declaration.
 */
export type BlockStreamingCoalesceConfig = {
  /** Lower character bound. */
  minChars?: number;
  /** Upper character bound. */
  maxChars?: number;
  /** Idle duration in milliseconds. */
  idleMs?: number;
};
|
||
|
||
/**
 * Optional settings for block-streaming chunking.
 *
 * NOTE(review): how the bounds and the break preference are applied is
 * decided by the consumer and is not visible from this declaration.
 */
export type BlockStreamingChunkConfig = {
  /** Lower character bound. */
  minChars?: number;
  /** Upper character bound. */
  maxChars?: number;
  /** Preferred break point: `"paragraph"`, `"newline"`, or `"sentence"`. */
  breakPreference?: "paragraph" | "newline" | "sentence";
};
|
||
|
||
/** Table rendering mode selector: `"off"`, `"bullets"`, or `"code"`. */
export type MarkdownTableMode =
  | "off"
  | "bullets"
  | "code";
|
||
|
||
/** Markdown rendering options. */
export type MarkdownConfig = {
  /** Table rendering mode (off|bullets|code). */
  tables?: MarkdownTableMode;
};
|
||
|
||
/**
 * Delay settings for block replies.
 *
 * Every field is optional. The defaults quoted per field are documentation
 * only; this type does not apply them.
 */
export type HumanDelayConfig = {
  /** Delay style for block replies (off|natural|custom). */
  mode?: "off" | "natural" | "custom";
  /** Minimum delay in milliseconds (default: 800). */
  minMs?: number;
  /** Maximum delay in milliseconds (default: 2500). */
  maxMs?: number;
};
|
||
|
||
/** Outcome of a session send-policy decision: `"allow"` or `"deny"`. */
export type SessionSendPolicyAction =
  | "allow"
  | "deny";
|
||
/**
 * Match criteria for a send-policy rule. All fields are optional.
 *
 * NOTE(review): how multiple criteria combine (AND vs OR) is decided by the
 * policy evaluator and is not visible from this declaration.
 */
export type SessionSendPolicyMatch = {
  /** Channel identifier to match. */
  channel?: string;
  /** Chat type to match. */
  chatType?: ChatType;
  /** Key prefix to match — presumably tested against the session key; confirm in the evaluator. */
  keyPrefix?: string;
};
|
||
/** A single send-policy rule: a required action plus optional match criteria. */
export type SessionSendPolicyRule = {
  /** Action this rule yields. */
  action: SessionSendPolicyAction;
  /** Optional criteria restricting where the rule applies. */
  match?: SessionSendPolicyMatch;
};
|
||
/** Send-policy configuration: an optional default action plus an optional rule list. */
export type SessionSendPolicyConfig = {
  /** Default action — presumably used when no rule matches; confirm in the evaluator. */
  default?: SessionSendPolicyAction;
  /** Policy rules. NOTE(review): evaluation order is not visible from this declaration. */
  rules?: SessionSendPolicyRule[];
};
|
||
|
||
/** Session reset mode selector: `"daily"` or `"idle"`. */
export type SessionResetMode =
  | "daily"
  | "idle";
|
||
/** Session reset settings. All fields are optional. */
export type SessionResetConfig = {
  /** Reset mode (`"daily"` or `"idle"`). */
  mode?: SessionResetMode;
  /** Local hour (0-23) for the daily reset boundary. */
  atHour?: number;
  /** Sliding idle window (minutes). When set with daily mode, whichever expires first wins. */
  idleMinutes?: number;
};
|
||
/** Per-conversation-type reset overrides. All fields are optional. */
export type SessionResetByTypeConfig = {
  /** Reset settings for direct conversations. */
  direct?: SessionResetConfig;
  /** @deprecated Use `direct` instead. Kept for backward compatibility. */
  dm?: SessionResetConfig;
  /** Reset settings for group conversations. */
  group?: SessionResetConfig;
  /** Reset settings for threads. */
  thread?: SessionResetConfig;
};
|
||
|
||
/**
 * Top-level session configuration. All fields are optional.
 *
 * Fields without a comment are not described in this file; see their
 * consumers for exact semantics.
 */
export type SessionConfig = {
  scope?: SessionScope;
  /** DM session scoping (default: "main"). */
  dmScope?: DmScope;
  /** Map platform-prefixed identities (e.g. "telegram:123") to canonical DM peers. */
  identityLinks?: Record<string, string[]>;
  resetTriggers?: string[];
  idleMinutes?: number;
  reset?: SessionResetConfig;
  resetByType?: SessionResetByTypeConfig;
  /** Channel-specific reset overrides (e.g. { discord: { mode: "idle", idleMinutes: 10080 } }). */
  resetByChannel?: Record<string, SessionResetConfig>;
  store?: string;
  typingIntervalSeconds?: number;
  typingMode?: TypingMode;
  mainKey?: string;
  sendPolicy?: SessionSendPolicyConfig;
  agentToAgent?: {
    /** Max ping-pong turns between requester/target (0–5). Default: 5. */
    maxPingPongTurns?: number;
  };
  /** Automatic session store maintenance (pruning, capping, file rotation). */
  maintenance?: SessionMaintenanceConfig;
};
|
||
|
||
/** Session maintenance mode selector: `"enforce"` or `"warn"`. */
export type SessionMaintenanceMode =
  | "enforce"
  | "warn";
|
||
|
||
/**
 * Thresholds for automatic session store maintenance.
 *
 * Every field is optional. The defaults quoted per field are documentation
 * only; this type does not apply them.
 */
export type SessionMaintenanceConfig = {
  /** Whether to enforce maintenance or warn only. Default: "warn". */
  mode?: SessionMaintenanceMode;
  /** Remove session entries older than this duration (e.g. "30d", "12h"). Default: "30d". */
  pruneAfter?: string | number;
  /** @deprecated Use `pruneAfter` instead. */
  pruneDays?: number;
  /** Maximum number of session entries to keep. Default: 500. */
  maxEntries?: number;
  /** Rotate sessions.json when it exceeds this size (e.g. "10mb"). Default: 10mb. */
  rotateBytes?: string | number;
};
|
||
|
||
/** Logging configuration. All fields are optional. */
export type LoggingConfig = {
  /** Log level. */
  level?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace";
  /** Log file — presumably a filesystem path; confirm against the logger setup. */
  file?: string;
  /** Console log level; accepts the same values as `level`. */
  consoleLevel?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace";
  /** Console output style. */
  consoleStyle?: "pretty" | "compact" | "json";
  /** Redact sensitive tokens in tool summaries. Default: "tools". */
  redactSensitive?: "off" | "tools";
  /** Regex patterns used to redact sensitive tokens (defaults apply when unset). */
  redactPatterns?: string[];
};
|
||
|
||
/**
 * Diagnostics "otel" export settings. All fields are optional.
 *
 * Fields without a comment are not described in this file; see the exporter
 * setup for exact semantics.
 */
export type DiagnosticsOtelConfig = {
  enabled?: boolean;
  endpoint?: string;
  /** Export protocol: `"http/protobuf"` or `"grpc"`. */
  protocol?: "http/protobuf" | "grpc";
  headers?: Record<string, string>;
  serviceName?: string;
  traces?: boolean;
  metrics?: boolean;
  logs?: boolean;
  /** Trace sample rate (0.0 - 1.0). */
  sampleRate?: number;
  /** Metric export interval (ms). */
  flushIntervalMs?: number;
};
|
||
|
||
/**
 * Diagnostics cache-trace settings. All fields are optional.
 *
 * NOTE(review): the `include*` flags presumably select which parts are
 * written to the trace; confirm against the trace writer.
 */
export type DiagnosticsCacheTraceConfig = {
  enabled?: boolean;
  filePath?: string;
  includeMessages?: boolean;
  includePrompt?: boolean;
  includeSystem?: boolean;
};
|
||
|
||
/** Diagnostics configuration. All fields are optional. */
export type DiagnosticsConfig = {
  enabled?: boolean;
  /** Optional ad-hoc diagnostics flags (e.g. "telegram.http"). */
  flags?: string[];
  /** "otel" export settings. */
  otel?: DiagnosticsOtelConfig;
  /** Cache-trace settings. */
  cacheTrace?: DiagnosticsCacheTraceConfig;
};
|
||
|
||
/**
 * Reconnect tuning for the web provider. All fields are optional.
 *
 * NOTE(review): field names suggest an exponential backoff
 * (initial delay, cap, growth factor, jitter); confirm against the reconnect loop.
 */
export type WebReconnectConfig = {
  initialMs?: number;
  maxMs?: number;
  factor?: number;
  jitter?: number;
  /** Maximum reconnect attempts; 0 = unlimited. */
  maxAttempts?: number;
};
|
||
|
||
/** Configuration for the WhatsApp web provider. All fields are optional. */
export type WebConfig = {
  /** If false, do not start the WhatsApp web provider. Default: true. */
  enabled?: boolean;
  /** Heartbeat period in seconds. */
  heartbeatSeconds?: number;
  /** Reconnect tuning. */
  reconnect?: WebReconnectConfig;
};
|
||
|
||
/**
 * Provider docking: allowlists keyed by provider id (and internal "webchat").
 *
 * Each value is an array of string or numeric entries; any key may be absent.
 */
export type AgentElevatedAllowFromConfig = Partial<Record<string, Array<string | number>>>;
|
||
|
||
/** Identity settings. All fields are optional. */
export type IdentityConfig = {
  name?: string;
  theme?: string;
  emoji?: string;
  /** Avatar image: workspace-relative path, http(s) URL, or data URI. */
  avatar?: string;
};
|