refactor(web-fetch): dedupe firecrawl payload builder

This commit is contained in:
Peter Steinberger
2026-02-15 05:53:55 +00:00
parent 2f4b91d738
commit 485b78bb94

View File

@@ -286,6 +286,43 @@ function wrapWebFetchField(value: string | undefined): string | undefined {
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
}
/**
 * Assembles the `web_fetch` result payload for a page retrieved via Firecrawl.
 *
 * The Firecrawl text is passed through `wrapWebFetchContent` (bounded by
 * `maxChars`), and the title and warning through `wrapWebFetchField`, so the
 * returned object carries wrapped external content alongside unwrapped
 * protocol metadata (URLs, status, content type).
 *
 * @param params.firecrawl - Result of `fetchFirecrawlContent`.
 * @param params.rawUrl - URL as originally requested; emitted unchanged as `url`.
 * @param params.finalUrlFallback - Used for `finalUrl` when Firecrawl reports a
 *   falsy final URL (including an empty string).
 * @param params.statusFallback - Used for `status` only when Firecrawl's status
 *   is `null` or `undefined`.
 * @param params.extractMode - Echoed back verbatim in the payload.
 * @param params.maxChars - Character budget forwarded to `wrapWebFetchContent`.
 * @param params.tookMs - Elapsed time measured by the caller.
 * @returns The payload object; `fetchedAt` is stamped at the time of this call.
 */
function buildFirecrawlWebFetchPayload(params: {
  firecrawl: Awaited<ReturnType<typeof fetchFirecrawlContent>>;
  rawUrl: string;
  finalUrlFallback: string;
  statusFallback: number;
  extractMode: ExtractMode;
  maxChars: number;
  tookMs: number;
}): Record<string, unknown> {
  const { firecrawl, rawUrl, finalUrlFallback, statusFallback, extractMode, maxChars, tookMs } =
    params;

  const body = wrapWebFetchContent(firecrawl.text, maxChars);

  // Only wrap a title that is actually present; otherwise leave it undefined.
  let title: string | undefined;
  if (firecrawl.title) {
    title = wrapWebFetchField(firecrawl.title);
  }

  // Marker describing the provenance/wrapping state of the content fields.
  const externalContent = {
    untrusted: true,
    source: "web_fetch",
    wrapped: true,
  };

  return {
    // URLs stay raw (unwrapped) so downstream tools can chain on them.
    url: rawUrl,
    finalUrl: firecrawl.finalUrl || finalUrlFallback,
    status: firecrawl.status ?? statusFallback,
    // Protocol metadata, intentionally not wrapped.
    contentType: "text/markdown",
    title,
    extractMode,
    extractor: "firecrawl",
    externalContent,
    truncated: body.truncated,
    // `length` mirrors the wrapped length; `rawLength` is the actual content length.
    length: body.wrappedLength,
    rawLength: body.rawLength,
    wrappedLength: body.wrappedLength,
    fetchedAt: new Date().toISOString(),
    tookMs,
    text: body.text,
    warning: wrapWebFetchField(firecrawl.warning),
  };
}
function normalizeContentType(value: string | null | undefined): string | undefined {
if (!value) {
return undefined;
@@ -452,30 +489,15 @@ async function runWebFetch(params: {
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
});
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
const payload = {
url: params.url, // Keep raw for tool chaining
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
status: firecrawl.status ?? 200,
contentType: "text/markdown", // Protocol metadata, don't wrap
title: wrappedTitle,
const payload = buildFirecrawlWebFetchPayload({
firecrawl,
rawUrl: params.url,
finalUrlFallback: finalUrl,
statusFallback: 200,
extractMode: params.extractMode,
extractor: "firecrawl",
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
maxChars: params.maxChars,
tookMs: Date.now() - start,
text: wrapped.text,
warning: wrapWebFetchField(firecrawl.warning),
};
});
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
}
@@ -496,30 +518,15 @@ async function runWebFetch(params: {
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
});
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
const payload = {
url: params.url, // Keep raw for tool chaining
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
status: firecrawl.status ?? res.status,
contentType: "text/markdown", // Protocol metadata, don't wrap
title: wrappedTitle,
const payload = buildFirecrawlWebFetchPayload({
firecrawl,
rawUrl: params.url,
finalUrlFallback: finalUrl,
statusFallback: res.status,
extractMode: params.extractMode,
extractor: "firecrawl",
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
maxChars: params.maxChars,
tookMs: Date.now() - start,
text: wrapped.text,
warning: wrapWebFetchField(firecrawl.warning),
};
});
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
}