refactor(web-fetch): dedupe firecrawl payload builder
This commit is contained in:
@@ -286,6 +286,43 @@ function wrapWebFetchField(value: string | undefined): string | undefined {
|
||||
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
|
||||
}
|
||||
|
||||
/**
 * Assembles the `web_fetch` result payload for a page retrieved through Firecrawl.
 *
 * The page text is passed through `wrapWebFetchContent` (bounded by `maxChars`),
 * and the title and warning are passed through `wrapWebFetchField`. URLs, status
 * and content type are emitted as-is.
 *
 * @param params.firecrawl - Result of `fetchFirecrawlContent`.
 * @param params.rawUrl - URL as originally requested; returned untouched as `url`.
 * @param params.finalUrlFallback - Used for `finalUrl` when Firecrawl's `finalUrl` is falsy.
 * @param params.statusFallback - Used for `status` when Firecrawl's `status` is null/undefined.
 * @param params.extractMode - Echoed back in the payload.
 * @param params.maxChars - Character budget handed to `wrapWebFetchContent`.
 * @param params.tookMs - Elapsed time reported in the payload.
 * @returns The payload object, with keys in a fixed order.
 */
function buildFirecrawlWebFetchPayload(params: {
  firecrawl: Awaited<ReturnType<typeof fetchFirecrawlContent>>;
  rawUrl: string;
  finalUrlFallback: string;
  statusFallback: number;
  extractMode: ExtractMode;
  maxChars: number;
  tookMs: number;
}): Record<string, unknown> {
  const { firecrawl, rawUrl, finalUrlFallback, statusFallback, extractMode, maxChars, tookMs } =
    params;

  const body = wrapWebFetchContent(firecrawl.text, maxChars);

  // A missing or empty title is reported as undefined and never sent to the wrapper.
  let title: string | undefined;
  if (firecrawl.title) {
    title = wrapWebFetchField(firecrawl.title);
  }

  // Marker describing how the text/title/warning fields were produced.
  const externalContent = {
    untrusted: true,
    source: "web_fetch",
    wrapped: true,
  };

  return {
    // URLs stay raw so the value can be fed into a follow-up tool call.
    url: rawUrl,
    finalUrl: firecrawl.finalUrl || finalUrlFallback,
    status: firecrawl.status ?? statusFallback,
    // Protocol metadata: intentionally not wrapped.
    contentType: "text/markdown",
    title,
    extractMode,
    extractor: "firecrawl",
    externalContent,
    truncated: body.truncated,
    // `length` mirrors the wrapped length; `rawLength` is the unwrapped content length.
    length: body.wrappedLength,
    rawLength: body.rawLength,
    wrappedLength: body.wrappedLength,
    fetchedAt: new Date().toISOString(),
    tookMs,
    text: body.text,
    warning: wrapWebFetchField(firecrawl.warning),
  };
}
|
||||
|
||||
function normalizeContentType(value: string | null | undefined): string | undefined {
|
||||
if (!value) {
|
||||
return undefined;
|
||||
@@ -452,30 +489,15 @@ async function runWebFetch(params: {
|
||||
storeInCache: params.firecrawlStoreInCache,
|
||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
||||
});
|
||||
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
|
||||
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
|
||||
const payload = {
|
||||
url: params.url, // Keep raw for tool chaining
|
||||
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
|
||||
status: firecrawl.status ?? 200,
|
||||
contentType: "text/markdown", // Protocol metadata, don't wrap
|
||||
title: wrappedTitle,
|
||||
const payload = buildFirecrawlWebFetchPayload({
|
||||
firecrawl,
|
||||
rawUrl: params.url,
|
||||
finalUrlFallback: finalUrl,
|
||||
statusFallback: 200,
|
||||
extractMode: params.extractMode,
|
||||
extractor: "firecrawl",
|
||||
externalContent: {
|
||||
untrusted: true,
|
||||
source: "web_fetch",
|
||||
wrapped: true,
|
||||
},
|
||||
truncated: wrapped.truncated,
|
||||
length: wrapped.wrappedLength,
|
||||
rawLength: wrapped.rawLength, // Actual content length, not wrapped
|
||||
wrappedLength: wrapped.wrappedLength,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
maxChars: params.maxChars,
|
||||
tookMs: Date.now() - start,
|
||||
text: wrapped.text,
|
||||
warning: wrapWebFetchField(firecrawl.warning),
|
||||
};
|
||||
});
|
||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
||||
return payload;
|
||||
}
|
||||
@@ -496,30 +518,15 @@ async function runWebFetch(params: {
|
||||
storeInCache: params.firecrawlStoreInCache,
|
||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
||||
});
|
||||
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
|
||||
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
|
||||
const payload = {
|
||||
url: params.url, // Keep raw for tool chaining
|
||||
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
|
||||
status: firecrawl.status ?? res.status,
|
||||
contentType: "text/markdown", // Protocol metadata, don't wrap
|
||||
title: wrappedTitle,
|
||||
const payload = buildFirecrawlWebFetchPayload({
|
||||
firecrawl,
|
||||
rawUrl: params.url,
|
||||
finalUrlFallback: finalUrl,
|
||||
statusFallback: res.status,
|
||||
extractMode: params.extractMode,
|
||||
extractor: "firecrawl",
|
||||
externalContent: {
|
||||
untrusted: true,
|
||||
source: "web_fetch",
|
||||
wrapped: true,
|
||||
},
|
||||
truncated: wrapped.truncated,
|
||||
length: wrapped.wrappedLength,
|
||||
rawLength: wrapped.rawLength, // Actual content length, not wrapped
|
||||
wrappedLength: wrapped.wrappedLength,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
maxChars: params.maxChars,
|
||||
tookMs: Date.now() - start,
|
||||
text: wrapped.text,
|
||||
warning: wrapWebFetchField(firecrawl.warning),
|
||||
};
|
||||
});
|
||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
||||
return payload;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user