fix: harden provider error handling

This commit is contained in:
pi
2026-04-12 03:36:32 +01:00
parent 3eb2ef166c
commit 82b0d33ebd
7 changed files with 148 additions and 44 deletions

View File

@@ -168,3 +168,46 @@ test("createFirecrawlProvider fetches each URL via /scrape and preserves per-url
},
]);
});
// Verifies that a multi-URL fetch never has more than four /scrape calls in flight at once.
test("createFirecrawlProvider limits concurrent scrape requests", async () => {
  // Gauge shared with the stub transport: `current` is requests in flight, `peak` its high-water mark.
  const inFlight = { current: 0, peak: 0 };

  const provider = createFirecrawlProvider(cloudConfig, async (_url, init) => {
    inFlight.current += 1;
    if (inFlight.current > inFlight.peak) {
      inFlight.peak = inFlight.current;
    }

    const requestBody = JSON.parse(String(init?.body));

    // Hold the request open briefly so overlapping calls are observable.
    await new Promise((resolve) => setTimeout(resolve, 10));
    inFlight.current -= 1;

    const responsePayload = {
      success: true,
      data: {
        metadata: {
          title: requestBody.url,
          sourceURL: requestBody.url,
        },
        markdown: `Fetched ${requestBody.url}`,
      },
    };
    return new Response(JSON.stringify(responsePayload), { status: 200 });
  });

  // Seven URLs: more than the limit, so an unbounded fan-out would exceed it.
  const urls = ["a", "b", "c", "d", "e", "f", "g"].map((host) => `https://${host}.example`);

  const result = await provider.fetch({ urls });

  assert.equal(result.results.length, urls.length);
  assert.ok(inFlight.peak <= 4, `expected max concurrency <= 4, got ${inFlight.peak}`);
});

View File

@@ -9,6 +9,7 @@ import type {
} from "./types.ts";
// Default Firecrawl API root (v2). NOTE(review): presumably used when the provider
// config does not supply its own base URL — confirm at the call site.
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2";
// Concurrency limit passed to mapWithConcurrency when a fetch request scrapes several URLs.
const DEFAULT_FIRECRAWL_FETCH_CONCURRENCY = 4;
type FirecrawlSearchPayload = {
id?: string;
@@ -124,6 +125,26 @@ function resolveFetchFormats(request: NormalizedFetchRequest) {
return uniqueFormats(formats.length > 0 ? formats : ["markdown"]);
}
/**
 * Maps `items` through an async `iteratee` while keeping at most `concurrency`
 * calls in flight, and returns the results in input order.
 *
 * @param items - Values to process; the array is not mutated.
 * @param concurrency - Requested number of parallel workers. Fractions are
 *   floored, and `NaN`, zero, or negative values fall back to a single worker.
 *   The worker count never exceeds `items.length`.
 * @param iteratee - Async mapper invoked with each item and its index.
 * @returns Results positioned at the same index as their source item.
 * @throws Rejects with the first error raised by `iteratee`. Once a failure is
 *   seen no further items are dispatched; calls already in flight still settle.
 */
async function mapWithConcurrency<TItem, TResult>(
  items: readonly TItem[],
  concurrency: number,
  iteratee: (item: TItem, index: number) => Promise<TResult>,
): Promise<TResult[]> {
  const results = new Array<TResult>(items.length);
  if (items.length === 0) {
    return results;
  }

  // A NaN length would make Array.from create zero workers and silently skip
  // every item, so normalise the request before sizing the pool.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const workerCount = Math.max(1, Math.min(requested, items.length));

  let nextIndex = 0;
  let failed = false;

  // Each worker claims the next unprocessed index until the queue drains or a
  // sibling fails. Claiming is synchronous, so no index is handed out twice.
  const runWorker = async (): Promise<void> => {
    while (!failed && nextIndex < items.length) {
      const currentIndex = nextIndex;
      nextIndex += 1;
      try {
        results[currentIndex] = await iteratee(items[currentIndex] as TItem, currentIndex);
      } catch (error) {
        // Stop the other workers from starting work whose result is discarded.
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
function pickRequestId(payload: { id?: string; request_id?: string }) {
return typeof payload.id === "string"
? payload.id
@@ -180,37 +201,35 @@ export function createFirecrawlProvider(
validateFirecrawlFetchRequest(config.name, request);
const formats = resolveFetchFormats(request);
const results = await Promise.all(
request.urls.map(async (url) => {
try {
const payload = await postJson<FirecrawlScrapePayload>({
providerName: config.name,
baseUrl,
path: "/scrape",
apiKey: config.apiKey,
fetchImpl,
body: {
url,
formats,
},
});
return {
url: payload.data?.metadata?.sourceURL ?? url,
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
};
} catch (error) {
return {
const results = await mapWithConcurrency(request.urls, DEFAULT_FIRECRAWL_FETCH_CONCURRENCY, async (url) => {
try {
const payload = await postJson<FirecrawlScrapePayload>({
providerName: config.name,
baseUrl,
path: "/scrape",
apiKey: config.apiKey,
fetchImpl,
body: {
url,
title: null,
error: (error as Error).message,
};
}
}),
);
formats,
},
});
return {
url: payload.data?.metadata?.sourceURL ?? url,
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
};
} catch (error) {
return {
url,
title: null,
error: error instanceof Error ? error.message : String(error),
};
}
});
return {
providerName: config.name,

View File

@@ -0,0 +1,24 @@
import test from "node:test";
import assert from "node:assert/strict";
import { postJson } from "./http.ts";
// A 2xx response whose body is not JSON must reject with provider name, status and body in the message.
test("postJson surfaces invalid JSON responses with HTTP context", async () => {
  // Stub transport: always answers 200 OK with an HTML body.
  const respondWithHtml = async () =>
    new Response("<html>not json</html>", {
      status: 200,
      statusText: "OK",
      headers: {
        "content-type": "text/html",
      },
    });

  const pendingCall = () =>
    postJson({
      providerName: "firecrawl-main",
      baseUrl: "https://api.firecrawl.dev/v2",
      path: "/search",
      body: { query: "pi docs" },
      fetchImpl: respondWithHtml,
    });

  await assert.rejects(
    pendingCall,
    /Provider "firecrawl-main" HTTP 200 OK: invalid JSON response: <html>not json<\/html>/,
  );
});

View File

@@ -15,11 +15,13 @@ export function joinApiUrl(baseUrl: string, path: string) {
return `${normalizedBaseUrl}${normalizedPath}`;
}
/**
 * Builds the error message used for failed provider HTTP calls.
 *
 * @param providerName - Configured provider name, quoted in the message.
 * @param response - Response whose status code and status text are reported.
 * @param text - Detail text; only its first 300 characters are included.
 * @returns The formatted single-line message.
 */
function formatHttpErrorMessage(providerName: string, response: Response, text: string) {
  const { status, statusText } = response;
  // Cap the detail so a large body cannot produce an oversized error message.
  const excerpt = text.slice(0, 300);
  return `Provider "${providerName}" HTTP ${status} ${statusText}: ${excerpt}`;
}
/**
 * Reads the body of a provider response and throws an error describing it.
 *
 * @param providerName - Configured provider name included in the message.
 * @param response - The HTTP response to report; its body is consumed here.
 * @throws Always throws an `Error` whose message comes from `formatHttpErrorMessage`.
 */
export async function readHttpError(providerName: string, response: Response): Promise<never> {
  const text = await response.text();
  // Single throw via the shared formatter; the message format lives in one place.
  throw new Error(formatHttpErrorMessage(providerName, response, text));
}
export async function postJson<T>({
@@ -44,9 +46,14 @@ export async function postJson<T>({
body: JSON.stringify(body),
});
const text = await response.text();
if (!response.ok) {
await readHttpError(providerName, response);
throw new Error(formatHttpErrorMessage(providerName, response, text));
}
return (await response.json()) as T;
try {
return JSON.parse(text) as T;
} catch {
throw new Error(formatHttpErrorMessage(providerName, response, `invalid JSON response: ${text}`));
}
}

View File

@@ -107,8 +107,16 @@ export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "t
}
/**
 * Instantiates the provider that matches `providerConfig.type`.
 *
 * Switching on the `type` discriminant narrows the config in each branch, so
 * each descriptor's `createProvider` receives its own config type without a cast.
 *
 * @param providerConfig - Provider configuration; `type` selects the factory.
 * @returns The provider created by the matching descriptor.
 * @throws Error if `type` is not one of the handled provider types.
 */
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
  switch (providerConfig.type) {
    case "exa":
      return providerDescriptors.exa.createProvider(providerConfig);
    case "tavily":
      return providerDescriptors.tavily.createProvider(providerConfig);
    case "firecrawl":
      return providerDescriptors.firecrawl.createProvider(providerConfig);
    default:
      // Reachable only with configs that bypass the type system (e.g. unvalidated JSON).
      throw new Error(`Unknown provider type: ${(providerConfig as { type: string }).type}`);
  }
}
export function validateSearchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedSearchRequest) {

View File

@@ -431,7 +431,7 @@ test("search records provider factory failures and follows fallbacks", async ()
}),
createProvider(providerConfig) {
if (providerConfig.name === "firecrawl-main") {
throw new Error("factory boom:firecrawl-main");
throw "factory boom:firecrawl-main";
}
return createProvider(providerConfig.name, providerConfig.type, {

View File

@@ -58,11 +58,14 @@ function buildExecutionMeta(
};
}
/**
 * Produces a human-readable description of any thrown value.
 *
 * @param error - The caught value; may be an `Error`, a string, or anything else.
 * @returns `error.message` for `Error` instances; for other objects a string
 *   `message` property if present, otherwise a JSON rendering when the object
 *   has no custom `toString`; in all remaining cases `String(error)`.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  if (typeof error === "object" && error !== null) {
    const { message } = error as { message?: unknown };
    if (typeof message === "string") {
      return message;
    }

    // Plain objects would stringify to "[object Object]", which hides the
    // failure details; prefer a JSON rendering when no custom toString exists.
    const stringifier = (error as { toString?: unknown }).toString;
    if (typeof stringifier !== "function" || stringifier === Object.prototype.toString) {
      try {
        const serialized: unknown = JSON.stringify(error);
        if (typeof serialized === "string") {
          return serialized;
        }
      } catch {
        // Not serialisable (e.g. circular reference); fall through to String().
      }
    }
  }

  return String(error);
}
/**
 * Attaches the list of provider attempts to an error so callers can inspect
 * which providers were tried.
 *
 * @param error - The caught value. Non-`Error` values (for example a thrown
 *   string) are wrapped in a new `Error` whose message is `String(error)`.
 * @param attempts - Attempts recorded so far; stored by reference.
 * @returns The original `Error` instance, or the wrapper, with
 *   `execution.attempts` set.
 */
function attachAttempts(error: unknown, attempts: ProviderExecutionAttempt[]) {
  // Normalise first so the attempts are never dropped for non-Error throws.
  const normalizedError = error instanceof Error ? error : new Error(String(error));
  (normalizedError as Error & { execution?: { attempts: ProviderExecutionAttempt[] } }).execution = { attempts };
  return normalizedError;
}
export function createWebSearchRuntime(
@@ -125,7 +128,7 @@ export function createWebSearchRuntime(
attempts.push({
providerName,
status: "failed",
reason: (error as Error).message,
reason: describeError(error),
});
lastError = error;
@@ -155,7 +158,7 @@ export function createWebSearchRuntime(
attempts.push({
providerName,
status: "failed",
reason: (error as Error).message,
reason: describeError(error),
});
lastError = error;