fix: harden provider error handling
This commit is contained in:
@@ -168,3 +168,46 @@ test("createFirecrawlProvider fetches each URL via /scrape and preserves per-url
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
// Checks that provider.fetch() never has more than four scrape requests in
// flight at once, and that every requested URL still yields a result.
test("createFirecrawlProvider limits concurrent scrape requests", async () => {
  let active = 0;
  let maxActive = 0;

  // Fake fetch implementation: counts in-flight calls, records the high-water
  // mark, and echoes the requested URL back in a successful scrape payload.
  const provider = createFirecrawlProvider(cloudConfig, async (_url, init) => {
    active += 1;
    maxActive = Math.max(maxActive, active);

    const body = JSON.parse(String(init?.body));
    // Hold the request open briefly so overlapping calls are observable.
    await new Promise((resolve) => setTimeout(resolve, 10));

    active -= 1;
    return new Response(
      JSON.stringify({
        success: true,
        data: {
          metadata: {
            title: body.url,
            sourceURL: body.url,
          },
          markdown: `Fetched ${body.url}`,
        },
      }),
      { status: 200 },
    );
  });

  // Seven URLs: more than the asserted limit of four, so the cap is exercised.
  const urls = [
    "https://a.example",
    "https://b.example",
    "https://c.example",
    "https://d.example",
    "https://e.example",
    "https://f.example",
    "https://g.example",
  ];

  const result = await provider.fetch({ urls });

  assert.equal(result.results.length, urls.length);
  assert.ok(maxActive <= 4, `expected max concurrency <= 4, got ${maxActive}`);
});
|
||||
|
||||
@@ -9,6 +9,7 @@ import type {
|
||||
} from "./types.ts";
|
||||
|
||||
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2";
|
||||
const DEFAULT_FIRECRAWL_FETCH_CONCURRENCY = 4;
|
||||
|
||||
type FirecrawlSearchPayload = {
|
||||
id?: string;
|
||||
@@ -124,6 +125,26 @@ function resolveFetchFormats(request: NormalizedFetchRequest) {
|
||||
return uniqueFormats(formats.length > 0 ? formats : ["markdown"]);
|
||||
}
|
||||
|
||||
/**
 * Maps `items` through an async `iteratee`, running at most `concurrency`
 * calls at the same time. Results are returned in input order, regardless of
 * the order in which the calls settle.
 *
 * @param items - Values to process.
 * @param concurrency - Upper bound on simultaneous `iteratee` calls. Values
 *   below 1 and `NaN` are treated as 1; fractional values are rounded down;
 *   values above `items.length` are capped to it.
 * @param iteratee - Async mapper invoked once per item.
 * @returns The mapped results, index-aligned with `items`.
 * @throws Rejects with the first `iteratee` rejection. Once a call has
 *   rejected, no further items are started (calls already in flight are left
 *   to settle on their own).
 */
async function mapWithConcurrency<TItem, TResult>(
  items: TItem[],
  concurrency: number,
  iteratee: (item: TItem) => Promise<TResult>,
): Promise<TResult[]> {
  const results = new Array<TResult>(items.length);

  // NaN would propagate through Math.min/Math.max and make Array.from create
  // zero workers, silently returning an array of holes. Fall back to 1.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const workerCount = Math.max(1, Math.min(requested, items.length));

  let nextIndex = 0;
  let failed = false;

  // Each worker repeatedly claims the next unclaimed index. Claiming is
  // synchronous (no await between read and increment), so no index is
  // processed twice.
  const runWorker = async (): Promise<void> => {
    while (!failed && nextIndex < items.length) {
      const currentIndex = nextIndex;
      nextIndex += 1;
      try {
        // The loop condition guarantees currentIndex is within bounds.
        results[currentIndex] = await iteratee(items[currentIndex] as TItem);
      } catch (error) {
        // Tell sibling workers to stop picking up new items; their results
        // would be discarded once Promise.all rejects.
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
||||
|
||||
function pickRequestId(payload: { id?: string; request_id?: string }) {
|
||||
return typeof payload.id === "string"
|
||||
? payload.id
|
||||
@@ -180,37 +201,35 @@ export function createFirecrawlProvider(
|
||||
validateFirecrawlFetchRequest(config.name, request);
|
||||
const formats = resolveFetchFormats(request);
|
||||
|
||||
const results = await Promise.all(
|
||||
request.urls.map(async (url) => {
|
||||
try {
|
||||
const payload = await postJson<FirecrawlScrapePayload>({
|
||||
providerName: config.name,
|
||||
baseUrl,
|
||||
path: "/scrape",
|
||||
apiKey: config.apiKey,
|
||||
fetchImpl,
|
||||
body: {
|
||||
url,
|
||||
formats,
|
||||
},
|
||||
});
|
||||
|
||||
return {
|
||||
url: payload.data?.metadata?.sourceURL ?? url,
|
||||
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
|
||||
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
|
||||
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
|
||||
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
const results = await mapWithConcurrency(request.urls, DEFAULT_FIRECRAWL_FETCH_CONCURRENCY, async (url) => {
|
||||
try {
|
||||
const payload = await postJson<FirecrawlScrapePayload>({
|
||||
providerName: config.name,
|
||||
baseUrl,
|
||||
path: "/scrape",
|
||||
apiKey: config.apiKey,
|
||||
fetchImpl,
|
||||
body: {
|
||||
url,
|
||||
title: null,
|
||||
error: (error as Error).message,
|
||||
};
|
||||
}
|
||||
}),
|
||||
);
|
||||
formats,
|
||||
},
|
||||
});
|
||||
|
||||
return {
|
||||
url: payload.data?.metadata?.sourceURL ?? url,
|
||||
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
|
||||
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
|
||||
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
|
||||
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
url,
|
||||
title: null,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
providerName: config.name,
|
||||
|
||||
24
src/providers/http.test.ts
Normal file
24
src/providers/http.test.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { postJson } from "./http.ts";
|
||||
|
||||
// A 200 response whose body is not JSON must reject with an error that names
// the provider, the HTTP status line, and the offending body text.
test("postJson surfaces invalid JSON responses with HTTP context", async () => {
  await assert.rejects(
    () =>
      postJson({
        providerName: "firecrawl-main",
        baseUrl: "https://api.firecrawl.dev/v2",
        path: "/search",
        body: { query: "pi docs" },
        // Stub transport: always answers with an HTML page and status 200 OK.
        fetchImpl: async () =>
          new Response("<html>not json</html>", {
            status: 200,
            statusText: "OK",
            headers: {
              "content-type": "text/html",
            },
          }),
      }),
    /Provider "firecrawl-main" HTTP 200 OK: invalid JSON response: <html>not json<\/html>/,
  );
});
|
||||
@@ -15,11 +15,13 @@ export function joinApiUrl(baseUrl: string, path: string) {
|
||||
return `${normalizedBaseUrl}${normalizedPath}`;
|
||||
}
|
||||
|
||||
/**
 * Builds the error message used for failed or malformed provider responses.
 *
 * @param providerName - Configured provider name, quoted in the message.
 * @param response - Response whose `status` and `statusText` are reported.
 * @param text - Detail text (typically the response body); only the first
 *   300 characters are included.
 * @returns `Provider "<name>" HTTP <status> <statusText>: <text excerpt>`.
 */
function formatHttpErrorMessage(providerName: string, response: Response, text: string): string {
  // Cap the detail so a large body cannot produce an enormous error message.
  const excerpt = text.slice(0, 300);
  return `Provider "${providerName}" HTTP ${response.status} ${response.statusText}: ${excerpt}`;
}
|
||||
|
||||
/**
 * Reads the body of `response` as text and throws an `Error` whose message is
 * built by `formatHttpErrorMessage` (provider name, HTTP status line, and the
 * leading part of the body).
 *
 * @param providerName - Provider name to include in the message.
 * @param response - Response to report; its body is consumed by this call.
 * @returns Never resolves normally; the returned promise always rejects.
 * @throws Error describing the HTTP response.
 */
export async function readHttpError(providerName: string, response: Response): Promise<never> {
  const text = await response.text();
  throw new Error(formatHttpErrorMessage(providerName, response, text));
}
|
||||
|
||||
export async function postJson<T>({
|
||||
@@ -44,9 +46,14 @@ export async function postJson<T>({
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
|
||||
const text = await response.text();
|
||||
if (!response.ok) {
|
||||
await readHttpError(providerName, response);
|
||||
throw new Error(formatHttpErrorMessage(providerName, response, text));
|
||||
}
|
||||
|
||||
return (await response.json()) as T;
|
||||
try {
|
||||
return JSON.parse(text) as T;
|
||||
} catch {
|
||||
throw new Error(formatHttpErrorMessage(providerName, response, `invalid JSON response: ${text}`));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,8 +107,16 @@ export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "t
|
||||
}
|
||||
|
||||
/**
 * Creates a provider instance for `providerConfig` by dispatching on its
 * `type` to the matching entry in `providerDescriptors`.
 *
 * Switching on the discriminant narrows `providerConfig` in each branch, so
 * every `createProvider` call receives its own config type without a cast.
 *
 * @param providerConfig - Provider configuration; `type` selects the factory.
 * @returns The provider produced by the selected descriptor.
 * @throws Error when `providerConfig.type` is not one of the handled types.
 */
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
  switch (providerConfig.type) {
    case "exa":
      return providerDescriptors.exa.createProvider(providerConfig);
    case "tavily":
      return providerDescriptors.tavily.createProvider(providerConfig);
    case "firecrawl":
      return providerDescriptors.firecrawl.createProvider(providerConfig);
    default:
      // Reached only when a config bypassed type checking (e.g. parsed from
      // untyped input); report the offending type instead of failing later.
      throw new Error(`Unknown provider type: ${(providerConfig as { type: string }).type}`);
  }
}
|
||||
|
||||
export function validateSearchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedSearchRequest) {
|
||||
|
||||
@@ -431,7 +431,7 @@ test("search records provider factory failures and follows fallbacks", async ()
|
||||
}),
|
||||
createProvider(providerConfig) {
|
||||
if (providerConfig.name === "firecrawl-main") {
|
||||
throw new Error("factory boom:firecrawl-main");
|
||||
throw "factory boom:firecrawl-main";
|
||||
}
|
||||
|
||||
return createProvider(providerConfig.name, providerConfig.type, {
|
||||
|
||||
@@ -58,11 +58,14 @@ function buildExecutionMeta(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Produces a human-readable description of any thrown value.
 *
 * - `Error` instances: their `message`.
 * - Primitives (string, number, `null`, `undefined`, ...): `String(value)`.
 * - Objects carrying a string `message` property: that message.
 * - Other objects: their own string conversion when it is informative,
 *   otherwise their JSON form, otherwise the generic `[object ...]` tag.
 *
 * This function never throws, so it is safe to call from error handlers.
 *
 * @param error - The caught value.
 * @returns A description suitable for logs and attempt records.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error !== "object" || error === null) {
    return String(error);
  }

  // Error-like objects that are not Error instances.
  const { message } = error as { message?: unknown };
  if (typeof message === "string") {
    return message;
  }

  try {
    const text = String(error);
    // The default Object conversion carries no information; try JSON instead.
    if (text !== "[object Object]") {
      return text;
    }
  } catch {
    // String() throws for objects without a usable toString/valueOf
    // (e.g. Object.create(null)); fall through to JSON.
  }

  try {
    const json = JSON.stringify(error);
    if (typeof json === "string") {
      return json;
    }
  } catch {
    // Circular structures and similar cannot be serialised.
  }

  return Object.prototype.toString.call(error);
}
|
||||
|
||||
/**
 * Attaches the list of provider attempts to an error as
 * `error.execution.attempts` and returns that error.
 *
 * Non-`Error` values (for example a thrown string) are first wrapped in a new
 * `Error` whose message is `String(value)`, so the attempts are never lost
 * and callers always receive a real `Error`.
 *
 * @param error - The caught value.
 * @param attempts - Attempts recorded so far; stored by reference.
 * @returns The original `Error` (mutated in place) or the wrapping `Error`,
 *   carrying `execution.attempts`.
 */
function attachAttempts(
  error: unknown,
  attempts: ProviderExecutionAttempt[],
): Error & { execution: { attempts: ProviderExecutionAttempt[] } } {
  const normalizedError = error instanceof Error ? error : new Error(String(error));
  return Object.assign(normalizedError, { execution: { attempts } });
}
|
||||
|
||||
export function createWebSearchRuntime(
|
||||
@@ -125,7 +128,7 @@ export function createWebSearchRuntime(
|
||||
attempts.push({
|
||||
providerName,
|
||||
status: "failed",
|
||||
reason: (error as Error).message,
|
||||
reason: describeError(error),
|
||||
});
|
||||
lastError = error;
|
||||
|
||||
@@ -155,7 +158,7 @@ export function createWebSearchRuntime(
|
||||
attempts.push({
|
||||
providerName,
|
||||
status: "failed",
|
||||
reason: (error as Error).message,
|
||||
reason: describeError(error),
|
||||
});
|
||||
lastError = error;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user