fix: harden provider error handling
This commit is contained in:
@@ -168,3 +168,46 @@ test("createFirecrawlProvider fetches each URL via /scrape and preserves per-url
|
|||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Verifies that provider.fetch keeps at most four /scrape requests in flight
// when asked for more URLs than that. The stub fetch tracks how many calls are
// active at once and records the high-water mark.
test("createFirecrawlProvider limits concurrent scrape requests", async () => {
  let active = 0;
  let maxActive = 0;

  const provider = createFirecrawlProvider(cloudConfig, async (_url, init) => {
    active += 1;
    maxActive = Math.max(maxActive, active);

    try {
      const body = JSON.parse(String(init?.body));

      // Hold the request open briefly so overlapping calls are observable.
      await new Promise((resolve) => setTimeout(resolve, 10));

      return new Response(
        JSON.stringify({
          success: true,
          data: {
            metadata: {
              title: body.url,
              sourceURL: body.url,
            },
            markdown: `Fetched ${body.url}`,
          },
        }),
        { status: 200 },
      );
    } finally {
      // Always release the slot, even if the stub throws; otherwise a stub
      // failure would inflate `maxActive` and surface as a bogus concurrency
      // violation instead of the real error.
      active -= 1;
    }
  });

  const urls = [
    "https://a.example",
    "https://b.example",
    "https://c.example",
    "https://d.example",
    "https://e.example",
    "https://f.example",
    "https://g.example",
  ];

  const result = await provider.fetch({ urls });

  assert.equal(result.results.length, urls.length);
  assert.ok(maxActive <= 4, `expected max concurrency <= 4, got ${maxActive}`);
});
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import type {
|
|||||||
} from "./types.ts";
|
} from "./types.ts";
|
||||||
|
|
||||||
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2";
|
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2";
|
||||||
|
const DEFAULT_FIRECRAWL_FETCH_CONCURRENCY = 4;
|
||||||
|
|
||||||
type FirecrawlSearchPayload = {
|
type FirecrawlSearchPayload = {
|
||||||
id?: string;
|
id?: string;
|
||||||
@@ -124,6 +125,26 @@ function resolveFetchFormats(request: NormalizedFetchRequest) {
|
|||||||
return uniqueFormats(formats.length > 0 ? formats : ["markdown"]);
|
return uniqueFormats(formats.length > 0 ? formats : ["markdown"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Maps `items` through an async `iteratee` with a bounded number of calls in
 * flight, returning the results in input order.
 *
 * A fixed pool of workers shares one cursor: each worker claims the next
 * unclaimed index, awaits the iteratee for it, stores the result at that
 * index, and repeats until the cursor reaches the end.
 *
 * @param items - Values to process.
 * @param concurrency - Maximum number of simultaneous iteratee calls. Values
 *   below 1 and `NaN` are treated as 1; fractional values are floored;
 *   `Infinity` means one worker per item.
 * @param iteratee - Async function applied to each item.
 * @returns Results positioned at the same index as their input item.
 * @throws Rejects with the first iteratee rejection. Once a rejection is
 *   observed, no further items are started; calls already in flight still
 *   run to completion.
 */
async function mapWithConcurrency<TItem, TResult>(
  items: TItem[],
  concurrency: number,
  iteratee: (item: TItem) => Promise<TResult>,
): Promise<TResult[]> {
  const results = new Array<TResult>(items.length);
  if (items.length === 0) {
    return results;
  }

  // `Array.from({ length: NaN })` yields an empty array, which would start no
  // workers and silently return unfilled results, so normalise first.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const workerCount = Math.max(1, Math.min(requested, items.length));

  let nextIndex = 0;
  let failed = false;

  const runWorker = async (): Promise<void> => {
    while (!failed && nextIndex < items.length) {
      // Claiming the index is synchronous, so two workers never take the same item.
      const currentIndex = nextIndex;
      nextIndex += 1;

      try {
        results[currentIndex] = await iteratee(items[currentIndex]!);
      } catch (error) {
        // Stop sibling workers from picking up new work for a result that is
        // already going to be a rejection.
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
||||||
|
|
||||||
function pickRequestId(payload: { id?: string; request_id?: string }) {
|
function pickRequestId(payload: { id?: string; request_id?: string }) {
|
||||||
return typeof payload.id === "string"
|
return typeof payload.id === "string"
|
||||||
? payload.id
|
? payload.id
|
||||||
@@ -180,37 +201,35 @@ export function createFirecrawlProvider(
|
|||||||
validateFirecrawlFetchRequest(config.name, request);
|
validateFirecrawlFetchRequest(config.name, request);
|
||||||
const formats = resolveFetchFormats(request);
|
const formats = resolveFetchFormats(request);
|
||||||
|
|
||||||
const results = await Promise.all(
|
const results = await mapWithConcurrency(request.urls, DEFAULT_FIRECRAWL_FETCH_CONCURRENCY, async (url) => {
|
||||||
request.urls.map(async (url) => {
|
try {
|
||||||
try {
|
const payload = await postJson<FirecrawlScrapePayload>({
|
||||||
const payload = await postJson<FirecrawlScrapePayload>({
|
providerName: config.name,
|
||||||
providerName: config.name,
|
baseUrl,
|
||||||
baseUrl,
|
path: "/scrape",
|
||||||
path: "/scrape",
|
apiKey: config.apiKey,
|
||||||
apiKey: config.apiKey,
|
fetchImpl,
|
||||||
fetchImpl,
|
body: {
|
||||||
body: {
|
|
||||||
url,
|
|
||||||
formats,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
url: payload.data?.metadata?.sourceURL ?? url,
|
|
||||||
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
|
|
||||||
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
|
|
||||||
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
|
|
||||||
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
|
|
||||||
};
|
|
||||||
} catch (error) {
|
|
||||||
return {
|
|
||||||
url,
|
url,
|
||||||
title: null,
|
formats,
|
||||||
error: (error as Error).message,
|
},
|
||||||
};
|
});
|
||||||
}
|
|
||||||
}),
|
return {
|
||||||
);
|
url: payload.data?.metadata?.sourceURL ?? url,
|
||||||
|
title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
|
||||||
|
text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
|
||||||
|
summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
|
||||||
|
images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
title: null,
|
||||||
|
error: error instanceof Error ? error.message : String(error),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
providerName: config.name,
|
providerName: config.name,
|
||||||
|
|||||||
24
src/providers/http.test.ts
Normal file
24
src/providers/http.test.ts
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import test from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { postJson } from "./http.ts";
|
||||||
|
|
||||||
|
// A 2xx response whose body is not JSON must reject with an error that names
// the provider, the HTTP status line, and the offending body text.
test("postJson surfaces invalid JSON responses with HTTP context", async () => {
  // Stub fetch: always answers 200 OK with an HTML document.
  const respondWithHtml = async () =>
    new Response("<html>not json</html>", {
      status: 200,
      statusText: "OK",
      headers: {
        "content-type": "text/html",
      },
    });

  const sendRequest = () =>
    postJson({
      providerName: "firecrawl-main",
      baseUrl: "https://api.firecrawl.dev/v2",
      path: "/search",
      body: { query: "pi docs" },
      fetchImpl: respondWithHtml,
    });

  const expectedMessage =
    /Provider "firecrawl-main" HTTP 200 OK: invalid JSON response: <html>not json<\/html>/;

  await assert.rejects(sendRequest, expectedMessage);
});
|
||||||
@@ -15,11 +15,13 @@ export function joinApiUrl(baseUrl: string, path: string) {
|
|||||||
return `${normalizedBaseUrl}${normalizedPath}`;
|
return `${normalizedBaseUrl}${normalizedPath}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the error message used for failed or malformed provider HTTP responses.
 *
 * Format: `Provider "<name>" HTTP <status>[ <statusText>]: <detail>`.
 *
 * @param providerName - Configured provider name, quoted in the message.
 * @param response - The HTTP response; only `status` and `statusText` are read.
 * @param text - Detail text (typically the response body); only its first
 *   300 characters are included.
 * @returns The formatted message.
 */
function formatHttpErrorMessage(providerName: string, response: Response, text: string) {
  const maxDetailChars = 300;

  // `statusText` can legitimately be empty (e.g. over HTTP/2); omit it rather
  // than leaving a dangling space before the colon.
  const reason = response.statusText.trim();
  const statusLine = reason === "" ? `${response.status}` : `${response.status} ${reason}`;

  return `Provider "${providerName}" HTTP ${statusLine}: ${text.slice(0, maxDetailChars)}`;
}
|
||||||
|
|
||||||
export async function readHttpError(providerName: string, response: Response): Promise<never> {
|
export async function readHttpError(providerName: string, response: Response): Promise<never> {
|
||||||
const text = await response.text();
|
const text = await response.text();
|
||||||
throw new Error(
|
throw new Error(formatHttpErrorMessage(providerName, response, text));
|
||||||
`Provider "${providerName}" HTTP ${response.status} ${response.statusText}: ${text.slice(0, 300)}`,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function postJson<T>({
|
export async function postJson<T>({
|
||||||
@@ -44,9 +46,14 @@ export async function postJson<T>({
|
|||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const text = await response.text();
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
await readHttpError(providerName, response);
|
throw new Error(formatHttpErrorMessage(providerName, response, text));
|
||||||
}
|
}
|
||||||
|
|
||||||
return (await response.json()) as T;
|
try {
|
||||||
|
return JSON.parse(text) as T;
|
||||||
|
} catch {
|
||||||
|
throw new Error(formatHttpErrorMessage(providerName, response, `invalid JSON response: ${text}`));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,8 +107,16 @@ export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "t
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
|
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
|
||||||
const descriptor = getProviderDescriptor(providerConfig);
|
switch (providerConfig.type) {
|
||||||
return descriptor.createProvider(providerConfig as never);
|
case "exa":
|
||||||
|
return providerDescriptors.exa.createProvider(providerConfig);
|
||||||
|
case "tavily":
|
||||||
|
return providerDescriptors.tavily.createProvider(providerConfig);
|
||||||
|
case "firecrawl":
|
||||||
|
return providerDescriptors.firecrawl.createProvider(providerConfig);
|
||||||
|
default:
|
||||||
|
throw new Error(`Unknown provider type: ${(providerConfig as { type: string }).type}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function validateSearchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedSearchRequest) {
|
export function validateSearchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedSearchRequest) {
|
||||||
|
|||||||
@@ -431,7 +431,7 @@ test("search records provider factory failures and follows fallbacks", async ()
|
|||||||
}),
|
}),
|
||||||
createProvider(providerConfig) {
|
createProvider(providerConfig) {
|
||||||
if (providerConfig.name === "firecrawl-main") {
|
if (providerConfig.name === "firecrawl-main") {
|
||||||
throw new Error("factory boom:firecrawl-main");
|
throw "factory boom:firecrawl-main";
|
||||||
}
|
}
|
||||||
|
|
||||||
return createProvider(providerConfig.name, providerConfig.type, {
|
return createProvider(providerConfig.name, providerConfig.type, {
|
||||||
|
|||||||
@@ -58,11 +58,14 @@ function buildExecutionMeta(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Produces a human-readable description of any thrown value.
 *
 * Thrown values are not guaranteed to be `Error` instances (a factory may
 * `throw "some string"`), so this never assumes a `.message` property exists
 * and never throws itself.
 *
 * @param error - The caught value, of any type.
 * @returns `error.message` for `Error` instances and for error-like objects
 *   with a string `message`; otherwise the value's string conversion.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  try {
    // Error-like objects (e.g. errors from another realm) fail `instanceof`
    // but still carry a useful message.
    if (typeof error === "object" && error !== null) {
      const message = (error as { message?: unknown }).message;
      if (typeof message === "string") {
        return message;
      }
    }

    return String(error);
  } catch {
    // `String(value)` throws for objects with no usable toString/valueOf
    // (e.g. `Object.create(null)`); fall back to the built-in tag.
    return Object.prototype.toString.call(error);
  }
}
|
||||||
|
|
||||||
function attachAttempts(error: unknown, attempts: ProviderExecutionAttempt[]) {
|
function attachAttempts(error: unknown, attempts: ProviderExecutionAttempt[]) {
|
||||||
if (error instanceof Error) {
|
const normalizedError = error instanceof Error ? error : new Error(String(error));
|
||||||
(error as Error & { execution?: { attempts: ProviderExecutionAttempt[] } }).execution = { attempts };
|
(normalizedError as Error & { execution?: { attempts: ProviderExecutionAttempt[] } }).execution = { attempts };
|
||||||
}
|
return normalizedError;
|
||||||
return error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createWebSearchRuntime(
|
export function createWebSearchRuntime(
|
||||||
@@ -125,7 +128,7 @@ export function createWebSearchRuntime(
|
|||||||
attempts.push({
|
attempts.push({
|
||||||
providerName,
|
providerName,
|
||||||
status: "failed",
|
status: "failed",
|
||||||
reason: (error as Error).message,
|
reason: describeError(error),
|
||||||
});
|
});
|
||||||
lastError = error;
|
lastError = error;
|
||||||
|
|
||||||
@@ -155,7 +158,7 @@ export function createWebSearchRuntime(
|
|||||||
attempts.push({
|
attempts.push({
|
||||||
providerName,
|
providerName,
|
||||||
status: "failed",
|
status: "failed",
|
||||||
reason: (error as Error).message,
|
reason: describeError(error),
|
||||||
});
|
});
|
||||||
lastError = error;
|
lastError = error;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user