fix: validate Firecrawl requests before fallback

This commit is contained in:
pi
2026-04-12 03:10:13 +01:00
parent 98a966cade
commit 02b46c24b6
3 changed files with 256 additions and 27 deletions

View File

@@ -44,11 +44,43 @@ function resolveBaseUrl(config: FirecrawlProviderConfig) {
return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL; return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL;
} }
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) { function createProviderValidationError(providerName: string, message: string) {
if ((includeDomains?.length ?? 0) > 1) { return new Error(`Provider "${providerName}" ${message}`);
throw new Error("Firecrawl currently supports at most one includeDomains entry."); }
export function validateFirecrawlSearchRequest(providerName: string, request: NormalizedSearchRequest) {
if ((request.includeDomains?.length ?? 0) > 1) {
throw createProviderValidationError(providerName, "accepts at most one includeDomains entry.");
} }
if (request.category && request.firecrawl?.categories?.length) {
throw createProviderValidationError(providerName, "does not accept both top-level category and firecrawl.categories.");
}
}
/**
 * Checks a normalized fetch request against the options the Firecrawl provider accepts.
 *
 * Rules are evaluated in this order and the first failure throws:
 * 1. the generic `highlights` option is rejected outright;
 * 2. `text` with a non-empty `firecrawl.formats` override that omits "markdown";
 * 3. `summary` with a non-empty `firecrawl.formats` override that omits "summary".
 *
 * When `firecrawl.formats` is absent or empty, the format rules are skipped.
 *
 * @param providerName - Provider name handed to the error factory.
 * @param request - Normalized fetch request to inspect; it is only read.
 * @throws The error built by `createProviderValidationError` for the first rule that fails.
 */
export function validateFirecrawlFetchRequest(providerName: string, request: NormalizedFetchRequest) {
  if (request.highlights) {
    throw createProviderValidationError(providerName, 'does not support generic fetch option "highlights".');
  }

  const requestedFormats = request.firecrawl?.formats;
  if (!requestedFormats?.length) {
    // No explicit format override, so there is nothing to cross-check.
    return;
  }

  const markdownRequiredMessage = 'requires firecrawl.formats to include "markdown" when text is true.';
  const summaryRequiredMessage = 'requires firecrawl.formats to include "summary" when summary is true.';

  if (request.text && !requestedFormats.includes("markdown")) {
    throw createProviderValidationError(providerName, markdownRequiredMessage);
  }

  if (request.summary && !requestedFormats.includes("summary")) {
    throw createProviderValidationError(providerName, summaryRequiredMessage);
  }
}
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
const parts = [query.trim()]; const parts = [query.trim()];
if (includeDomains?.[0]) { if (includeDomains?.[0]) {
parts.push(`site:${includeDomains[0]}`); parts.push(`site:${includeDomains[0]}`);
@@ -61,10 +93,6 @@ function appendSearchOperators(query: string, includeDomains?: string[], exclude
} }
function resolveSearchCategories(request: NormalizedSearchRequest) { function resolveSearchCategories(request: NormalizedSearchRequest) {
if (request.category && request.firecrawl?.categories?.length) {
throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories.");
}
if (request.firecrawl?.categories?.length) { if (request.firecrawl?.categories?.length) {
return request.firecrawl.categories; return request.firecrawl.categories;
} }
@@ -77,18 +105,8 @@ function uniqueFormats(formats: string[]) {
} }
function resolveFetchFormats(request: NormalizedFetchRequest) { function resolveFetchFormats(request: NormalizedFetchRequest) {
if (request.highlights) {
throw new Error('Firecrawl does not support generic fetch option "highlights".');
}
const overrideFormats = request.firecrawl?.formats; const overrideFormats = request.firecrawl?.formats;
if (overrideFormats?.length) { if (overrideFormats?.length) {
if (request.text && !overrideFormats.includes("markdown")) {
throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".');
}
if (request.summary && !overrideFormats.includes("summary")) {
throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".');
}
return uniqueFormats([...overrideFormats]); return uniqueFormats([...overrideFormats]);
} }
@@ -123,6 +141,8 @@ export function createFirecrawlProvider(
type: config.type, type: config.type,
async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> { async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
validateFirecrawlSearchRequest(config.name, request);
const payload = await postJson<FirecrawlSearchPayload>({ const payload = await postJson<FirecrawlSearchPayload>({
providerName: config.name, providerName: config.name,
baseUrl, baseUrl,
@@ -155,6 +175,7 @@ export function createFirecrawlProvider(
}, },
async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> { async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
validateFirecrawlFetchRequest(config.name, request);
const formats = resolveFetchFormats(request); const formats = resolveFetchFormats(request);
const results = await Promise.all( const results = await Promise.all(

View File

@@ -1,6 +1,10 @@
import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts"; import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts";
import { createExaProvider } from "./exa.ts"; import { createExaProvider } from "./exa.ts";
import { createFirecrawlProvider } from "./firecrawl.ts"; import {
createFirecrawlProvider,
validateFirecrawlFetchRequest,
validateFirecrawlSearchRequest,
} from "./firecrawl.ts";
import { createTavilyProvider } from "./tavily.ts"; import { createTavilyProvider } from "./tavily.ts";
import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts"; import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts";
@@ -69,6 +73,12 @@ const providerDescriptors = {
createProvider(config: FirecrawlProviderConfig) { createProvider(config: FirecrawlProviderConfig) {
return createFirecrawlProvider(config); return createFirecrawlProvider(config);
}, },
validateSearchRequest(providerName: string, request: NormalizedSearchRequest) {
validateFirecrawlSearchRequest(providerName, request);
},
validateFetchRequest(providerName: string, request: NormalizedFetchRequest) {
validateFirecrawlFetchRequest(providerName, request);
},
}, },
} satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>; } satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>;
@@ -89,7 +99,11 @@ function validateOptionBlocks(
export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"]) { export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"]) {
const type = typeof provider === "string" ? provider : provider.type; const type = typeof provider === "string" ? provider : provider.type;
return providerDescriptors[type]; const descriptor = providerDescriptors[type as keyof typeof providerDescriptors];
if (!descriptor) {
throw new Error(`Unknown provider type: ${type}`);
}
return descriptor;
} }
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) { export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {

View File

@@ -124,21 +124,145 @@ test("search rejects a mismatched provider-specific options block before provide
assert.equal(callCount, 0); assert.equal(callCount, 0);
}); });
test("fetch rejects Firecrawl highlights before provider execution", async () => { test("search rejects Firecrawl requests with multiple includeDomains before provider execution", async () => {
let callCount = 0; const calls: string[] = [];
const runtime = createWebSearchRuntime({ const runtime = createWebSearchRuntime({
loadConfig: async () => ({ loadConfig: async () => ({
path: "test.json", path: "test.json",
defaultProviderName: "firecrawl-main", defaultProviderName: "firecrawl-main",
defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }, defaultProvider: {
providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }], name: "firecrawl-main",
providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]), type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["exa-fallback"],
},
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["exa-fallback"],
},
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
],
providersByName: new Map([
[
"firecrawl-main",
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
],
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
]),
}),
createProvider(providerConfig) {
return createProvider(providerConfig.name, providerConfig.type, {
search: async () => {
calls.push(providerConfig.name);
throw new Error(`boom:${providerConfig.name}`);
},
});
},
});
await assert.rejects(
() =>
runtime.search({
query: "pi docs",
provider: "firecrawl-main",
includeDomains: ["pi.dev", "exa.ai"],
}),
/Provider "firecrawl-main" accepts at most one includeDomains entry/,
);
assert.deepEqual(calls, []);
});
// Verifies that a search combining the top-level `category` with
// `firecrawl.categories` is rejected with the provider-scoped validation
// message, and that no provider search stub (primary or fallback) is invoked.
test("search rejects Firecrawl category conflicts before provider execution", async () => {
  // Names of providers whose search stub ran; expected to stay empty.
  const invokedProviders: string[] = [];

  // Factories return a fresh literal on every call so the config's three views
  // (defaultProvider, providers, providersByName) never share an object.
  const firecrawlMain = () => ({
    name: "firecrawl-main",
    type: "firecrawl" as const,
    apiKey: "fc",
    fallbackProviders: ["exa-fallback"],
  });
  const exaFallback = () => ({ name: "exa-fallback", type: "exa" as const, apiKey: "exa" });

  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "firecrawl-main",
      defaultProvider: firecrawlMain(),
      providers: [firecrawlMain(), exaFallback()],
      providersByName: new Map([
        ["firecrawl-main", firecrawlMain()],
        ["exa-fallback", exaFallback()],
      ]),
    }),
    createProvider(providerConfig) {
      return createProvider(providerConfig.name, providerConfig.type, {
        // Records the call and then fails, so any execution is visible in
        // `invokedProviders`.
        search: async () => {
          invokedProviders.push(providerConfig.name);
          throw new Error(`boom:${providerConfig.name}`);
        },
      });
    },
  });

  const expectedError = /Provider "firecrawl-main" does not accept both top-level category and firecrawl.categories/;
  const attemptSearch = () =>
    runtime.search({
      query: "pi docs",
      provider: "firecrawl-main",
      category: "research",
      firecrawl: { categories: ["github"] },
    });

  await assert.rejects(attemptSearch, expectedError);

  assert.deepEqual(invokedProviders, []);
});
test("fetch rejects Firecrawl highlights before provider execution", async () => {
const calls: string[] = [];
const runtime = createWebSearchRuntime({
loadConfig: async () => ({
path: "test.json",
defaultProviderName: "firecrawl-main",
defaultProvider: {
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["exa-fallback"],
},
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["exa-fallback"],
},
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
],
providersByName: new Map([
[
"firecrawl-main",
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
],
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
]),
}), }),
createProvider(providerConfig) { createProvider(providerConfig) {
return createProvider(providerConfig.name, providerConfig.type, { return createProvider(providerConfig.name, providerConfig.type, {
fetch: async () => { fetch: async () => {
callCount += 1; calls.push(providerConfig.name);
return { return {
providerName: providerConfig.name, providerName: providerConfig.name,
results: [], results: [],
@@ -149,10 +273,80 @@ test("fetch rejects Firecrawl highlights before provider execution", async () =>
}); });
await assert.rejects( await assert.rejects(
() => runtime.fetch({ urls: ["https://pi.dev"], highlights: true }), () => runtime.fetch({ urls: ["https://pi.dev"], provider: "firecrawl-main", highlights: true }),
/does not support generic fetch option "highlights"/, /does not support generic fetch option "highlights"/,
); );
assert.equal(callCount, 0); assert.deepEqual(calls, []);
});
// Verifies that a fetch asking for `summary` while overriding
// `firecrawl.formats` without "summary" is rejected with the provider-scoped
// validation message, and that no provider fetch stub (primary or fallback)
// is invoked.
test("fetch rejects Firecrawl format mismatches before provider execution", async () => {
  // Names of providers whose fetch stub ran; expected to stay empty.
  const invokedProviders: string[] = [];

  // Factories return a fresh literal on every call so the config's three views
  // (defaultProvider, providers, providersByName) never share an object.
  const firecrawlMain = () => ({
    name: "firecrawl-main",
    type: "firecrawl" as const,
    apiKey: "fc",
    fallbackProviders: ["exa-fallback"],
  });
  const exaFallback = () => ({ name: "exa-fallback", type: "exa" as const, apiKey: "exa" });

  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "firecrawl-main",
      defaultProvider: firecrawlMain(),
      providers: [firecrawlMain(), exaFallback()],
      providersByName: new Map([
        ["firecrawl-main", firecrawlMain()],
        ["exa-fallback", exaFallback()],
      ]),
    }),
    createProvider(providerConfig) {
      return createProvider(providerConfig.name, providerConfig.type, {
        // Records the call and returns an empty result set, so any execution
        // is visible in `invokedProviders`.
        fetch: async () => {
          invokedProviders.push(providerConfig.name);
          const emptyResponse = {
            providerName: providerConfig.name,
            results: [],
          };
          return emptyResponse;
        },
      });
    },
  });

  const expectedError = /Provider "firecrawl-main" requires firecrawl.formats to include "summary" when summary is true/;
  const attemptFetch = () =>
    runtime.fetch({
      urls: ["https://pi.dev"],
      provider: "firecrawl-main",
      summary: true,
      firecrawl: { formats: ["markdown"] },
    });

  await assert.rejects(attemptFetch, expectedError);

  assert.deepEqual(invokedProviders, []);
});
test("search throws a clear error for unknown provider types", async () => {
const runtime = createWebSearchRuntime({
loadConfig: async () => ({
path: "test.json",
defaultProviderName: "mystery-main",
defaultProvider: { name: "mystery-main", type: "mystery", apiKey: "??" } as any,
providers: [{ name: "mystery-main", type: "mystery", apiKey: "??" } as any],
providersByName: new Map([["mystery-main", { name: "mystery-main", type: "mystery", apiKey: "??" } as any]]),
}),
});
await assert.rejects(() => runtime.search({ query: "pi docs" }), /Unknown provider type: mystery/);
}); });
test("search starts with the explicitly requested provider and still follows its fallback chain", async () => { test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {