fix: validate Firecrawl requests before fallback
This commit is contained in:
@@ -44,11 +44,43 @@ function resolveBaseUrl(config: FirecrawlProviderConfig) {
|
||||
return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL;
|
||||
}
|
||||
|
||||
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
|
||||
if ((includeDomains?.length ?? 0) > 1) {
|
||||
throw new Error("Firecrawl currently supports at most one includeDomains entry.");
|
||||
/**
 * Builds the error used to report a request that a provider cannot accept.
 *
 * The message always has the shape `Provider "<providerName>" <message>`, so
 * `message` should be a predicate that completes that sentence.
 *
 * @param providerName - Configured provider name, quoted verbatim in the message.
 * @param message - Text appended after the quoted provider name.
 * @returns A plain `Error`; the caller decides whether to throw it.
 */
function createProviderValidationError(providerName: string, message: string): Error {
  return new Error(`Provider "${providerName}" ${message}`);
}
|
||||
|
||||
/**
 * Rejects search requests that the Firecrawl provider cannot honor.
 *
 * Exported so the same checks can run both inside the provider's `search`
 * and from the provider descriptor registry.
 *
 * @param providerName - Configured provider name, used in error messages.
 * @param request - The normalized search request to check.
 * @throws Error when `includeDomains` has more than one entry, or when both
 *   the top-level `category` and `firecrawl.categories` are supplied.
 */
export function validateFirecrawlSearchRequest(providerName: string, request: NormalizedSearchRequest): void {
  // Only the first includeDomains entry is turned into a `site:` operator,
  // so additional entries would be silently ignored if allowed through.
  if ((request.includeDomains?.length ?? 0) > 1) {
    throw createProviderValidationError(providerName, "accepts at most one includeDomains entry.");
  }

  // The two category sources are mutually exclusive; refuse to guess which one wins.
  if (request.category && request.firecrawl?.categories?.length) {
    throw createProviderValidationError(providerName, "does not accept both top-level category and firecrawl.categories.");
  }
}
|
||||
|
||||
/**
 * Rejects fetch requests that the Firecrawl provider cannot honor.
 *
 * @param providerName - Configured provider name, used in error messages.
 * @param request - The normalized fetch request to check.
 * @throws Error when `highlights` is requested, or when an explicit
 *   `firecrawl.formats` override omits a format that a generic option
 *   (`text` -> "markdown", `summary` -> "summary") depends on.
 */
export function validateFirecrawlFetchRequest(providerName: string, request: NormalizedFetchRequest): void {
  if (request.highlights) {
    throw createProviderValidationError(providerName, 'does not support generic fetch option "highlights".');
  }

  const overrideFormats = request.firecrawl?.formats;

  // Without an explicit, non-empty override there is nothing to cross-check.
  if (!overrideFormats?.length) {
    return;
  }

  if (request.text && !overrideFormats.includes("markdown")) {
    throw createProviderValidationError(
      providerName,
      'requires firecrawl.formats to include "markdown" when text is true.',
    );
  }

  if (request.summary && !overrideFormats.includes("summary")) {
    throw createProviderValidationError(
      providerName,
      'requires firecrawl.formats to include "summary" when summary is true.',
    );
  }
}
|
||||
|
||||
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
|
||||
const parts = [query.trim()];
|
||||
if (includeDomains?.[0]) {
|
||||
parts.push(`site:${includeDomains[0]}`);
|
||||
@@ -61,10 +93,6 @@ function appendSearchOperators(query: string, includeDomains?: string[], exclude
|
||||
}
|
||||
|
||||
function resolveSearchCategories(request: NormalizedSearchRequest) {
|
||||
if (request.category && request.firecrawl?.categories?.length) {
|
||||
throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories.");
|
||||
}
|
||||
|
||||
if (request.firecrawl?.categories?.length) {
|
||||
return request.firecrawl.categories;
|
||||
}
|
||||
@@ -77,18 +105,8 @@ function uniqueFormats(formats: string[]) {
|
||||
}
|
||||
|
||||
function resolveFetchFormats(request: NormalizedFetchRequest) {
|
||||
if (request.highlights) {
|
||||
throw new Error('Firecrawl does not support generic fetch option "highlights".');
|
||||
}
|
||||
|
||||
const overrideFormats = request.firecrawl?.formats;
|
||||
if (overrideFormats?.length) {
|
||||
if (request.text && !overrideFormats.includes("markdown")) {
|
||||
throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".');
|
||||
}
|
||||
if (request.summary && !overrideFormats.includes("summary")) {
|
||||
throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".');
|
||||
}
|
||||
return uniqueFormats([...overrideFormats]);
|
||||
}
|
||||
|
||||
@@ -123,6 +141,8 @@ export function createFirecrawlProvider(
|
||||
type: config.type,
|
||||
|
||||
async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
|
||||
validateFirecrawlSearchRequest(config.name, request);
|
||||
|
||||
const payload = await postJson<FirecrawlSearchPayload>({
|
||||
providerName: config.name,
|
||||
baseUrl,
|
||||
@@ -155,6 +175,7 @@ export function createFirecrawlProvider(
|
||||
},
|
||||
|
||||
async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
|
||||
validateFirecrawlFetchRequest(config.name, request);
|
||||
const formats = resolveFetchFormats(request);
|
||||
|
||||
const results = await Promise.all(
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts";
|
||||
import { createExaProvider } from "./exa.ts";
|
||||
import { createFirecrawlProvider } from "./firecrawl.ts";
|
||||
import {
|
||||
createFirecrawlProvider,
|
||||
validateFirecrawlFetchRequest,
|
||||
validateFirecrawlSearchRequest,
|
||||
} from "./firecrawl.ts";
|
||||
import { createTavilyProvider } from "./tavily.ts";
|
||||
import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts";
|
||||
|
||||
@@ -69,6 +73,12 @@ const providerDescriptors = {
|
||||
/** Builds the Firecrawl provider instance for the given configuration. */
createProvider(config: FirecrawlProviderConfig) {
  return createFirecrawlProvider(config);
},
|
||||
/**
 * Runs the Firecrawl search checks for a request.
 * Delegates to `validateFirecrawlSearchRequest`, which throws on an invalid request.
 */
validateSearchRequest(providerName: string, request: NormalizedSearchRequest) {
  validateFirecrawlSearchRequest(providerName, request);
},
|
||||
/**
 * Runs the Firecrawl fetch checks for a request.
 * Delegates to `validateFirecrawlFetchRequest`, which throws on an invalid request.
 */
validateFetchRequest(providerName: string, request: NormalizedFetchRequest) {
  validateFirecrawlFetchRequest(providerName, request);
},
|
||||
},
|
||||
} satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>;
|
||||
|
||||
@@ -89,7 +99,11 @@ function validateOptionBlocks(
|
||||
|
||||
/**
 * Looks up the descriptor registered for a provider type.
 *
 * @param provider - Either a provider type string or any object carrying a `type` field.
 * @returns The matching entry of `providerDescriptors`.
 * @throws Error (`Unknown provider type: <type>`) when no descriptor is
 *   registered for the type, e.g. when a config bypassed static typing.
 */
export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"]) {
  const type = typeof provider === "string" ? provider : provider.type;

  // Own-property check rather than a plain lookup: names inherited from
  // Object.prototype (e.g. "constructor") must not be mistaken for descriptors.
  if (!Object.prototype.hasOwnProperty.call(providerDescriptors, type)) {
    throw new Error(`Unknown provider type: ${type}`);
  }

  return providerDescriptors[type as keyof typeof providerDescriptors];
}
|
||||
|
||||
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
|
||||
|
||||
@@ -124,21 +124,145 @@ test("search rejects a mismatched provider-specific options block before provide
|
||||
assert.equal(callCount, 0);
|
||||
});
|
||||
|
||||
test("fetch rejects Firecrawl highlights before provider execution", async () => {
|
||||
let callCount = 0;
|
||||
test("search rejects Firecrawl requests with multiple includeDomains before provider execution", async () => {
  // Names of providers whose search() was actually invoked.
  const calls: string[] = [];

  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "firecrawl-main",
      // A fallback is configured so the test can show that a validation
      // failure does not fall through to the next provider.
      defaultProvider: {
        name: "firecrawl-main",
        type: "firecrawl",
        apiKey: "fc",
        fallbackProviders: ["exa-fallback"],
      },
      providers: [
        {
          name: "firecrawl-main",
          type: "firecrawl",
          apiKey: "fc",
          fallbackProviders: ["exa-fallback"],
        },
        { name: "exa-fallback", type: "exa", apiKey: "exa" },
      ],
      providersByName: new Map([
        [
          "firecrawl-main",
          { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
        ],
        ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
      ]),
    }),
    createProvider(providerConfig) {
      return createProvider(providerConfig.name, providerConfig.type, {
        // Every stub records its invocation and then fails.
        search: async () => {
          calls.push(providerConfig.name);
          throw new Error(`boom:${providerConfig.name}`);
        },
      });
    },
  });

  await assert.rejects(
    () =>
      runtime.search({
        query: "pi docs",
        provider: "firecrawl-main",
        includeDomains: ["pi.dev", "exa.ai"],
      }),
    /Provider "firecrawl-main" accepts at most one includeDomains entry/,
  );
  // Neither the primary provider nor its fallback may have been executed.
  assert.deepEqual(calls, []);
});
|
||||
|
||||
test("search rejects Firecrawl category conflicts before provider execution", async () => {
  // Names of providers whose search() was actually invoked.
  const calls: string[] = [];

  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "firecrawl-main",
      // A fallback is configured so the test can show that a validation
      // failure does not fall through to the next provider.
      defaultProvider: {
        name: "firecrawl-main",
        type: "firecrawl",
        apiKey: "fc",
        fallbackProviders: ["exa-fallback"],
      },
      providers: [
        {
          name: "firecrawl-main",
          type: "firecrawl",
          apiKey: "fc",
          fallbackProviders: ["exa-fallback"],
        },
        { name: "exa-fallback", type: "exa", apiKey: "exa" },
      ],
      providersByName: new Map([
        [
          "firecrawl-main",
          { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
        ],
        ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
      ]),
    }),
    createProvider(providerConfig) {
      return createProvider(providerConfig.name, providerConfig.type, {
        // Every stub records its invocation and then fails.
        search: async () => {
          calls.push(providerConfig.name);
          throw new Error(`boom:${providerConfig.name}`);
        },
      });
    },
  });

  await assert.rejects(
    () =>
      runtime.search({
        query: "pi docs",
        provider: "firecrawl-main",
        // Both category sources are supplied at once, which is the conflict under test.
        category: "research",
        firecrawl: { categories: ["github"] },
      }),
    /Provider "firecrawl-main" does not accept both top-level category and firecrawl.categories/,
  );
  // Neither the primary provider nor its fallback may have been executed.
  assert.deepEqual(calls, []);
});
|
||||
|
||||
test("fetch rejects Firecrawl highlights before provider execution", async () => {
|
||||
const calls: string[] = [];
|
||||
|
||||
const runtime = createWebSearchRuntime({
|
||||
loadConfig: async () => ({
|
||||
path: "test.json",
|
||||
defaultProviderName: "firecrawl-main",
|
||||
defaultProvider: {
|
||||
name: "firecrawl-main",
|
||||
type: "firecrawl",
|
||||
apiKey: "fc",
|
||||
fallbackProviders: ["exa-fallback"],
|
||||
},
|
||||
providers: [
|
||||
{
|
||||
name: "firecrawl-main",
|
||||
type: "firecrawl",
|
||||
apiKey: "fc",
|
||||
fallbackProviders: ["exa-fallback"],
|
||||
},
|
||||
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
|
||||
],
|
||||
providersByName: new Map([
|
||||
[
|
||||
"firecrawl-main",
|
||||
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
|
||||
],
|
||||
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
|
||||
]),
|
||||
}),
|
||||
createProvider(providerConfig) {
|
||||
return createProvider(providerConfig.name, providerConfig.type, {
|
||||
fetch: async () => {
|
||||
callCount += 1;
|
||||
calls.push(providerConfig.name);
|
||||
return {
|
||||
providerName: providerConfig.name,
|
||||
results: [],
|
||||
@@ -149,10 +273,80 @@ test("fetch rejects Firecrawl highlights before provider execution", async () =>
|
||||
});
|
||||
|
||||
await assert.rejects(
|
||||
() => runtime.fetch({ urls: ["https://pi.dev"], highlights: true }),
|
||||
() => runtime.fetch({ urls: ["https://pi.dev"], provider: "firecrawl-main", highlights: true }),
|
||||
/does not support generic fetch option "highlights"/,
|
||||
);
|
||||
assert.equal(callCount, 0);
|
||||
assert.deepEqual(calls, []);
|
||||
});
|
||||
|
||||
test("fetch rejects Firecrawl format mismatches before provider execution", async () => {
  // Names of providers whose fetch() was actually invoked.
  const calls: string[] = [];

  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "firecrawl-main",
      // A fallback is configured so the test can show that a validation
      // failure does not fall through to the next provider.
      defaultProvider: {
        name: "firecrawl-main",
        type: "firecrawl",
        apiKey: "fc",
        fallbackProviders: ["exa-fallback"],
      },
      providers: [
        {
          name: "firecrawl-main",
          type: "firecrawl",
          apiKey: "fc",
          fallbackProviders: ["exa-fallback"],
        },
        { name: "exa-fallback", type: "exa", apiKey: "exa" },
      ],
      providersByName: new Map([
        [
          "firecrawl-main",
          { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
        ],
        ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
      ]),
    }),
    createProvider(providerConfig) {
      return createProvider(providerConfig.name, providerConfig.type, {
        // The stub would succeed if reached, so the rejection asserted below
        // cannot come from the provider itself.
        fetch: async () => {
          calls.push(providerConfig.name);
          return {
            providerName: providerConfig.name,
            results: [],
          };
        },
      });
    },
  });

  await assert.rejects(
    () =>
      runtime.fetch({
        urls: ["https://pi.dev"],
        provider: "firecrawl-main",
        // summary is requested, but the explicit formats override omits "summary".
        summary: true,
        firecrawl: { formats: ["markdown"] },
      }),
    /Provider "firecrawl-main" requires firecrawl.formats to include "summary" when summary is true/,
  );
  // Neither the primary provider nor its fallback may have been executed.
  assert.deepEqual(calls, []);
});
|
||||
|
||||
test("search throws a clear error for unknown provider types", async () => {
  const runtime = createWebSearchRuntime({
    loadConfig: async () => ({
      path: "test.json",
      defaultProviderName: "mystery-main",
      // `as any` is deliberate: "mystery" is not a declared provider type, and
      // the test needs a config that slipped past static typing.
      defaultProvider: { name: "mystery-main", type: "mystery", apiKey: "??" } as any,
      providers: [{ name: "mystery-main", type: "mystery", apiKey: "??" } as any],
      providersByName: new Map([["mystery-main", { name: "mystery-main", type: "mystery", apiKey: "??" } as any]]),
    }),
  });

  await assert.rejects(() => runtime.search({ query: "pi docs" }), /Unknown provider type: mystery/);
});
|
||||
|
||||
test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {
|
||||
|
||||
Reference in New Issue
Block a user