diff --git a/src/providers/firecrawl.ts b/src/providers/firecrawl.ts index 5389008..4876fc1 100644 --- a/src/providers/firecrawl.ts +++ b/src/providers/firecrawl.ts @@ -44,11 +44,43 @@ function resolveBaseUrl(config: FirecrawlProviderConfig) { return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL; } -function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) { - if ((includeDomains?.length ?? 0) > 1) { - throw new Error("Firecrawl currently supports at most one includeDomains entry."); +function createProviderValidationError(providerName: string, message: string) { + return new Error(`Provider "${providerName}" ${message}`); +} + +export function validateFirecrawlSearchRequest(providerName: string, request: NormalizedSearchRequest) { + if ((request.includeDomains?.length ?? 0) > 1) { + throw createProviderValidationError(providerName, "accepts at most one includeDomains entry."); } + if (request.category && request.firecrawl?.categories?.length) { + throw createProviderValidationError(providerName, "does not accept both top-level category and firecrawl.categories."); + } +} + +export function validateFirecrawlFetchRequest(providerName: string, request: NormalizedFetchRequest) { + if (request.highlights) { + throw createProviderValidationError(providerName, 'does not support generic fetch option "highlights".'); + } + + const overrideFormats = request.firecrawl?.formats; + if (overrideFormats?.length) { + if (request.text && !overrideFormats.includes("markdown")) { + throw createProviderValidationError( + providerName, + 'requires firecrawl.formats to include "markdown" when text is true.', + ); + } + if (request.summary && !overrideFormats.includes("summary")) { + throw createProviderValidationError( + providerName, + 'requires firecrawl.formats to include "summary" when summary is true.', + ); + } + } +} + +function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) { const parts = [query.trim()]; if (includeDomains?.[0]) { parts.push(`site:${includeDomains[0]}`); @@ -61,10 +93,6 @@ function appendSearchOperators(query: string, includeDomains?: string[], exclude } function resolveSearchCategories(request: NormalizedSearchRequest) { - if (request.category && request.firecrawl?.categories?.length) { - throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories."); - } - if (request.firecrawl?.categories?.length) { return request.firecrawl.categories; } @@ -77,18 +105,8 @@ function uniqueFormats(formats: string[]) { } function resolveFetchFormats(request: NormalizedFetchRequest) { - if (request.highlights) { - throw new Error('Firecrawl does not support generic fetch option "highlights".'); - } - const overrideFormats = request.firecrawl?.formats; if (overrideFormats?.length) { - if (request.text && !overrideFormats.includes("markdown")) { - throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".'); - } - if (request.summary && !overrideFormats.includes("summary")) { - throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".'); - } return uniqueFormats([...overrideFormats]); } @@ -123,6 +141,8 @@ export function createFirecrawlProvider( type: config.type, async search(request: NormalizedSearchRequest): Promise { + validateFirecrawlSearchRequest(config.name, request); + const payload = await postJson({ providerName: config.name, baseUrl, @@ -155,6 +175,7 @@ export function createFirecrawlProvider( }, async fetch(request: NormalizedFetchRequest): Promise { + validateFirecrawlFetchRequest(config.name, request); const formats = resolveFetchFormats(request); const results = await Promise.all( diff --git a/src/providers/registry.ts b/src/providers/registry.ts index 7c61bef..5a34c5c 100644 --- a/src/providers/registry.ts +++ b/src/providers/registry.ts @@ -1,6 +1,10 @@ import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts"; import { createExaProvider } from "./exa.ts"; -import { createFirecrawlProvider } from "./firecrawl.ts"; +import { + createFirecrawlProvider, + validateFirecrawlFetchRequest, + validateFirecrawlSearchRequest, +} from "./firecrawl.ts"; import { createTavilyProvider } from "./tavily.ts"; import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts"; @@ -69,6 +73,12 @@ const providerDescriptors = { createProvider(config: FirecrawlProviderConfig) { return createFirecrawlProvider(config); }, + validateSearchRequest(providerName: string, request: NormalizedSearchRequest) { + validateFirecrawlSearchRequest(providerName, request); + }, + validateFetchRequest(providerName: string, request: NormalizedFetchRequest) { + validateFirecrawlFetchRequest(providerName, request); + }, }, } satisfies Record; @@ -89,7 +99,11 @@ function validateOptionBlocks( export function getProviderDescriptor(provider: Pick | WebSearchProviderConfig["type"]) { const type = typeof provider === "string" ? provider : provider.type; - return providerDescriptors[type]; + const descriptor = providerDescriptors[type as keyof typeof providerDescriptors]; + if (!descriptor) { + throw new Error(`Unknown provider type: ${type}`); + } + return descriptor; } export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) { diff --git a/src/runtime.test.ts b/src/runtime.test.ts index 99818f8..e8656a8 100644 --- a/src/runtime.test.ts +++ b/src/runtime.test.ts @@ -124,21 +124,145 @@ test("search rejects a mismatched provider-specific options block before provide assert.equal(callCount, 0); }); -test("fetch rejects Firecrawl highlights before provider execution", async () => { - let callCount = 0; +test("search rejects Firecrawl requests with multiple includeDomains before provider execution", async () => { + const calls: string[] = []; const runtime = createWebSearchRuntime({ loadConfig: async () => ({ path: "test.json", defaultProviderName: "firecrawl-main", - defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }, - providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }], - providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]), + defaultProvider: { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + { name: "exa-fallback", type: "exa", apiKey: "exa" }, + ], + providersByName: new Map([ + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] }, + ], + ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], + ]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + search: async () => { + calls.push(providerConfig.name); + throw new Error(`boom:${providerConfig.name}`); + }, + }); + }, + }); + + await assert.rejects( + () => + runtime.search({ + query: "pi docs", + provider: "firecrawl-main", + includeDomains: ["pi.dev", "exa.ai"], + }), + /Provider "firecrawl-main" accepts at most one includeDomains entry/, + ); + assert.deepEqual(calls, []); +}); + +test("search rejects Firecrawl category conflicts before provider execution", async () => { + const calls: string[] = []; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "firecrawl-main", + defaultProvider: { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + { name: "exa-fallback", type: "exa", apiKey: "exa" }, + ], + providersByName: new Map([ + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] }, + ], + ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], + ]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + search: async () => { + calls.push(providerConfig.name); + throw new Error(`boom:${providerConfig.name}`); + }, + }); + }, + }); + + await assert.rejects( + () => + runtime.search({ + query: "pi docs", + provider: "firecrawl-main", + category: "research", + firecrawl: { categories: ["github"] }, + }), + /Provider "firecrawl-main" does not accept both top-level category and firecrawl.categories/, + ); + assert.deepEqual(calls, []); +}); + +test("fetch rejects Firecrawl highlights before provider execution", async () => { + const calls: string[] = []; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "firecrawl-main", + defaultProvider: { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + { name: "exa-fallback", type: "exa", apiKey: "exa" }, + ], + providersByName: new Map([ + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] }, + ], + ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], + ]), }), createProvider(providerConfig) { return createProvider(providerConfig.name, providerConfig.type, { fetch: async () => { - callCount += 1; + calls.push(providerConfig.name); return { providerName: providerConfig.name, results: [], @@ -149,10 +273,80 @@ test("fetch rejects Firecrawl highlights before provider execution", async () => }); await assert.rejects( - () => runtime.fetch({ urls: ["https://pi.dev"], highlights: true }), + () => runtime.fetch({ urls: ["https://pi.dev"], provider: "firecrawl-main", highlights: true }), /does not support generic fetch option "highlights"/, ); - assert.equal(callCount, 0); + assert.deepEqual(calls, []); +}); + +test("fetch rejects Firecrawl format mismatches before provider execution", async () => { + const calls: string[] = []; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "firecrawl-main", + defaultProvider: { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + { name: "exa-fallback", type: "exa", apiKey: "exa" }, + ], + providersByName: new Map([ + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] }, + ], + ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], + ]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + fetch: async () => { + calls.push(providerConfig.name); + return { + providerName: providerConfig.name, + results: [], + }; + }, + }); + }, + }); + + await assert.rejects( + () => + runtime.fetch({ + urls: ["https://pi.dev"], + provider: "firecrawl-main", + summary: true, + firecrawl: { formats: ["markdown"] }, + }), + /Provider "firecrawl-main" requires firecrawl.formats to include "summary" when summary is true/, + ); + assert.deepEqual(calls, []); +}); + +test("search throws a clear error for unknown provider types", async () => { + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "mystery-main", + defaultProvider: { name: "mystery-main", type: "mystery", apiKey: "??" } as any, + providers: [{ name: "mystery-main", type: "mystery", apiKey: "??" } as any], + providersByName: new Map([["mystery-main", { name: "mystery-main", type: "mystery", apiKey: "??" } as any]]), + }), + }); + + await assert.rejects(() => runtime.search({ query: "pi docs" }), /Unknown provider type: mystery/); }); test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {