fix: validate Firecrawl requests before fallback
This commit is contained in:
@@ -44,11 +44,43 @@ function resolveBaseUrl(config: FirecrawlProviderConfig) {
|
|||||||
return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL;
|
return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL;
|
||||||
}
|
}
|
||||||
|
|
||||||
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
|
function createProviderValidationError(providerName: string, message: string) {
|
||||||
if ((includeDomains?.length ?? 0) > 1) {
|
return new Error(`Provider "${providerName}" ${message}`);
|
||||||
throw new Error("Firecrawl currently supports at most one includeDomains entry.");
|
}
|
||||||
|
|
||||||
|
export function validateFirecrawlSearchRequest(providerName: string, request: NormalizedSearchRequest) {
|
||||||
|
if ((request.includeDomains?.length ?? 0) > 1) {
|
||||||
|
throw createProviderValidationError(providerName, "accepts at most one includeDomains entry.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request.category && request.firecrawl?.categories?.length) {
|
||||||
|
throw createProviderValidationError(providerName, "does not accept both top-level category and firecrawl.categories.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function validateFirecrawlFetchRequest(providerName: string, request: NormalizedFetchRequest) {
|
||||||
|
if (request.highlights) {
|
||||||
|
throw createProviderValidationError(providerName, 'does not support generic fetch option "highlights".');
|
||||||
|
}
|
||||||
|
|
||||||
|
const overrideFormats = request.firecrawl?.formats;
|
||||||
|
if (overrideFormats?.length) {
|
||||||
|
if (request.text && !overrideFormats.includes("markdown")) {
|
||||||
|
throw createProviderValidationError(
|
||||||
|
providerName,
|
||||||
|
'requires firecrawl.formats to include "markdown" when text is true.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (request.summary && !overrideFormats.includes("summary")) {
|
||||||
|
throw createProviderValidationError(
|
||||||
|
providerName,
|
||||||
|
'requires firecrawl.formats to include "summary" when summary is true.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
|
||||||
const parts = [query.trim()];
|
const parts = [query.trim()];
|
||||||
if (includeDomains?.[0]) {
|
if (includeDomains?.[0]) {
|
||||||
parts.push(`site:${includeDomains[0]}`);
|
parts.push(`site:${includeDomains[0]}`);
|
||||||
@@ -61,10 +93,6 @@ function appendSearchOperators(query: string, includeDomains?: string[], exclude
|
|||||||
}
|
}
|
||||||
|
|
||||||
function resolveSearchCategories(request: NormalizedSearchRequest) {
|
function resolveSearchCategories(request: NormalizedSearchRequest) {
|
||||||
if (request.category && request.firecrawl?.categories?.length) {
|
|
||||||
throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (request.firecrawl?.categories?.length) {
|
if (request.firecrawl?.categories?.length) {
|
||||||
return request.firecrawl.categories;
|
return request.firecrawl.categories;
|
||||||
}
|
}
|
||||||
@@ -77,18 +105,8 @@ function uniqueFormats(formats: string[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function resolveFetchFormats(request: NormalizedFetchRequest) {
|
function resolveFetchFormats(request: NormalizedFetchRequest) {
|
||||||
if (request.highlights) {
|
|
||||||
throw new Error('Firecrawl does not support generic fetch option "highlights".');
|
|
||||||
}
|
|
||||||
|
|
||||||
const overrideFormats = request.firecrawl?.formats;
|
const overrideFormats = request.firecrawl?.formats;
|
||||||
if (overrideFormats?.length) {
|
if (overrideFormats?.length) {
|
||||||
if (request.text && !overrideFormats.includes("markdown")) {
|
|
||||||
throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".');
|
|
||||||
}
|
|
||||||
if (request.summary && !overrideFormats.includes("summary")) {
|
|
||||||
throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".');
|
|
||||||
}
|
|
||||||
return uniqueFormats([...overrideFormats]);
|
return uniqueFormats([...overrideFormats]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,6 +141,8 @@ export function createFirecrawlProvider(
|
|||||||
type: config.type,
|
type: config.type,
|
||||||
|
|
||||||
async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
|
async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
|
||||||
|
validateFirecrawlSearchRequest(config.name, request);
|
||||||
|
|
||||||
const payload = await postJson<FirecrawlSearchPayload>({
|
const payload = await postJson<FirecrawlSearchPayload>({
|
||||||
providerName: config.name,
|
providerName: config.name,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
@@ -155,6 +175,7 @@ export function createFirecrawlProvider(
|
|||||||
},
|
},
|
||||||
|
|
||||||
async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
|
async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
|
||||||
|
validateFirecrawlFetchRequest(config.name, request);
|
||||||
const formats = resolveFetchFormats(request);
|
const formats = resolveFetchFormats(request);
|
||||||
|
|
||||||
const results = await Promise.all(
|
const results = await Promise.all(
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts";
|
import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts";
|
||||||
import { createExaProvider } from "./exa.ts";
|
import { createExaProvider } from "./exa.ts";
|
||||||
import { createFirecrawlProvider } from "./firecrawl.ts";
|
import {
|
||||||
|
createFirecrawlProvider,
|
||||||
|
validateFirecrawlFetchRequest,
|
||||||
|
validateFirecrawlSearchRequest,
|
||||||
|
} from "./firecrawl.ts";
|
||||||
import { createTavilyProvider } from "./tavily.ts";
|
import { createTavilyProvider } from "./tavily.ts";
|
||||||
import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts";
|
import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts";
|
||||||
|
|
||||||
@@ -69,6 +73,12 @@ const providerDescriptors = {
|
|||||||
createProvider(config: FirecrawlProviderConfig) {
|
createProvider(config: FirecrawlProviderConfig) {
|
||||||
return createFirecrawlProvider(config);
|
return createFirecrawlProvider(config);
|
||||||
},
|
},
|
||||||
|
validateSearchRequest(providerName: string, request: NormalizedSearchRequest) {
|
||||||
|
validateFirecrawlSearchRequest(providerName, request);
|
||||||
|
},
|
||||||
|
validateFetchRequest(providerName: string, request: NormalizedFetchRequest) {
|
||||||
|
validateFirecrawlFetchRequest(providerName, request);
|
||||||
|
},
|
||||||
},
|
},
|
||||||
} satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>;
|
} satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>;
|
||||||
|
|
||||||
@@ -89,7 +99,11 @@ function validateOptionBlocks(
|
|||||||
|
|
||||||
export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"]) {
|
export function getProviderDescriptor(provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"]) {
|
||||||
const type = typeof provider === "string" ? provider : provider.type;
|
const type = typeof provider === "string" ? provider : provider.type;
|
||||||
return providerDescriptors[type];
|
const descriptor = providerDescriptors[type as keyof typeof providerDescriptors];
|
||||||
|
if (!descriptor) {
|
||||||
|
throw new Error(`Unknown provider type: ${type}`);
|
||||||
|
}
|
||||||
|
return descriptor;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
|
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
|
||||||
|
|||||||
@@ -124,21 +124,145 @@ test("search rejects a mismatched provider-specific options block before provide
|
|||||||
assert.equal(callCount, 0);
|
assert.equal(callCount, 0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("fetch rejects Firecrawl highlights before provider execution", async () => {
|
test("search rejects Firecrawl requests with multiple includeDomains before provider execution", async () => {
|
||||||
let callCount = 0;
|
const calls: string[] = [];
|
||||||
|
|
||||||
const runtime = createWebSearchRuntime({
|
const runtime = createWebSearchRuntime({
|
||||||
loadConfig: async () => ({
|
loadConfig: async () => ({
|
||||||
path: "test.json",
|
path: "test.json",
|
||||||
defaultProviderName: "firecrawl-main",
|
defaultProviderName: "firecrawl-main",
|
||||||
defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" },
|
defaultProvider: {
|
||||||
providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }],
|
name: "firecrawl-main",
|
||||||
providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]),
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
providers: [
|
||||||
|
{
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
|
||||||
|
],
|
||||||
|
providersByName: new Map([
|
||||||
|
[
|
||||||
|
"firecrawl-main",
|
||||||
|
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
|
||||||
|
],
|
||||||
|
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
|
||||||
|
]),
|
||||||
|
}),
|
||||||
|
createProvider(providerConfig) {
|
||||||
|
return createProvider(providerConfig.name, providerConfig.type, {
|
||||||
|
search: async () => {
|
||||||
|
calls.push(providerConfig.name);
|
||||||
|
throw new Error(`boom:${providerConfig.name}`);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await assert.rejects(
|
||||||
|
() =>
|
||||||
|
runtime.search({
|
||||||
|
query: "pi docs",
|
||||||
|
provider: "firecrawl-main",
|
||||||
|
includeDomains: ["pi.dev", "exa.ai"],
|
||||||
|
}),
|
||||||
|
/Provider "firecrawl-main" accepts at most one includeDomains entry/,
|
||||||
|
);
|
||||||
|
assert.deepEqual(calls, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("search rejects Firecrawl category conflicts before provider execution", async () => {
|
||||||
|
const calls: string[] = [];
|
||||||
|
|
||||||
|
const runtime = createWebSearchRuntime({
|
||||||
|
loadConfig: async () => ({
|
||||||
|
path: "test.json",
|
||||||
|
defaultProviderName: "firecrawl-main",
|
||||||
|
defaultProvider: {
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
providers: [
|
||||||
|
{
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
|
||||||
|
],
|
||||||
|
providersByName: new Map([
|
||||||
|
[
|
||||||
|
"firecrawl-main",
|
||||||
|
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
|
||||||
|
],
|
||||||
|
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
|
||||||
|
]),
|
||||||
|
}),
|
||||||
|
createProvider(providerConfig) {
|
||||||
|
return createProvider(providerConfig.name, providerConfig.type, {
|
||||||
|
search: async () => {
|
||||||
|
calls.push(providerConfig.name);
|
||||||
|
throw new Error(`boom:${providerConfig.name}`);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await assert.rejects(
|
||||||
|
() =>
|
||||||
|
runtime.search({
|
||||||
|
query: "pi docs",
|
||||||
|
provider: "firecrawl-main",
|
||||||
|
category: "research",
|
||||||
|
firecrawl: { categories: ["github"] },
|
||||||
|
}),
|
||||||
|
/Provider "firecrawl-main" does not accept both top-level category and firecrawl.categories/,
|
||||||
|
);
|
||||||
|
assert.deepEqual(calls, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("fetch rejects Firecrawl highlights before provider execution", async () => {
|
||||||
|
const calls: string[] = [];
|
||||||
|
|
||||||
|
const runtime = createWebSearchRuntime({
|
||||||
|
loadConfig: async () => ({
|
||||||
|
path: "test.json",
|
||||||
|
defaultProviderName: "firecrawl-main",
|
||||||
|
defaultProvider: {
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
providers: [
|
||||||
|
{
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
|
||||||
|
],
|
||||||
|
providersByName: new Map([
|
||||||
|
[
|
||||||
|
"firecrawl-main",
|
||||||
|
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
|
||||||
|
],
|
||||||
|
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
|
||||||
|
]),
|
||||||
}),
|
}),
|
||||||
createProvider(providerConfig) {
|
createProvider(providerConfig) {
|
||||||
return createProvider(providerConfig.name, providerConfig.type, {
|
return createProvider(providerConfig.name, providerConfig.type, {
|
||||||
fetch: async () => {
|
fetch: async () => {
|
||||||
callCount += 1;
|
calls.push(providerConfig.name);
|
||||||
return {
|
return {
|
||||||
providerName: providerConfig.name,
|
providerName: providerConfig.name,
|
||||||
results: [],
|
results: [],
|
||||||
@@ -149,10 +273,80 @@ test("fetch rejects Firecrawl highlights before provider execution", async () =>
|
|||||||
});
|
});
|
||||||
|
|
||||||
await assert.rejects(
|
await assert.rejects(
|
||||||
() => runtime.fetch({ urls: ["https://pi.dev"], highlights: true }),
|
() => runtime.fetch({ urls: ["https://pi.dev"], provider: "firecrawl-main", highlights: true }),
|
||||||
/does not support generic fetch option "highlights"/,
|
/does not support generic fetch option "highlights"/,
|
||||||
);
|
);
|
||||||
assert.equal(callCount, 0);
|
assert.deepEqual(calls, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("fetch rejects Firecrawl format mismatches before provider execution", async () => {
|
||||||
|
const calls: string[] = [];
|
||||||
|
|
||||||
|
const runtime = createWebSearchRuntime({
|
||||||
|
loadConfig: async () => ({
|
||||||
|
path: "test.json",
|
||||||
|
defaultProviderName: "firecrawl-main",
|
||||||
|
defaultProvider: {
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
providers: [
|
||||||
|
{
|
||||||
|
name: "firecrawl-main",
|
||||||
|
type: "firecrawl",
|
||||||
|
apiKey: "fc",
|
||||||
|
fallbackProviders: ["exa-fallback"],
|
||||||
|
},
|
||||||
|
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
|
||||||
|
],
|
||||||
|
providersByName: new Map([
|
||||||
|
[
|
||||||
|
"firecrawl-main",
|
||||||
|
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
|
||||||
|
],
|
||||||
|
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
|
||||||
|
]),
|
||||||
|
}),
|
||||||
|
createProvider(providerConfig) {
|
||||||
|
return createProvider(providerConfig.name, providerConfig.type, {
|
||||||
|
fetch: async () => {
|
||||||
|
calls.push(providerConfig.name);
|
||||||
|
return {
|
||||||
|
providerName: providerConfig.name,
|
||||||
|
results: [],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await assert.rejects(
|
||||||
|
() =>
|
||||||
|
runtime.fetch({
|
||||||
|
urls: ["https://pi.dev"],
|
||||||
|
provider: "firecrawl-main",
|
||||||
|
summary: true,
|
||||||
|
firecrawl: { formats: ["markdown"] },
|
||||||
|
}),
|
||||||
|
/Provider "firecrawl-main" requires firecrawl.formats to include "summary" when summary is true/,
|
||||||
|
);
|
||||||
|
assert.deepEqual(calls, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("search throws a clear error for unknown provider types", async () => {
|
||||||
|
const runtime = createWebSearchRuntime({
|
||||||
|
loadConfig: async () => ({
|
||||||
|
path: "test.json",
|
||||||
|
defaultProviderName: "mystery-main",
|
||||||
|
defaultProvider: { name: "mystery-main", type: "mystery", apiKey: "??" } as any,
|
||||||
|
providers: [{ name: "mystery-main", type: "mystery", apiKey: "??" } as any],
|
||||||
|
providersByName: new Map([["mystery-main", { name: "mystery-main", type: "mystery", apiKey: "??" } as any]]),
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
await assert.rejects(() => runtime.search({ query: "pi docs" }), /Unknown provider type: mystery/);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {
|
test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user