diff --git a/README.md b/README.md index f49ada7..b986ebd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pi-web-search -`pi-web-search` is a Pi extension package that adds `web_search` and `web_fetch` tools backed by pluggable providers such as Exa and Tavily. +`pi-web-search` is a Pi extension package that adds `web_search` and `web_fetch` tools backed by pluggable providers such as Exa, Tavily, and Firecrawl. ## Install @@ -24,6 +24,71 @@ pi install https://gitea.rwiesner.com/pi/pi-web-search Provider configuration is managed by the extension's own commands and config files. +Example `~/.pi/agent/web-search.json`: + +```json +{ + "defaultProvider": "firecrawl-main", + "providers": [ + { + "name": "firecrawl-main", + "type": "firecrawl", + "apiKey": "fc-...", + "fallbackProviders": ["exa-fallback"] + }, + { + "name": "exa-fallback", + "type": "exa", + "apiKey": "exa_..." + } + ] +} +``` + +Self-hosted Firecrawl: + +```json +{ + "defaultProvider": "firecrawl-selfhosted", + "providers": [ + { + "name": "firecrawl-selfhosted", + "type": "firecrawl", + "baseUrl": "https://firecrawl.internal.example/v2" + } + ] +} +``` + +Tool examples: + +```json +{ + "query": "pi docs", + "provider": "firecrawl-main", + "firecrawl": { + "country": "DE", + "categories": ["github"], + "scrapeOptions": { + "formats": ["markdown"] + } + } +} +``` + +```json +{ + "urls": ["https://pi.dev"], + "provider": "firecrawl-main", + "summary": true, + "firecrawl": { + "formats": ["markdown", "summary", "images"] + } +} +``` + +Run `web-search-config` inside Pi to add or edit Tavily, Exa, and Firecrawl providers interactively. + ## Development ```bash diff --git a/src/commands/web-search-config.test.ts b/src/commands/web-search-config.test.ts index deccebe..a4ad1df 100644 --- a/src/commands/web-search-config.test.ts +++ b/src/commands/web-search-config.test.ts @@ -2,64 +2,81 @@ import test from "node:test"; import assert from "node:assert/strict"; import { createDefaultWebSearchConfig, - removeProviderOrThrow, - renameProviderOrThrow, - setDefaultProviderOrThrow, updateProviderOrThrow, } from "./web-search-config.ts"; -test("createDefaultWebSearchConfig builds a Tavily-first file", () => { +test("createDefaultWebSearchConfig can create a Firecrawl default provider", () => { const config = createDefaultWebSearchConfig({ - tavilyName: "tavily-main", - tavilyApiKey: "tvly-test-key", + provider: { + name: "firecrawl-main", + type: "firecrawl", + baseUrl: "https://firecrawl.internal.example/v2", + }, }); - assert.equal(config.defaultProvider, "tavily-main"); - assert.equal(config.providers[0]?.type, "tavily"); + assert.deepEqual(config, { + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + baseUrl: "https://firecrawl.internal.example/v2", + }, + ], + }); }); -test("renameProviderOrThrow updates defaultProvider when renaming the default", () => { - const config = createDefaultWebSearchConfig({ - tavilyName: "tavily-main", - tavilyApiKey: "tvly-test-key", +test("updateProviderOrThrow allows a Firecrawl provider to omit apiKey when baseUrl exists", () => { + const next = updateProviderOrThrow( + { + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc-test-key", + baseUrl: "https://firecrawl.internal.example/v2", + }, + { + name: "exa-fallback", + type: "exa", + apiKey: "exa-test-key", + }, + ], + }, + "firecrawl-main", + { + apiKey: "", + baseUrl: "https://firecrawl.internal.example/v2", + fallbackProviders: ["exa-fallback"], + }, + ); + + assert.deepEqual(next.providers[0], { + name: "firecrawl-main", + type: "firecrawl", + baseUrl: "https://firecrawl.internal.example/v2", + fallbackProviders: ["exa-fallback"], }); - - const next = renameProviderOrThrow(config, "tavily-main", "tavily-primary"); - - assert.equal(next.defaultProvider, "tavily-primary"); - assert.equal(next.providers[0]?.name, "tavily-primary"); }); -test("removeProviderOrThrow rejects removing the last provider", () => { - const config = createDefaultWebSearchConfig({ - tavilyName: "tavily-main", - tavilyApiKey: "tvly-test-key", - }); - - assert.throws(() => removeProviderOrThrow(config, "tavily-main"), /last provider/); -}); - -test("setDefaultProviderOrThrow requires an existing provider name", () => { - const config = createDefaultWebSearchConfig({ - tavilyName: "tavily-main", - tavilyApiKey: "tvly-test-key", - }); - - assert.throws(() => setDefaultProviderOrThrow(config, "missing"), /Unknown provider/); -}); - -test("updateProviderOrThrow can change provider-specific options without changing type", () => { - const config = createDefaultWebSearchConfig({ - tavilyName: "tavily-main", - tavilyApiKey: "tvly-test-key", - }); - - const next = updateProviderOrThrow(config, "tavily-main", { - apiKey: "tvly-next-key", - options: { defaultSearchLimit: 8 }, - }); - - assert.equal(next.providers[0]?.apiKey, "tvly-next-key"); - assert.equal(next.providers[0]?.options?.defaultSearchLimit, 8); - assert.equal(next.providers[0]?.type, "tavily"); +test("updateProviderOrThrow rejects a blank apiKey for Exa", () => { + assert.throws( + () => + updateProviderOrThrow( + { + defaultProvider: "exa-main", + providers: [ + { + name: "exa-main", + type: "exa", + apiKey: "exa-test-key", + }, + ], + }, + "exa-main", + { apiKey: "" }, + ), + /Provider apiKey cannot be blank/, + ); }); diff --git a/src/commands/web-search-config.ts b/src/commands/web-search-config.ts index 793cb52..f60309e 100644 --- a/src/commands/web-search-config.ts +++ b/src/commands/web-search-config.ts @@ -1,22 +1,60 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { getDefaultWebSearchConfigPath, + normalizeWebSearchConfig, readRawWebSearchConfig, writeWebSearchConfig, WebSearchConfigError, } from "../config.ts"; import type { WebSearchConfig, WebSearchProviderConfig } from "../schema.ts"; -export function createDefaultWebSearchConfig(input: { tavilyName: string; tavilyApiKey: string }): WebSearchConfig { +type ProviderPatch = { + apiKey?: string; + baseUrl?: string; + fallbackProviders?: string[]; + options?: WebSearchProviderConfig["options"]; +}; + +function validateProviderDraftOrThrow(provider: WebSearchProviderConfig) { + if (!provider.name.trim()) { + throw new Error("Provider name cannot be blank."); + } + + if (provider.type === "firecrawl") { + const apiKey = provider.apiKey?.trim(); + const baseUrl = provider.baseUrl?.trim(); + if (!apiKey && !baseUrl) { + throw new Error("Firecrawl provider apiKey cannot be blank unless baseUrl is set."); + } + return; + } + + if (!provider.apiKey.trim()) { + throw new Error("Provider apiKey cannot be blank."); + } +} + +function normalizeDraftConfigOrThrow(config: WebSearchConfig, path: string): WebSearchConfig { + const normalized = normalizeWebSearchConfig(config, path); return { - defaultProvider: input.tavilyName, - providers: [ - { - name: input.tavilyName, - type: "tavily", - apiKey: input.tavilyApiKey, - }, - ], + defaultProvider: normalized.defaultProviderName, + providers: normalized.providers, + }; +} + +function parseFallbackProviders(value: string) { + const items = value + .split(",") + .map((item) => item.trim()) + .filter(Boolean); + return items.length > 0 ? items : undefined; +} + +export function createDefaultWebSearchConfig(input: { provider: WebSearchProviderConfig }): WebSearchConfig { + validateProviderDraftOrThrow(input.provider); + return { + defaultProvider: input.provider.name, + providers: [input.provider], }; } @@ -41,36 +79,57 @@ export function renameProviderOrThrow( return { defaultProvider: config.defaultProvider === currentName ? nextName : config.defaultProvider, - providers: config.providers.map((provider) => - provider.name === currentName ? { ...provider, name: nextName } : provider, - ), + providers: config.providers.map((provider) => ({ + ...provider, + name: provider.name === currentName ? nextName : provider.name, + fallbackProviders: provider.fallbackProviders?.map((name) => (name === currentName ? nextName : name)), + })), }; } export function updateProviderOrThrow( config: WebSearchConfig, providerName: string, - patch: { apiKey?: string; options?: WebSearchProviderConfig["options"] }, + patch: ProviderPatch, ): WebSearchConfig { const existing = config.providers.find((provider) => provider.name === providerName); if (!existing) { throw new Error(`Unknown provider: ${providerName}`); } - if (patch.apiKey !== undefined && !patch.apiKey.trim()) { - throw new Error("Provider apiKey cannot be blank."); + + let nextProvider: WebSearchProviderConfig; + if (existing.type === "firecrawl") { + const nextBaseUrl = patch.baseUrl ?? existing.baseUrl; + const nextApiKey = patch.apiKey !== undefined ? patch.apiKey.trim() || undefined : existing.apiKey; + const nextFallbackProviders = patch.fallbackProviders ?? existing.fallbackProviders; + const nextOptions = patch.options ?? existing.options; + + nextProvider = { + name: existing.name, + type: existing.type, + ...(nextApiKey ? { apiKey: nextApiKey } : {}), + ...(nextBaseUrl ? { baseUrl: nextBaseUrl } : {}), + ...(nextFallbackProviders ? { fallbackProviders: nextFallbackProviders } : {}), + ...(nextOptions ? { options: nextOptions } : {}), + }; + } else { + if (patch.apiKey !== undefined && !patch.apiKey.trim()) { + throw new Error("Provider apiKey cannot be blank."); + } + + nextProvider = { + ...existing, + apiKey: patch.apiKey ?? existing.apiKey, + fallbackProviders: patch.fallbackProviders ?? existing.fallbackProviders, + options: patch.options ?? existing.options, + }; } + validateProviderDraftOrThrow(nextProvider); + return { ...config, - providers: config.providers.map((provider) => - provider.name === providerName - ? { - ...provider, - apiKey: patch.apiKey ?? provider.apiKey, - options: patch.options ?? provider.options, - } - : provider, - ), + providers: config.providers.map((provider) => (provider.name === providerName ? nextProvider : provider)), }; } @@ -88,12 +147,7 @@ export function removeProviderOrThrow(config: WebSearchConfig, providerName: str } function upsertProviderOrThrow(config: WebSearchConfig, nextProvider: WebSearchProviderConfig): WebSearchConfig { - if (!nextProvider.name.trim()) { - throw new Error("Provider name cannot be blank."); - } - if (!nextProvider.apiKey.trim()) { - throw new Error("Provider apiKey cannot be blank."); - } + validateProviderDraftOrThrow(nextProvider); const withoutSameName = config.providers.filter((provider) => provider.name !== nextProvider.name); return { @@ -107,6 +161,14 @@ async function promptProviderOptions(ctx: any, provider: WebSearchProviderConfig `Default search limit for ${provider.name}`, provider.options?.defaultSearchLimit !== undefined ? String(provider.options.defaultSearchLimit) : "", ); + + if (provider.type === "firecrawl") { + const options = { + defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined, + }; + return options.defaultSearchLimit !== undefined ? options : undefined; + } + const defaultFetchTextMaxCharacters = await ctx.ui.input( `Default fetch text max characters for ${provider.name}`, provider.options?.defaultFetchTextMaxCharacters !== undefined @@ -114,19 +176,89 @@ async function promptProviderOptions(ctx: any, provider: WebSearchProviderConfig : "", ); + if (provider.type === "tavily") { + const options = { + defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined, + defaultFetchTextMaxCharacters: defaultFetchTextMaxCharacters + ? Number(defaultFetchTextMaxCharacters) + : undefined, + }; + return Object.values(options).some((value) => value !== undefined) ? options : undefined; + } + + const defaultFetchHighlightsMaxCharacters = await ctx.ui.input( + `Default fetch highlights max characters for ${provider.name}`, + provider.options?.defaultFetchHighlightsMaxCharacters !== undefined + ? String(provider.options.defaultFetchHighlightsMaxCharacters) + : "", + ); + const options = { defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined, defaultFetchTextMaxCharacters: defaultFetchTextMaxCharacters ? Number(defaultFetchTextMaxCharacters) : undefined, + defaultFetchHighlightsMaxCharacters: defaultFetchHighlightsMaxCharacters + ? Number(defaultFetchHighlightsMaxCharacters) + : undefined, }; return Object.values(options).some((value) => value !== undefined) ? options : undefined; } +async function promptFallbackProviders(ctx: any, provider: WebSearchProviderConfig) { + const value = await ctx.ui.input( + `Fallback providers for ${provider.name} (comma-separated names)`, + (provider.fallbackProviders ?? []).join(", "), + ); + return parseFallbackProviders(value ?? ""); +} + +async function promptNewProvider(ctx: any, type: WebSearchProviderConfig["type"]) { + const name = await ctx.ui.input( + "Provider name", + type === "tavily" ? "tavily-main" : type === "exa" ? "exa-fallback" : "firecrawl-main", + ); + if (!name) { + return undefined; + } + + if (type === "firecrawl") { + const baseUrl = await ctx.ui.input("Firecrawl base URL (blank uses cloud default)", ""); + const apiKey = await ctx.ui.input("Firecrawl API key (blank allowed when base URL is set)", "fc-..."); + const provider: WebSearchProviderConfig = { + name, + type, + ...(apiKey?.trim() ? { apiKey } : {}), + ...(baseUrl?.trim() ? { baseUrl } : {}), + }; + const fallbackProviders = await promptFallbackProviders(ctx, provider); + const options = await promptProviderOptions(ctx, provider); + return { + ...provider, + ...(fallbackProviders ? { fallbackProviders } : {}), + ...(options ? { options } : {}), + }; + } + + const apiKey = await ctx.ui.input(type === "tavily" ? "Tavily API key" : "Exa API key", type === "tavily" ? "tvly-..." : "exa_..."); + if (!apiKey) { + return undefined; + } + + const provider: WebSearchProviderConfig = { name, type, apiKey }; + const fallbackProviders = await promptFallbackProviders(ctx, provider); + const options = await promptProviderOptions(ctx, provider); + return { + ...provider, + ...(fallbackProviders ? { fallbackProviders } : {}), + ...(options ? { options } : {}), + }; +} + export function registerWebSearchConfigCommand(pi: ExtensionAPI) { pi.registerCommand("web-search-config", { - description: "Configure Tavily/Exa providers for web_search and web_fetch", + description: "Configure Tavily/Exa/Firecrawl providers for web_search and web_fetch", handler: async (_args, ctx) => { const path = getDefaultWebSearchConfigPath(); @@ -138,18 +270,34 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) { throw error; } - const tavilyName = await ctx.ui.input("Create Tavily provider", "tavily-main"); - const tavilyApiKey = await ctx.ui.input("Tavily API key", "tvly-..."); - if (!tavilyName || !tavilyApiKey) { + const createType = await ctx.ui.select("Create initial provider", [ + "Add Tavily provider", + "Add Exa provider", + "Add Firecrawl provider", + ]); + if (!createType) { return; } - config = createDefaultWebSearchConfig({ tavilyName, tavilyApiKey }); + + const provider = await promptNewProvider( + ctx, + createType === "Add Tavily provider" + ? "tavily" + : createType === "Add Exa provider" + ? "exa" + : "firecrawl", + ); + if (!provider) { + return; + } + config = createDefaultWebSearchConfig({ provider }); } const action = await ctx.ui.select("Web search config", [ "Set default provider", "Add Tavily provider", "Add Exa provider", + "Add Firecrawl provider", "Edit provider", "Remove provider", ]); @@ -168,22 +316,15 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) { config = setDefaultProviderOrThrow(config, nextDefault); } - if (action === "Add Tavily provider") { - const name = await ctx.ui.input("Provider name", "tavily-main"); - const apiKey = await ctx.ui.input("Tavily API key", "tvly-..."); - if (!name || !apiKey) { + if (action === "Add Tavily provider" || action === "Add Exa provider" || action === "Add Firecrawl provider") { + const provider = await promptNewProvider( + ctx, + action === "Add Tavily provider" ? "tavily" : action === "Add Exa provider" ? "exa" : "firecrawl", + ); + if (!provider) { return; } - config = upsertProviderOrThrow(config, { name, type: "tavily", apiKey }); - } - - if (action === "Add Exa provider") { - const name = await ctx.ui.input("Provider name", "exa-fallback"); - const apiKey = await ctx.ui.input("Exa API key", "exa_..."); - if (!name || !apiKey) { - return; - } - config = upsertProviderOrThrow(config, { name, type: "exa", apiKey }); + config = upsertProviderOrThrow(config, provider); } if (action === "Edit provider") { @@ -197,18 +338,38 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) { const existing = config.providers.find((provider) => provider.name === providerName)!; const nextName = await ctx.ui.input("Provider name", existing.name); - const nextApiKey = await ctx.ui.input(`API key for ${existing.name}`, existing.apiKey); - if (!nextName || !nextApiKey) { + if (!nextName) { return; } config = renameProviderOrThrow(config, existing.name, nextName); const renamed = config.providers.find((provider) => provider.name === nextName)!; + const fallbackProviders = await promptFallbackProviders(ctx, renamed); const nextOptions = await promptProviderOptions(ctx, renamed); - config = updateProviderOrThrow(config, nextName, { - apiKey: nextApiKey, - options: nextOptions, - }); + + if (renamed.type === "firecrawl") { + const nextBaseUrl = await ctx.ui.input("Firecrawl base URL (blank uses cloud default)", renamed.baseUrl ?? ""); + const nextApiKey = await ctx.ui.input( + `API key for ${renamed.name} (blank allowed when base URL is set)`, + renamed.apiKey ?? "", + ); + config = updateProviderOrThrow(config, nextName, { + apiKey: nextApiKey, + baseUrl: nextBaseUrl, + fallbackProviders, + options: nextOptions, + }); + } else { + const nextApiKey = await ctx.ui.input(`API key for ${renamed.name}`, renamed.apiKey); + if (!nextApiKey) { + return; + } + config = updateProviderOrThrow(config, nextName, { + apiKey: nextApiKey, + fallbackProviders, + options: nextOptions, + }); + } } if (action === "Remove provider") { @@ -222,7 +383,8 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) { config = removeProviderOrThrow(config, providerName); } - await writeWebSearchConfig(path, config); + const normalizedConfig = normalizeDraftConfigOrThrow(config, path); + await writeWebSearchConfig(path, normalizedConfig); ctx.ui.notify(`Saved web-search config to ${path}`, "info"); }, }); diff --git a/src/config.test.ts b/src/config.test.ts index 6a32297..bbcdee0 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -93,3 +93,101 @@ test("loadWebSearchConfig rejects a missing file with a helpful example message" error.message.includes('"providers"'), ); }); + +test("loadWebSearchConfig accepts self-hosted Firecrawl without an apiKey and normalizes its baseUrl", async () => { + const file = await writeTempConfig({ + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + baseUrl: "https://firecrawl.internal.example/v2/", + fallbackProviders: ["exa-fallback"], + }, + { + name: "exa-fallback", + type: "exa", + apiKey: "exa-test-key", + }, + ], + }); + + const config = await loadWebSearchConfig(file); + const provider = config.providersByName.get("firecrawl-main"); + + assert.equal(provider?.type, "firecrawl"); + assert.equal(provider?.baseUrl, "https://firecrawl.internal.example/v2"); + assert.equal(provider?.apiKey, undefined); + assert.deepEqual(provider?.fallbackProviders, ["exa-fallback"]); +}); + +test("loadWebSearchConfig rejects Firecrawl cloud config without an apiKey", async () => { + const file = await writeTempConfig({ + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + }, + ], + }); + + await assert.rejects( + () => loadWebSearchConfig(file), + (error) => + error instanceof WebSearchConfigError && + /Firecrawl provider \"firecrawl-main\"/.test(error.message) && + /apiKey/.test(error.message), + ); +}); + +test("loadWebSearchConfig rejects unknown fallback providers", async () => { + const file = await writeTempConfig({ + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc-test-key", + fallbackProviders: ["missing-provider"], + }, + ], + }); + + await assert.rejects( + () => loadWebSearchConfig(file), + (error) => + error instanceof WebSearchConfigError && + /fallback provider/.test(error.message) && + /missing-provider/.test(error.message), + ); +}); + +test("loadWebSearchConfig rejects fallback cycles", async () => { + const file = await writeTempConfig({ + defaultProvider: "firecrawl-main", + providers: [ + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc-test-key", + fallbackProviders: ["exa-fallback"], + }, + { + name: "exa-fallback", + type: "exa", + apiKey: "exa-test-key", + fallbackProviders: ["firecrawl-main"], + }, + ], + }); + + await assert.rejects( + () => loadWebSearchConfig(file), + (error) => + error instanceof WebSearchConfigError && + /cycle/i.test(error.message) && + /firecrawl-main/.test(error.message) && + /exa-fallback/.test(error.message), + ); +}); diff --git a/src/config.ts b/src/config.ts index 9b72def..9cb4b65 100644 --- a/src/config.ts +++ b/src/config.ts @@ -4,6 +4,7 @@ import { dirname, join } from "node:path"; import { Value } from "@sinclair/typebox/value"; import { WebSearchConfigSchema, + type FirecrawlProviderConfig, type WebSearchConfig, type WebSearchProviderConfig, } from "./schema.ts"; @@ -36,6 +37,7 @@ function exampleConfigSnippet() { name: "tavily-main", type: "tavily", apiKey: "tvly-...", + fallbackProviders: ["exa-fallback"], }, { name: "exa-fallback", @@ -49,19 +51,134 @@ function exampleConfigSnippet() { ); } +function normalizeBaseUrl(value: string, path: string, providerName: string) { + let parsed: URL; + try { + parsed = new URL(value); + } catch { + throw new WebSearchConfigError(`Firecrawl provider \"${providerName}\" in ${path} has an invalid baseUrl.`); + } + + parsed.pathname = parsed.pathname.replace(/\/+$/, "") || "/"; + return parsed.toString().replace(/\/$/, ""); +} + +function normalizeFallbackProviders(provider: WebSearchProviderConfig, path: string) { + const fallbackProviders = provider.fallbackProviders?.map((name) => name.trim()); + if (!fallbackProviders) { + return undefined; + } + + if (fallbackProviders.some((name) => !name)) { + throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} contains a blank fallback provider name.`); + } + + if (new Set(fallbackProviders).size !== fallbackProviders.length) { + throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} has duplicate fallback providers.`); + } + + return fallbackProviders; +} + +function normalizeProvider(provider: WebSearchProviderConfig, path: string): WebSearchProviderConfig { + const name = provider.name.trim(); + if (!name) { + throw new WebSearchConfigError(`Provider in ${path} is missing a name.`); + } + + const fallbackProviders = normalizeFallbackProviders(provider, path); + + if (provider.type === "firecrawl") { + const apiKey = provider.apiKey?.trim() || undefined; + const baseUrl = provider.baseUrl?.trim() ? normalizeBaseUrl(provider.baseUrl.trim(), path, name) : undefined; + + if (!baseUrl && !apiKey) { + throw new WebSearchConfigError( + `Firecrawl provider \"${name}\" in ${path} requires an apiKey when baseUrl is omitted.`, + ); + } + + return { + ...(provider as FirecrawlProviderConfig), + name, + apiKey, + baseUrl, + fallbackProviders, + }; + } + + const apiKey = provider.apiKey.trim(); + if (!apiKey) { + throw new WebSearchConfigError(`Provider \"${name}\" in ${path} is missing a literal apiKey.`); + } + + return { + ...provider, + name, + apiKey, + fallbackProviders, + }; +} + +function validateFallbackGraph(providersByName: Map, path: string) { + for (const provider of providersByName.values()) { + for (const fallbackProvider of provider.fallbackProviders ?? []) { + if (fallbackProvider === provider.name) { + throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} cannot fall back to itself.`); + } + if (!providersByName.has(fallbackProvider)) { + throw new WebSearchConfigError( + `Provider \"${provider.name}\" in ${path} references unknown fallback provider \"${fallbackProvider}\".`, + ); + } + } + } + + const visited = new Set(); + const activePath: string[] = []; + const activeSet = new Set(); + + const visit = (providerName: string) => { + if (activeSet.has(providerName)) { + const cycleStart = activePath.indexOf(providerName); + const cycle = [...activePath.slice(cycleStart), providerName].join(" -> "); + throw new WebSearchConfigError(`Fallback provider cycle detected in ${path}: ${cycle}`); + } + if (visited.has(providerName)) { + return; + } + + visited.add(providerName); + activeSet.add(providerName); + activePath.push(providerName); + + const provider = providersByName.get(providerName); + for (const fallbackProvider of provider?.fallbackProviders ?? []) { + visit(fallbackProvider); + } + + activePath.pop(); + activeSet.delete(providerName); + }; + + for (const providerName of providersByName.keys()) { + visit(providerName); + } +} + export function normalizeWebSearchConfig(config: WebSearchConfig, path: string): ResolvedWebSearchConfig { const providersByName = new Map(); - for (const provider of config.providers) { - if (!provider.apiKey.trim()) { - throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} is missing a literal apiKey.`); - } + for (const rawProvider of config.providers) { + const provider = normalizeProvider(rawProvider, path); if (providersByName.has(provider.name)) { throw new WebSearchConfigError(`Duplicate provider name \"${provider.name}\" in ${path}.`); } providersByName.set(provider.name, provider); } + validateFallbackGraph(providersByName, path); + const defaultProvider = providersByName.get(config.defaultProvider); if (!defaultProvider) { throw new WebSearchConfigError( diff --git a/src/format.test.ts b/src/format.test.ts index 995eb1d..9c7488d 100644 --- a/src/format.test.ts +++ b/src/format.test.ts @@ -27,7 +27,17 @@ test("formatSearchOutput shows answer and fallback provider metadata", () => { answer: "pi is a coding agent", execution: { actualProviderName: "exa-fallback", - failoverFromProviderName: "tavily-main", + attempts: [ + { + providerName: "firecrawl-main", + status: "failed", + reason: "503 upstream unavailable", + }, + { + providerName: "exa-fallback", + status: "succeeded", + }, + ], }, results: [ { @@ -39,7 +49,7 @@ test("formatSearchOutput shows answer and fallback provider metadata", () => { } as any); assert.match(output, /Answer: pi is a coding agent/); - assert.match(output, /Fallback: tavily-main -> exa-fallback/); + assert.match(output, /Fallback: firecrawl-main -> exa-fallback/); }); test("truncateText shortens long fetch bodies with an ellipsis", () => { @@ -78,7 +88,17 @@ test("formatFetchOutput shows fallback metadata and favicon/images when present" providerName: "exa-fallback", execution: { actualProviderName: "exa-fallback", - failoverFromProviderName: "tavily-main", + attempts: [ + { + providerName: "tavily-main", + status: "failed", + reason: "503 upstream unavailable", + }, + { + providerName: "exa-fallback", + status: "succeeded", + }, + ], }, results: [ { diff --git a/src/format.ts b/src/format.ts index 09565f0..85b454a 100644 --- a/src/format.ts +++ b/src/format.ts @@ -3,11 +3,25 @@ import type { NormalizedFetchResponse, NormalizedSearchResponse } from "./provid function formatFallbackLine(execution?: { actualProviderName?: string; failoverFromProviderName?: string; + attempts?: Array<{ + providerName?: string; + status?: string; + }>; }) { - if (!execution?.failoverFromProviderName || !execution.actualProviderName) { + if (execution?.failoverFromProviderName && execution.actualProviderName) { + return `Fallback: ${execution.failoverFromProviderName} -> ${execution.actualProviderName}`; + } + + if (!execution?.actualProviderName || !execution.attempts?.length) { return undefined; } - return `Fallback: ${execution.failoverFromProviderName} -> ${execution.actualProviderName}`; + + const firstFailedAttempt = execution.attempts.find((attempt) => attempt.status === "failed"); + if (!firstFailedAttempt?.providerName || firstFailedAttempt.providerName === execution.actualProviderName) { + return undefined; + } + + return `Fallback: ${firstFailedAttempt.providerName} -> ${execution.actualProviderName}`; } export function truncateText(text: string, maxCharacters = 4000) { diff --git a/src/providers/firecrawl.test.ts b/src/providers/firecrawl.test.ts new file mode 100644 index 0000000..b74ad50 --- /dev/null +++ b/src/providers/firecrawl.test.ts @@ -0,0 +1,170 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { createFirecrawlProvider } from "./firecrawl.ts"; + +const cloudConfig = { + name: "firecrawl-main", + type: "firecrawl" as const, + apiKey: "fc-test-key", + options: { + defaultSearchLimit: 6, + }, +}; + +test("createFirecrawlProvider maps search requests to Firecrawl /search", async () => { + let capturedUrl = ""; + let capturedInit: RequestInit | undefined; + + const provider = createFirecrawlProvider(cloudConfig, async (url, init) => { + capturedUrl = String(url); + capturedInit = init; + return new Response( + JSON.stringify({ + success: true, + id: "search-1", + data: { + web: [ + { + url: "https://pi.dev", + title: "Pi Docs", + description: "Pi docs summary", + markdown: "# Pi Docs", + }, + ], + }, + }), + { status: 200 }, + ); + }); + + const result = await provider.search({ + query: "pi docs", + limit: 4, + includeDomains: ["pi.dev"], + excludeDomains: ["bad.example"], + firecrawl: { + country: "DE", + location: "Berlin, Germany", + categories: ["github"], + scrapeOptions: { + formats: ["markdown", "summary"], + }, + }, + }); + + const body = JSON.parse(String(capturedInit?.body)); + assert.equal(capturedUrl, "https://api.firecrawl.dev/v2/search"); + assert.deepEqual(capturedInit?.headers, { + "content-type": "application/json", + authorization: "Bearer fc-test-key", + }); + assert.equal(body.query, "pi docs site:pi.dev -site:bad.example"); + assert.equal(body.limit, 4); + assert.equal(body.country, "DE"); + assert.equal(body.location, "Berlin, Germany"); + assert.deepEqual(body.categories, ["github"]); + assert.deepEqual(body.scrapeOptions, { + formats: ["markdown", "summary"], + }); + assert.equal(result.requestId, "search-1"); + assert.equal(result.results[0]?.title, "Pi Docs"); + assert.equal(result.results[0]?.content, "Pi docs summary"); + assert.equal(result.results[0]?.rawContent, "# Pi Docs"); +}); + +test("createFirecrawlProvider omits auth for self-hosted baseUrl when no apiKey is configured", async () => { + let capturedUrl = ""; + let capturedInit: RequestInit | undefined; + + const provider = createFirecrawlProvider( + { + name: "firecrawl-selfhosted", + type: "firecrawl", + baseUrl: "https://firecrawl.internal.example/v2", + }, + async (url, init) => { + capturedUrl = String(url); + capturedInit = init; + return new Response( + JSON.stringify({ + success: true, + data: { + web: [], + }, + }), + { status: 200 }, + ); + }, + ); + + await provider.search({ + query: "pi docs", + }); + + assert.equal(capturedUrl, "https://firecrawl.internal.example/v2/search"); + assert.deepEqual(capturedInit?.headers, { + "content-type": "application/json", + }); +}); + +test("createFirecrawlProvider fetches each URL via /scrape and preserves per-url failures", async () => { + const calls: Array<{ url: string; init: RequestInit | undefined }> = []; + + const provider = createFirecrawlProvider(cloudConfig, async (url, init) => { + calls.push({ url: String(url), init }); + const body = JSON.parse(String(init?.body)); + + if (body.url === "https://bad.example") { + return new Response(JSON.stringify({ error: "Payment required" }), { + status: 402, + statusText: "Payment Required", + }); + } + + return new Response( + JSON.stringify({ + success: true, + data: { + metadata: { + title: "Pi", + sourceURL: body.url, + }, + markdown: "Fetched body", + summary: "Short summary", + images: ["https://pi.dev/logo.png"], + }, + }), + { status: 200 }, + ); + }); + + const result = await provider.fetch({ + urls: ["https://pi.dev", "https://bad.example"], + text: true, + summary: true, + firecrawl: { + formats: ["markdown", "summary", "images"], + }, + }); + + const firstBody = JSON.parse(String(calls[0]?.init?.body)); + assert.equal(calls[0]?.url, "https://api.firecrawl.dev/v2/scrape"); + assert.deepEqual(firstBody, { + url: "https://pi.dev", + formats: ["markdown", "summary", "images"], + }); + assert.deepEqual(result.results, [ + { + url: "https://pi.dev", + title: "Pi", + text: "Fetched body", + summary: "Short summary", + images: ["https://pi.dev/logo.png"], + }, + { + url: "https://bad.example", + title: null, + error: 'Provider "firecrawl-main" HTTP 402 Payment Required: {"error":"Payment required"}', + }, + ]); +}); diff --git a/src/providers/firecrawl.ts b/src/providers/firecrawl.ts new file mode 100644 index 0000000..5389008 --- /dev/null +++ b/src/providers/firecrawl.ts @@ -0,0 +1,198 @@ +import type { FirecrawlProviderConfig } from "../schema.ts"; +import { postJson, type ProviderFetchLike } from "./http.ts"; +import type { + NormalizedFetchRequest, + NormalizedFetchResponse, + NormalizedSearchRequest, + NormalizedSearchResponse, + WebProvider, +} from "./types.ts"; + +const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2"; + +type FirecrawlSearchPayload = { + id?: string; + request_id?: string; + data?: { + web?: Array<{ + url: string; + title?: string; + description?: string; + markdown?: string; + score?: number; + published_date?: string; + images?: string[]; + }>; + }; +}; + +type FirecrawlScrapePayload = { + success?: boolean; + data?: { + markdown?: string; + summary?: string; + images?: string[]; + title?: string; + metadata?: { + title?: string; + sourceURL?: string; + }; + }; +}; + +function resolveBaseUrl(config: FirecrawlProviderConfig) { + return config.baseUrl ?? DEFAULT_FIRECRAWL_BASE_URL; +} + +function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) { + if ((includeDomains?.length ?? 0) > 1) { + throw new Error("Firecrawl currently supports at most one includeDomains entry."); + } + + const parts = [query.trim()]; + if (includeDomains?.[0]) { + parts.push(`site:${includeDomains[0]}`); + } + for (const domain of excludeDomains ?? []) { + parts.push(`-site:${domain}`); + } + + return parts.join(" ").trim(); +} + +function resolveSearchCategories(request: NormalizedSearchRequest) { + if (request.category && request.firecrawl?.categories?.length) { + throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories."); + } + + if (request.firecrawl?.categories?.length) { + return request.firecrawl.categories; + } + + return request.category ? [request.category] : undefined; +} + +function uniqueFormats(formats: string[]) { + return [...new Set(formats)]; +} + +function resolveFetchFormats(request: NormalizedFetchRequest) { + if (request.highlights) { + throw new Error('Firecrawl does not support generic fetch option "highlights".'); + } + + const overrideFormats = request.firecrawl?.formats; + if (overrideFormats?.length) { + if (request.text && !overrideFormats.includes("markdown")) { + throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".'); + } + if (request.summary && !overrideFormats.includes("summary")) { + throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".'); + } + return uniqueFormats([...overrideFormats]); + } + + const formats: string[] = []; + const wantsText = request.text ?? (!request.highlights && !request.summary); + if (wantsText) { + formats.push("markdown"); + } + if (request.summary) { + formats.push("summary"); + } + + return uniqueFormats(formats.length > 0 ? formats : ["markdown"]); +} + +function pickRequestId(payload: { id?: string; request_id?: string }) { + return typeof payload.id === "string" + ? payload.id + : typeof payload.request_id === "string" + ? payload.request_id + : undefined; +} + +export function createFirecrawlProvider( + config: FirecrawlProviderConfig, + fetchImpl: ProviderFetchLike = fetch, +): WebProvider { + const baseUrl = resolveBaseUrl(config); + + return { + name: config.name, + type: config.type, + + async search(request: NormalizedSearchRequest): Promise { + const payload = await postJson({ + providerName: config.name, + baseUrl, + path: "/search", + apiKey: config.apiKey, + fetchImpl, + body: { + query: appendSearchOperators(request.query, request.includeDomains, request.excludeDomains), + limit: request.limit ?? config.options?.defaultSearchLimit ?? 5, + country: request.firecrawl?.country, + location: request.firecrawl?.location, + categories: resolveSearchCategories(request), + scrapeOptions: request.firecrawl?.scrapeOptions, + }, + }); + + return { + providerName: config.name, + requestId: pickRequestId(payload), + results: (payload.data?.web ?? []).map((item) => ({ + title: item.title ?? null, + url: item.url, + content: typeof item.description === "string" ? item.description : undefined, + rawContent: typeof item.markdown === "string" ? item.markdown : undefined, + score: item.score, + publishedDate: item.published_date, + images: Array.isArray(item.images) ? item.images : undefined, + })), + }; + }, + + async fetch(request: NormalizedFetchRequest): Promise { + const formats = resolveFetchFormats(request); + + const results = await Promise.all( + request.urls.map(async (url) => { + try { + const payload = await postJson({ + providerName: config.name, + baseUrl, + path: "/scrape", + apiKey: config.apiKey, + fetchImpl, + body: { + url, + formats, + }, + }); + + return { + url: payload.data?.metadata?.sourceURL ?? url, + title: payload.data?.metadata?.title ?? payload.data?.title ?? null, + text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined, + summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined, + images: Array.isArray(payload.data?.images) ? payload.data.images : undefined, + }; + } catch (error) { + return { + url, + title: null, + error: (error as Error).message, + }; + } + }), + ); + + return { + providerName: config.name, + results, + }; + }, + }; +} diff --git a/src/providers/http.ts b/src/providers/http.ts new file mode 100644 index 0000000..93f6cea --- /dev/null +++ b/src/providers/http.ts @@ -0,0 +1,52 @@ +export type ProviderFetchLike = (input: string, init?: RequestInit) => Promise; + +interface PostJsonOptions { + providerName: string; + baseUrl: string; + path: string; + apiKey?: string; + body: unknown; + fetchImpl?: ProviderFetchLike; +} + +export function joinApiUrl(baseUrl: string, path: string) { + const normalizedBaseUrl = baseUrl.replace(/\/+$/, ""); + const normalizedPath = path.startsWith("/") ? path : `/${path}`; + return `${normalizedBaseUrl}${normalizedPath}`; +} + +export async function readHttpError(providerName: string, response: Response): Promise { + const text = await response.text(); + throw new Error( + `Provider "${providerName}" HTTP ${response.status} ${response.statusText}: ${text.slice(0, 300)}`, + ); +} + +export async function postJson({ + providerName, + baseUrl, + path, + apiKey, + body, + fetchImpl = fetch, +}: PostJsonOptions): Promise { + const headers: Record = { + "content-type": "application/json", + }; + + if (apiKey) { + headers.authorization = `Bearer ${apiKey}`; + } + + const response = await fetchImpl(joinApiUrl(baseUrl, path), { + method: "POST", + headers, + body: JSON.stringify(body), + }); + + if (!response.ok) { + await readHttpError(providerName, response); + } + + return (await response.json()) as T; +} diff --git a/src/providers/registry.ts b/src/providers/registry.ts new file mode 100644 index 0000000..7c61bef --- /dev/null +++ b/src/providers/registry.ts @@ -0,0 +1,127 @@ +import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts"; +import { createExaProvider } from "./exa.ts"; +import { createFirecrawlProvider } from "./firecrawl.ts"; +import { createTavilyProvider } from "./tavily.ts"; +import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts"; + +export type ProviderOptionBlock = "tavily" | "firecrawl"; + +interface ProviderCapabilities { + searchOptionBlocks: ProviderOptionBlock[]; + fetchOptionBlocks: ProviderOptionBlock[]; + fetchFeatures: { + text: boolean; + highlights: boolean; + summary: boolean; + }; +} + +interface ProviderDescriptor { + type: TConfig["type"]; + capabilities: ProviderCapabilities; + createProvider(config: TConfig): WebProvider; + validateSearchRequest?(providerName: string, request: NormalizedSearchRequest): void; + validateFetchRequest?(providerName: string, request: NormalizedFetchRequest): void; +} + +const providerDescriptors = { + exa: { + type: "exa", + capabilities: { + searchOptionBlocks: [], + fetchOptionBlocks: [], + fetchFeatures: { + text: true, + highlights: true, + summary: true, + }, + }, + createProvider(config: ExaProviderConfig) { + return createExaProvider(config); + }, + }, + tavily: { + type: "tavily", + capabilities: { + searchOptionBlocks: ["tavily"], + fetchOptionBlocks: ["tavily"], + fetchFeatures: { + text: true, + highlights: true, + summary: true, + }, + }, + createProvider(config: TavilyProviderConfig) { + return createTavilyProvider(config); + }, + }, + firecrawl: { + type: "firecrawl", + capabilities: { + searchOptionBlocks: ["firecrawl"], + fetchOptionBlocks: ["firecrawl"], + fetchFeatures: { + text: true, + highlights: false, + summary: true, + }, + }, + createProvider(config: FirecrawlProviderConfig) { + return createFirecrawlProvider(config); + }, + }, +} satisfies Record; + +function validateOptionBlocks( + providerName: string, + acceptedOptionBlocks: ProviderOptionBlock[], + blocks: Partial>, +) { + for (const optionBlock of Object.keys(blocks) as ProviderOptionBlock[]) { + if (blocks[optionBlock] === undefined) { + continue; + } + if (!acceptedOptionBlocks.includes(optionBlock)) { + throw new Error(`Provider "${providerName}" does not accept the "${optionBlock}" options block.`); + } + } +} + +export function getProviderDescriptor(provider: Pick | WebSearchProviderConfig["type"]) { + const type = typeof provider === "string" ? provider : provider.type; + return providerDescriptors[type]; +} + +export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) { + const descriptor = getProviderDescriptor(providerConfig); + return descriptor.createProvider(providerConfig as never); +} + +export function validateSearchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedSearchRequest) { + const descriptor = getProviderDescriptor(providerConfig); + validateOptionBlocks(providerName, descriptor.capabilities.searchOptionBlocks, { + tavily: request.tavily, + firecrawl: request.firecrawl, + }); + descriptor.validateSearchRequest?.(providerName, request); +} + +export function validateFetchRequestForProvider(providerName: string, providerConfig: WebSearchProviderConfig, request: NormalizedFetchRequest) { + const descriptor = getProviderDescriptor(providerConfig); + validateOptionBlocks(providerName, descriptor.capabilities.fetchOptionBlocks, { + tavily: request.tavily, + firecrawl: request.firecrawl, + }); + + if (request.text && !descriptor.capabilities.fetchFeatures.text) { + throw new Error(`Provider "${providerName}" does not support generic fetch option "text".`); + } + if (request.highlights && !descriptor.capabilities.fetchFeatures.highlights) { + throw new Error(`Provider "${providerName}" does not support generic fetch option "highlights".`); + } + if (request.summary && !descriptor.capabilities.fetchFeatures.summary) { + throw new Error(`Provider "${providerName}" does not support generic fetch option "summary".`); + } + + descriptor.validateFetchRequest?.(providerName, request); +} diff --git a/src/providers/tavily.ts b/src/providers/tavily.ts index c85fedd..dfe73c7 100644 --- a/src/providers/tavily.ts +++ b/src/providers/tavily.ts @@ -1,4 +1,5 @@ import type { TavilyProviderConfig } from "../schema.ts"; +import { postJson, type ProviderFetchLike } from "./http.ts"; import type { NormalizedFetchRequest, NormalizedFetchResponse, @@ -7,29 +8,22 @@ import type { WebProvider, } from "./types.ts"; -export type TavilyFetchLike = (input: string, init?: RequestInit) => Promise; - -async function readError(response: Response) { - const text = await response.text(); - throw new Error(`Tavily ${response.status} ${response.statusText}: ${text.slice(0, 300)}`); -} - export function createTavilyProvider( config: TavilyProviderConfig, - fetchImpl: TavilyFetchLike = fetch, + fetchImpl: ProviderFetchLike = fetch, ): WebProvider { return { name: config.name, type: config.type, async search(request: NormalizedSearchRequest): Promise { - const response = await fetchImpl("https://api.tavily.com/search", { - method: "POST", - headers: { - "content-type": "application/json", - authorization: `Bearer ${config.apiKey}`, - }, - body: JSON.stringify({ + const data = await postJson({ + providerName: config.name, + baseUrl: "https://api.tavily.com", + path: "/search", + apiKey: config.apiKey, + fetchImpl, + body: { query: request.query, max_results: request.limit ?? config.options?.defaultSearchLimit ?? 5, include_domains: request.includeDomains, @@ -44,14 +38,9 @@ export function createTavilyProvider( include_answer: request.tavily?.includeAnswer, include_raw_content: request.tavily?.includeRawContent, include_images: request.tavily?.includeImages, - }), + }, }); - if (!response.ok) { - await readError(response); - } - - const data = (await response.json()) as any; return { providerName: config.name, requestId: data.request_id, @@ -69,13 +58,13 @@ export function createTavilyProvider( }, async fetch(request: NormalizedFetchRequest): Promise { - const response = await fetchImpl("https://api.tavily.com/extract", { - method: "POST", - headers: { - "content-type": "application/json", - authorization: `Bearer ${config.apiKey}`, - }, - body: JSON.stringify({ + const data = await postJson({ + providerName: config.name, + baseUrl: "https://api.tavily.com", + path: "/extract", + apiKey: config.apiKey, + fetchImpl, + body: { urls: request.urls, query: request.tavily?.query, extract_depth: request.tavily?.extractDepth, @@ -83,14 +72,9 @@ export function createTavilyProvider( include_images: request.tavily?.includeImages, include_favicon: request.tavily?.includeFavicon, format: request.tavily?.format, - }), + }, }); - if (!response.ok) { - await readError(response); - } - - const data = (await response.json()) as any; return { providerName: config.name, requestIds: data.request_id ? [data.request_id] : [], diff --git a/src/providers/types.ts b/src/providers/types.ts index 2d6f265..a3296b0 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -18,6 +18,19 @@ export interface TavilyFetchOptions { format?: string; } +export interface FirecrawlSearchOptions { + country?: string; + location?: string; + categories?: string[]; + scrapeOptions?: { + formats?: Array<"markdown" | "summary">; + }; +} + +export interface FirecrawlFetchOptions { + formats?: Array<"markdown" | "summary" | "images">; +} + export interface NormalizedSearchRequest { query: string; limit?: number; @@ -28,6 +41,7 @@ export interface NormalizedSearchRequest { category?: string; provider?: string; tavily?: TavilySearchOptions; + firecrawl?: FirecrawlSearchOptions; } export interface NormalizedSearchResult { @@ -58,6 +72,7 @@ export interface NormalizedFetchRequest { textMaxCharacters?: number; provider?: string; tavily?: TavilyFetchOptions; + firecrawl?: FirecrawlFetchOptions; } export interface NormalizedFetchResult { diff --git a/src/runtime.test.ts b/src/runtime.test.ts index e452af4..99818f8 100644 --- a/src/runtime.test.ts +++ b/src/runtime.test.ts @@ -15,61 +15,54 @@ function createProvider(name: string, type: string, handlers: Partial) { }; } -test("search retries Tavily failures once with Exa", async () => { +test("search follows configured fallback chains and records every attempt", async () => { const runtime = createWebSearchRuntime({ loadConfig: async () => ({ path: "test.json", - defaultProviderName: "tavily-main", - defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" }, + defaultProviderName: "firecrawl-main", + defaultProvider: { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["tavily-backup"], + }, providers: [ - { name: "tavily-main", type: "tavily", apiKey: "tvly" }, + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["tavily-backup"], + }, + { + name: "tavily-backup", + type: "tavily", + apiKey: "tvly", + fallbackProviders: ["exa-fallback"], + }, { name: "exa-fallback", type: "exa", apiKey: "exa" }, ], providersByName: new Map([ - ["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }], + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["tavily-backup"] }, + ], + [ + "tavily-backup", + { name: "tavily-backup", type: "tavily", apiKey: "tvly", fallbackProviders: ["exa-fallback"] }, + ], ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], ]), }), createProvider(providerConfig) { - if (providerConfig.type === "tavily") { + if (providerConfig.name === "exa-fallback") { return createProvider(providerConfig.name, providerConfig.type, { - search: async () => { - throw new Error("503 upstream unavailable"); - }, + search: async () => ({ + providerName: providerConfig.name, + results: [{ title: "Exa hit", url: "https://exa.ai" }], + }), }); } - return createProvider(providerConfig.name, providerConfig.type, { - search: async () => ({ - providerName: providerConfig.name, - results: [{ title: "Exa hit", url: "https://exa.ai" }], - }), - }); - }, - }); - const result = await runtime.search({ query: "pi docs" }); - - assert.equal(result.execution.actualProviderName, "exa-fallback"); - assert.equal(result.execution.failoverFromProviderName, "tavily-main"); - assert.match(result.execution.failoverReason ?? "", /503/); -}); - -test("search does not retry when Exa was explicitly selected", async () => { - const runtime = createWebSearchRuntime({ - loadConfig: async () => ({ - path: "test.json", - defaultProviderName: "tavily-main", - defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" }, - providers: [ - { name: "tavily-main", type: "tavily", apiKey: "tvly" }, - { name: "exa-fallback", type: "exa", apiKey: "exa" }, - ], - providersByName: new Map([ - ["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }], - ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], - ]), - }), - createProvider(providerConfig) { return createProvider(providerConfig.name, providerConfig.type, { search: async () => { throw new Error(`boom:${providerConfig.name}`); @@ -78,8 +71,136 @@ test("search does not retry when Exa was explicitly selected", async () => { }, }); - await assert.rejects( - () => runtime.search({ query: "pi docs", provider: "exa-fallback" }), - /boom:exa-fallback/, - ); + const result = await runtime.search({ query: "pi docs" }); + + assert.equal(result.execution.actualProviderName, "exa-fallback"); + assert.equal(result.execution.failoverFromProviderName, "firecrawl-main"); + assert.deepEqual(result.execution.attempts, [ + { + providerName: "firecrawl-main", + status: "failed", + reason: "boom:firecrawl-main", + }, + { + providerName: "tavily-backup", + status: "failed", + reason: "boom:tavily-backup", + }, + { + providerName: "exa-fallback", + status: "succeeded", + }, + ]); +}); + +test("search rejects a mismatched provider-specific options block before provider execution", async () => { + let callCount = 0; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "firecrawl-main", + defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }, + providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }], + providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + search: async () => { + callCount += 1; + return { + providerName: providerConfig.name, + results: [], + }; + }, + }); + }, + }); + + await assert.rejects( + () => runtime.search({ query: "pi docs", tavily: { topic: "news" } }), + /does not accept the "tavily" options block/, + ); + assert.equal(callCount, 0); +}); + +test("fetch rejects Firecrawl highlights before provider execution", async () => { + let callCount = 0; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "firecrawl-main", + defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }, + providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }], + providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + fetch: async () => { + callCount += 1; + return { + providerName: providerConfig.name, + results: [], + }; + }, + }); + }, + }); + + await assert.rejects( + () => runtime.fetch({ urls: ["https://pi.dev"], highlights: true }), + /does not support generic fetch option "highlights"/, + ); + assert.equal(callCount, 0); +}); + +test("search starts with the explicitly requested provider and still follows its fallback chain", async () => { + const calls: string[] = []; + + const runtime = createWebSearchRuntime({ + loadConfig: async () => ({ + path: "test.json", + defaultProviderName: "tavily-main", + defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" }, + providers: [ + { name: "tavily-main", type: "tavily", apiKey: "tvly" }, + { + name: "firecrawl-main", + type: "firecrawl", + apiKey: "fc", + fallbackProviders: ["exa-fallback"], + }, + { name: "exa-fallback", type: "exa", apiKey: "exa" }, + ], + providersByName: new Map([ + ["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }], + [ + "firecrawl-main", + { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] }, + ], + ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }], + ]), + }), + createProvider(providerConfig) { + return createProvider(providerConfig.name, providerConfig.type, { + search: async () => { + calls.push(providerConfig.name); + if (providerConfig.name === "exa-fallback") { + return { + providerName: providerConfig.name, + results: [{ title: "Exa hit", url: "https://exa.ai" }], + }; + } + throw new Error(`boom:${providerConfig.name}`); + }, + }); + }, + }); + + const result = await runtime.search({ query: "pi docs", provider: "firecrawl-main" }); + + assert.deepEqual(calls, ["firecrawl-main", "exa-fallback"]); + assert.equal(result.execution.requestedProviderName, "firecrawl-main"); + assert.equal(result.execution.actualProviderName, "exa-fallback"); }); diff --git a/src/runtime.ts b/src/runtime.ts index 62c3499..52003b2 100644 --- a/src/runtime.ts +++ b/src/runtime.ts @@ -1,6 +1,9 @@ import { loadWebSearchConfig, type ResolvedWebSearchConfig } from "./config.ts"; -import { createExaProvider } from "./providers/exa.ts"; -import { createTavilyProvider } from "./providers/tavily.ts"; +import { + createProviderFromConfig, + validateFetchRequestForProvider, + validateSearchRequestForProvider, +} from "./providers/registry.ts"; import type { NormalizedFetchRequest, NormalizedFetchResponse, @@ -10,11 +13,18 @@ import type { } from "./providers/types.ts"; import type { WebSearchProviderConfig } from "./schema.ts"; +export interface ProviderExecutionAttempt { + providerName: string; + status: "failed" | "succeeded"; + reason?: string; +} + export interface ProviderExecutionMeta { requestedProviderName?: string; actualProviderName: string; failoverFromProviderName?: string; failoverReason?: string; + attempts: ProviderExecutionAttempt[]; } export interface RuntimeSearchResponse extends NormalizedSearchResponse { @@ -25,6 +35,36 @@ export interface RuntimeFetchResponse extends NormalizedFetchResponse { execution: ProviderExecutionMeta; } +function createUnknownProviderError(config: ResolvedWebSearchConfig, selectedName: string) { + return new Error( + `Unknown web-search provider \"${selectedName}\". Configured providers: ${[...config.providersByName.keys()].join(", ")}`, + ); +} + +function buildExecutionMeta( + requestedProviderName: string | undefined, + actualProviderName: string, + attempts: ProviderExecutionAttempt[], +): ProviderExecutionMeta { + const firstFailedAttempt = attempts.find((attempt) => attempt.status === "failed"); + const didFailOver = firstFailedAttempt && firstFailedAttempt.providerName !== actualProviderName; + + return { + requestedProviderName, + actualProviderName, + failoverFromProviderName: didFailOver ? firstFailedAttempt?.providerName : undefined, + failoverReason: didFailOver ? firstFailedAttempt?.reason : undefined, + attempts, + }; +} + +function attachAttempts(error: unknown, attempts: ProviderExecutionAttempt[]) { + if (error instanceof Error) { + (error as Error & { execution?: { attempts: ProviderExecutionAttempt[] } }).execution = { attempts }; + } + return error; +} + export function createWebSearchRuntime( deps: { loadConfig?: () => Promise; @@ -32,14 +72,7 @@ export function createWebSearchRuntime( } = {}, ) { const loadConfig = deps.loadConfig ?? loadWebSearchConfig; - const createProvider = deps.createProvider ?? ((providerConfig: WebSearchProviderConfig) => { - switch (providerConfig.type) { - case "tavily": - return createTavilyProvider(providerConfig); - case "exa": - return createExaProvider(providerConfig); - } - }); + const createProvider = deps.createProvider ?? createProviderFromConfig; async function resolveConfigAndProvider(providerName?: string) { const config = await loadConfig(); @@ -47,89 +80,85 @@ export function createWebSearchRuntime( const selectedConfig = config.providersByName.get(selectedName); if (!selectedConfig) { - throw new Error( - `Unknown web-search provider \"${selectedName}\". Configured providers: ${[...config.providersByName.keys()].join(", ")}`, - ); + throw createUnknownProviderError(config, selectedName); } return { config, selectedName, selectedConfig, - selectedProvider: createProvider(selectedConfig), }; } - async function search(request: NormalizedSearchRequest): Promise { - const { config, selectedName, selectedConfig, selectedProvider } = await resolveConfigAndProvider(request.provider); + async function executeWithFailover( + request: NormalizedSearchRequest | NormalizedFetchRequest, + operation: "search" | "fetch", + ): Promise { + const { config, selectedName } = await resolveConfigAndProvider(request.provider); + const attempts: ProviderExecutionAttempt[] = []; + const pendingProviderNames = [selectedName]; + const visited = new Set(); + let lastError: unknown; - try { - const response = await selectedProvider.search(request); - return { - ...response, - execution: { - requestedProviderName: request.provider, - actualProviderName: selectedName, - }, - }; - } catch (error) { - if (selectedConfig.type !== "tavily") { - throw error; + while (pendingProviderNames.length > 0) { + const providerName = pendingProviderNames.shift(); + if (!providerName || visited.has(providerName)) { + continue; + } + visited.add(providerName); + + const providerConfig = config.providersByName.get(providerName); + if (!providerConfig) { + throw createUnknownProviderError(config, providerName); } - const fallbackConfig = [...config.providersByName.values()].find((provider) => provider.type === "exa"); - if (!fallbackConfig) { - throw error; + if (operation === "search") { + validateSearchRequestForProvider(providerName, providerConfig, request as NormalizedSearchRequest); + } else { + validateFetchRequestForProvider(providerName, providerConfig, request as NormalizedFetchRequest); } - const fallbackProvider = createProvider(fallbackConfig); - const fallbackResponse = await fallbackProvider.search({ ...request, provider: fallbackConfig.name }); - return { - ...fallbackResponse, - execution: { - requestedProviderName: request.provider, - actualProviderName: fallbackConfig.name, - failoverFromProviderName: selectedName, - failoverReason: (error as Error).message, - }, - }; + const provider = createProvider(providerConfig); + + try { + const response = await provider[operation]({ + ...request, + provider: providerName, + } as never); + attempts.push({ + providerName, + status: "succeeded", + }); + + return { + ...response, + execution: buildExecutionMeta(request.provider, providerName, attempts), + } as TResponse & { execution: ProviderExecutionMeta }; + } catch (error) { + attempts.push({ + providerName, + status: "failed", + reason: (error as Error).message, + }); + lastError = error; + + for (const fallbackProviderName of providerConfig.fallbackProviders ?? []) { + if (!visited.has(fallbackProviderName)) { + pendingProviderNames.push(fallbackProviderName); + } + } + } } + + throw attachAttempts(lastError, attempts); + } + + async function search(request: NormalizedSearchRequest): Promise { + return executeWithFailover(request, "search"); } async function fetch(request: NormalizedFetchRequest): Promise { - const { config, selectedName, selectedConfig, selectedProvider } = await resolveConfigAndProvider(request.provider); - - try { - const response = await selectedProvider.fetch(request); - return { - ...response, - execution: { - requestedProviderName: request.provider, - actualProviderName: selectedName, - }, - }; - } catch (error) { - if (selectedConfig.type !== "tavily") { - throw error; - } - - const fallbackConfig = [...config.providersByName.values()].find((provider) => provider.type === "exa"); - if (!fallbackConfig) { - throw error; - } - - const fallbackProvider = createProvider(fallbackConfig); - const fallbackResponse = await fallbackProvider.fetch({ ...request, provider: fallbackConfig.name }); - return { - ...fallbackResponse, - execution: { - requestedProviderName: request.provider, - actualProviderName: fallbackConfig.name, - failoverFromProviderName: selectedName, - failoverReason: (error as Error).message, - }, - }; - } + return executeWithFailover(request, "fetch"); } return { diff --git a/src/schema.ts b/src/schema.ts index 51bd595..7a9d32e 100644 --- a/src/schema.ts +++ b/src/schema.ts @@ -1,5 +1,8 @@ import { Type, type Static } from "@sinclair/typebox"; +const NonEmptyStringSchema = Type.String({ minLength: 1 }); +const FallbackProvidersSchema = Type.Optional(Type.Array(NonEmptyStringSchema, { minItems: 1 })); + export const ProviderOptionsSchema = Type.Object({ defaultSearchLimit: Type.Optional(Type.Integer({ minimum: 1 })), defaultFetchTextMaxCharacters: Type.Optional(Type.Integer({ minimum: 1 })), @@ -7,10 +10,11 @@ export const ProviderOptionsSchema = Type.Object({ }); export const ExaProviderConfigSchema = Type.Object({ - name: Type.String({ minLength: 1 }), + name: NonEmptyStringSchema, type: Type.Literal("exa"), - apiKey: Type.String({ minLength: 1 }), + apiKey: NonEmptyStringSchema, options: Type.Optional(ProviderOptionsSchema), + fallbackProviders: FallbackProvidersSchema, }); export const TavilyProviderOptionsSchema = Type.Object({ @@ -19,16 +23,34 @@ export const TavilyProviderOptionsSchema = Type.Object({ }); export const TavilyProviderConfigSchema = Type.Object({ - name: Type.String({ minLength: 1 }), + name: NonEmptyStringSchema, type: Type.Literal("tavily"), - apiKey: Type.String({ minLength: 1 }), + apiKey: NonEmptyStringSchema, options: Type.Optional(TavilyProviderOptionsSchema), + fallbackProviders: FallbackProvidersSchema, }); -export const WebSearchProviderConfigSchema = Type.Union([ExaProviderConfigSchema, TavilyProviderConfigSchema]); +export const FirecrawlProviderOptionsSchema = Type.Object({ + defaultSearchLimit: Type.Optional(Type.Integer({ minimum: 1 })), +}); + +export const FirecrawlProviderConfigSchema = Type.Object({ + name: NonEmptyStringSchema, + type: Type.Literal("firecrawl"), + apiKey: Type.Optional(NonEmptyStringSchema), + baseUrl: Type.Optional(NonEmptyStringSchema), + options: Type.Optional(FirecrawlProviderOptionsSchema), + fallbackProviders: FallbackProvidersSchema, +}); + +export const WebSearchProviderConfigSchema = Type.Union([ + ExaProviderConfigSchema, + TavilyProviderConfigSchema, + FirecrawlProviderConfigSchema, +]); export const WebSearchConfigSchema = Type.Object({ - defaultProvider: Type.String({ minLength: 1 }), + defaultProvider: NonEmptyStringSchema, providers: Type.Array(WebSearchProviderConfigSchema, { minItems: 1 }), }); @@ -52,6 +74,28 @@ export const TavilyFetchToolOptionsSchema = Type.Object({ format: Type.Optional(Type.String()), }); +export const FirecrawlSearchFormatSchema = Type.Union([Type.Literal("markdown"), Type.Literal("summary")]); +export const FirecrawlFetchFormatSchema = Type.Union([ + Type.Literal("markdown"), + Type.Literal("summary"), + Type.Literal("images"), +]); + +export const FirecrawlSearchToolOptionsSchema = Type.Object({ + country: Type.Optional(Type.String()), + location: Type.Optional(Type.String()), + categories: Type.Optional(Type.Array(NonEmptyStringSchema, { minItems: 1 })), + scrapeOptions: Type.Optional( + Type.Object({ + formats: Type.Optional(Type.Array(FirecrawlSearchFormatSchema, { minItems: 1 })), + }), + ), +}); + +export const FirecrawlFetchToolOptionsSchema = Type.Object({ + formats: Type.Optional(Type.Array(FirecrawlFetchFormatSchema, { minItems: 1 })), +}); + export const WebSearchParamsSchema = Type.Object({ query: Type.String({ minLength: 1, description: "Search query" }), limit: Type.Optional(Type.Integer({ minimum: 1, maximum: 25 })), @@ -62,6 +106,7 @@ export const WebSearchParamsSchema = Type.Object({ category: Type.Optional(Type.String()), provider: Type.Optional(Type.String()), tavily: Type.Optional(TavilySearchToolOptionsSchema), + firecrawl: Type.Optional(FirecrawlSearchToolOptionsSchema), }); export const WebFetchParamsSchema = Type.Object({ @@ -72,15 +117,22 @@ export const WebFetchParamsSchema = Type.Object({ textMaxCharacters: Type.Optional(Type.Integer({ minimum: 1 })), provider: Type.Optional(Type.String()), tavily: Type.Optional(TavilyFetchToolOptionsSchema), + firecrawl: Type.Optional(FirecrawlFetchToolOptionsSchema), }); export type ProviderOptions = Static; export type TavilyProviderOptions = Static; +export type FirecrawlProviderOptions = Static; export type ExaProviderConfig = Static; export type TavilyProviderConfig = Static; +export type FirecrawlProviderConfig = Static; export type WebSearchProviderConfig = Static; export type WebSearchConfig = Static; export type TavilySearchToolOptions = Static; export type TavilyFetchToolOptions = Static; +export type FirecrawlSearchFormat = Static; +export type FirecrawlFetchFormat = Static; +export type FirecrawlSearchToolOptions = Static; +export type FirecrawlFetchToolOptions = Static; export type WebSearchParams = Static; export type WebFetchParams = Static; diff --git a/src/tools/web-fetch.test.ts b/src/tools/web-fetch.test.ts index 71d2c13..16e0b17 100644 --- a/src/tools/web-fetch.test.ts +++ b/src/tools/web-fetch.test.ts @@ -2,69 +2,37 @@ import test from "node:test"; import assert from "node:assert/strict"; import { createWebFetchTool } from "./web-fetch.ts"; -test("web_fetch prepareArguments folds a single url into urls", () => { - const tool = createWebFetchTool({ - executeFetch: async () => { - throw new Error("not used"); - }, - }); - - assert.deepEqual(tool.prepareArguments?.({ url: "https://exa.ai/docs" }), { - url: "https://exa.ai/docs", - urls: ["https://exa.ai/docs"], - }); -}); - -test("web_fetch forwards nested Tavily extract options to the runtime", async () => { - let capturedRequest: any; +test("createWebFetchTool passes Firecrawl fetch options through to the runtime", async () => { + let captured: any; const tool = createWebFetchTool({ - executeFetch: async (request) => { - capturedRequest = request; + async executeFetch(request) { + captured = request; return { - providerName: "tavily-main", - results: [ - { - url: "https://pi.dev", - title: "Docs", - text: "Body", - }, - ], - execution: { actualProviderName: "tavily-main" }, + providerName: "firecrawl-main", + results: [], }; }, }); - const result = await tool.execute( - "tool-1", - { - urls: ["https://pi.dev"], - tavily: { - query: "installation", - extractDepth: "advanced", - includeImages: true, - }, + await tool.execute("tool-call", { + urls: ["https://pi.dev"], + provider: "firecrawl-main", + firecrawl: { + formats: ["markdown", "summary", "images"], }, - undefined, - undefined, - undefined, - ); + } as any); - assert.equal(capturedRequest.tavily.query, "installation"); - assert.equal(capturedRequest.tavily.extractDepth, "advanced"); - assert.equal(capturedRequest.text, true); - assert.match((result.content[0] as { text: string }).text, /Body/); -}); - -test("web_fetch rejects malformed URLs", async () => { - const tool = createWebFetchTool({ - executeFetch: async () => { - throw new Error("should not execute fetch for invalid URLs"); + assert.deepEqual(captured, { + urls: ["https://pi.dev/"], + text: true, + highlights: false, + summary: false, + textMaxCharacters: undefined, + provider: "firecrawl-main", + tavily: undefined, + firecrawl: { + formats: ["markdown", "summary", "images"], }, }); - - await assert.rejects( - () => tool.execute("tool-1", { urls: ["not-a-url"] }, undefined, undefined, undefined), - /Invalid URL/, - ); }); diff --git a/src/tools/web-fetch.ts b/src/tools/web-fetch.ts index 08d2443..1215062 100644 --- a/src/tools/web-fetch.ts +++ b/src/tools/web-fetch.ts @@ -29,6 +29,7 @@ function normalizeFetchParams(params: WebFetchParams & { url?: string }) { textMaxCharacters: params.textMaxCharacters, provider: params.provider, tavily: params.tavily, + firecrawl: params.firecrawl, }; } diff --git a/src/tools/web-search.test.ts b/src/tools/web-search.test.ts index 25af609..2ddc1fc 100644 --- a/src/tools/web-search.test.ts +++ b/src/tools/web-search.test.ts @@ -2,54 +2,47 @@ import test from "node:test"; import assert from "node:assert/strict"; import { createWebSearchTool } from "./web-search.ts"; -test("web_search forwards nested Tavily options to the runtime", async () => { - let capturedRequest: any; +test("createWebSearchTool passes Firecrawl search options through to the runtime", async () => { + let captured: any; const tool = createWebSearchTool({ - executeSearch: async (request) => { - capturedRequest = request; + async executeSearch(request) { + captured = request; return { - providerName: "tavily-main", - results: [ - { - title: "Docs", - url: "https://pi.dev", - }, - ], - execution: { actualProviderName: "tavily-main" }, + providerName: "firecrawl-main", + results: [], }; }, }); - const result = await tool.execute( - "tool-1", - { - query: "pi docs", - tavily: { - includeAnswer: true, - includeRawContent: true, - searchDepth: "advanced", + await tool.execute("tool-call", { + query: "pi docs", + provider: "firecrawl-main", + firecrawl: { + country: "DE", + categories: ["github"], + scrapeOptions: { + formats: ["markdown"], }, }, - undefined, - undefined, - undefined, - ); + } as any); - assert.equal(capturedRequest.tavily.includeAnswer, true); - assert.equal(capturedRequest.tavily.searchDepth, "advanced"); - assert.match((result.content[0] as { text: string }).text, /Docs/); -}); - -test("web_search rejects a blank query before resolving a provider", async () => { - const tool = createWebSearchTool({ - executeSearch: async () => { - throw new Error("should not execute search for a blank query"); + assert.deepEqual(captured, { + query: "pi docs", + limit: undefined, + includeDomains: undefined, + excludeDomains: undefined, + startPublishedDate: undefined, + endPublishedDate: undefined, + category: undefined, + provider: "firecrawl-main", + tavily: undefined, + firecrawl: { + country: "DE", + categories: ["github"], + scrapeOptions: { + formats: ["markdown"], + }, }, }); - - await assert.rejects( - () => tool.execute("tool-1", { query: " " }, undefined, undefined, undefined), - /non-empty query/, - ); }); diff --git a/src/tools/web-search.ts b/src/tools/web-search.ts index 0683394..db0dac5 100644 --- a/src/tools/web-search.ts +++ b/src/tools/web-search.ts @@ -34,6 +34,7 @@ export function createWebSearchTool({ executeSearch }: SearchToolDeps) { category: params.category, provider: params.provider, tavily: params.tavily, + firecrawl: params.firecrawl, }); return {