feat: add Firecrawl provider support

This commit is contained in:
pi
2026-04-12 02:53:10 +01:00
parent 01d4411903
commit 98a966cade
20 changed files with 1570 additions and 366 deletions

View File

@@ -1,6 +1,6 @@
# pi-web-search
`pi-web-search` is a Pi extension package that adds `web_search` and `web_fetch` tools backed by pluggable providers such as Exa and Tavily.
`pi-web-search` is a Pi extension package that adds `web_search` and `web_fetch` tools backed by pluggable providers such as Exa, Tavily, and Firecrawl.
## Install
@@ -24,6 +24,71 @@ pi install https://gitea.rwiesner.com/pi/pi-web-search
Provider configuration is managed by the extension's own commands and config files.
Example `~/.pi/agent/web-search.json`:
```json
{
"defaultProvider": "firecrawl-main",
"providers": [
{
"name": "firecrawl-main",
"type": "firecrawl",
"apiKey": "fc-...",
"fallbackProviders": ["exa-fallback"]
},
{
"name": "exa-fallback",
"type": "exa",
"apiKey": "exa_..."
}
]
}
```
Self-hosted Firecrawl:
```json
{
"defaultProvider": "firecrawl-selfhosted",
"providers": [
{
"name": "firecrawl-selfhosted",
"type": "firecrawl",
"baseUrl": "https://firecrawl.internal.example/v2"
}
]
}
```
Tool examples:
```json
{
"query": "pi docs",
"provider": "firecrawl-main",
"firecrawl": {
"country": "DE",
"categories": ["github"],
"scrapeOptions": {
"formats": ["markdown"]
}
}
}
```
```json
{
"urls": ["https://pi.dev"],
"provider": "firecrawl-main",
"summary": true,
"firecrawl": {
"formats": ["markdown", "summary", "images"]
}
}
```
Run `web-search-config` inside Pi to add or edit Tavily, Exa, and Firecrawl providers interactively.
## Development
```bash

View File

@@ -2,64 +2,81 @@ import test from "node:test";
import assert from "node:assert/strict";
import {
createDefaultWebSearchConfig,
removeProviderOrThrow,
renameProviderOrThrow,
setDefaultProviderOrThrow,
updateProviderOrThrow,
} from "./web-search-config.ts";
test("createDefaultWebSearchConfig builds a Tavily-first file", () => {
test("createDefaultWebSearchConfig can create a Firecrawl default provider", () => {
const config = createDefaultWebSearchConfig({
tavilyName: "tavily-main",
tavilyApiKey: "tvly-test-key",
provider: {
name: "firecrawl-main",
type: "firecrawl",
baseUrl: "https://firecrawl.internal.example/v2",
},
});
assert.equal(config.defaultProvider, "tavily-main");
assert.equal(config.providers[0]?.type, "tavily");
assert.deepEqual(config, {
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
baseUrl: "https://firecrawl.internal.example/v2",
},
],
});
});
test("renameProviderOrThrow updates defaultProvider when renaming the default", () => {
const config = createDefaultWebSearchConfig({
tavilyName: "tavily-main",
tavilyApiKey: "tvly-test-key",
test("updateProviderOrThrow allows a Firecrawl provider to omit apiKey when baseUrl exists", () => {
const next = updateProviderOrThrow(
{
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc-test-key",
baseUrl: "https://firecrawl.internal.example/v2",
},
{
name: "exa-fallback",
type: "exa",
apiKey: "exa-test-key",
},
],
},
"firecrawl-main",
{
apiKey: "",
baseUrl: "https://firecrawl.internal.example/v2",
fallbackProviders: ["exa-fallback"],
},
);
assert.deepEqual(next.providers[0], {
name: "firecrawl-main",
type: "firecrawl",
baseUrl: "https://firecrawl.internal.example/v2",
fallbackProviders: ["exa-fallback"],
});
const next = renameProviderOrThrow(config, "tavily-main", "tavily-primary");
assert.equal(next.defaultProvider, "tavily-primary");
assert.equal(next.providers[0]?.name, "tavily-primary");
});
test("removeProviderOrThrow rejects removing the last provider", () => {
const config = createDefaultWebSearchConfig({
tavilyName: "tavily-main",
tavilyApiKey: "tvly-test-key",
});
assert.throws(() => removeProviderOrThrow(config, "tavily-main"), /last provider/);
});
test("setDefaultProviderOrThrow requires an existing provider name", () => {
const config = createDefaultWebSearchConfig({
tavilyName: "tavily-main",
tavilyApiKey: "tvly-test-key",
});
assert.throws(() => setDefaultProviderOrThrow(config, "missing"), /Unknown provider/);
});
test("updateProviderOrThrow can change provider-specific options without changing type", () => {
const config = createDefaultWebSearchConfig({
tavilyName: "tavily-main",
tavilyApiKey: "tvly-test-key",
});
const next = updateProviderOrThrow(config, "tavily-main", {
apiKey: "tvly-next-key",
options: { defaultSearchLimit: 8 },
});
assert.equal(next.providers[0]?.apiKey, "tvly-next-key");
assert.equal(next.providers[0]?.options?.defaultSearchLimit, 8);
assert.equal(next.providers[0]?.type, "tavily");
test("updateProviderOrThrow rejects a blank apiKey for Exa", () => {
assert.throws(
() =>
updateProviderOrThrow(
{
defaultProvider: "exa-main",
providers: [
{
name: "exa-main",
type: "exa",
apiKey: "exa-test-key",
},
],
},
"exa-main",
{ apiKey: "" },
),
/Provider apiKey cannot be blank/,
);
});

View File

@@ -1,22 +1,60 @@
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import {
getDefaultWebSearchConfigPath,
normalizeWebSearchConfig,
readRawWebSearchConfig,
writeWebSearchConfig,
WebSearchConfigError,
} from "../config.ts";
import type { WebSearchConfig, WebSearchProviderConfig } from "../schema.ts";
export function createDefaultWebSearchConfig(input: { tavilyName: string; tavilyApiKey: string }): WebSearchConfig {
/**
 * Fields of an existing provider that `updateProviderOrThrow` accepts as a patch.
 * Every field is optional. The provider's `name` and `type` are not part of the
 * patch, so an update cannot change them.
 */
type ProviderPatch = {
apiKey?: string;
baseUrl?: string;
fallbackProviders?: string[];
options?: WebSearchProviderConfig["options"];
};
/**
 * Checks the minimum a provider draft needs before it may enter the config.
 *
 * - Every provider needs a non-blank `name`.
 * - A Firecrawl provider needs a non-blank `apiKey` or a non-blank `baseUrl`.
 * - Every other provider needs a non-blank `apiKey`.
 *
 * @param provider Provider draft to check; it is not modified.
 * @throws Error with a user-facing message describing the first failed rule.
 */
function validateProviderDraftOrThrow(provider: WebSearchProviderConfig) {
  if (provider.name.trim() === "") {
    throw new Error("Provider name cannot be blank.");
  }

  if (provider.type !== "firecrawl") {
    if (provider.apiKey.trim().length === 0) {
      throw new Error("Provider apiKey cannot be blank.");
    }
    return;
  }

  // Firecrawl: either credential is enough, so only fail when both are blank.
  const hasApiKey = Boolean(provider.apiKey?.trim());
  const hasBaseUrl = Boolean(provider.baseUrl?.trim());
  if (!(hasApiKey || hasBaseUrl)) {
    throw new Error("Firecrawl provider apiKey cannot be blank unless baseUrl is set.");
  }
}
function normalizeDraftConfigOrThrow(config: WebSearchConfig, path: string): WebSearchConfig {
const normalized = normalizeWebSearchConfig(config, path);
return {
defaultProvider: input.tavilyName,
providers: [
{
name: input.tavilyName,
type: "tavily",
apiKey: input.tavilyApiKey,
},
],
defaultProvider: normalized.defaultProviderName,
providers: normalized.providers,
};
}
/**
 * Turns a comma-separated string of provider names into a list.
 *
 * Each entry is trimmed and empty entries are dropped.
 *
 * @param value Raw text such as `"exa-fallback, tavily-main"`.
 * @returns The names in input order, or `undefined` when no name remains.
 */
function parseFallbackProviders(value: string) {
  const names: string[] = [];
  for (const piece of value.split(",")) {
    const trimmed = piece.trim();
    if (trimmed) {
      names.push(trimmed);
    }
  }
  return names.length === 0 ? undefined : names;
}
/**
 * Builds a config file that contains exactly one provider and makes it the default.
 *
 * @param input.provider The provider to seed the config with.
 * @returns A config whose `defaultProvider` is the provider's name.
 * @throws Error when the provider draft fails `validateProviderDraftOrThrow`.
 */
export function createDefaultWebSearchConfig(input: { provider: WebSearchProviderConfig }): WebSearchConfig {
  const { provider } = input;
  validateProviderDraftOrThrow(provider);

  const config: WebSearchConfig = {
    defaultProvider: provider.name,
    providers: [provider],
  };
  return config;
}
@@ -41,36 +79,57 @@ export function renameProviderOrThrow(
return {
defaultProvider: config.defaultProvider === currentName ? nextName : config.defaultProvider,
providers: config.providers.map((provider) =>
provider.name === currentName ? { ...provider, name: nextName } : provider,
),
providers: config.providers.map((provider) => ({
...provider,
name: provider.name === currentName ? nextName : provider.name,
fallbackProviders: provider.fallbackProviders?.map((name) => (name === currentName ? nextName : name)),
})),
};
}
export function updateProviderOrThrow(
config: WebSearchConfig,
providerName: string,
patch: { apiKey?: string; options?: WebSearchProviderConfig["options"] },
patch: ProviderPatch,
): WebSearchConfig {
const existing = config.providers.find((provider) => provider.name === providerName);
if (!existing) {
throw new Error(`Unknown provider: ${providerName}`);
}
if (patch.apiKey !== undefined && !patch.apiKey.trim()) {
throw new Error("Provider apiKey cannot be blank.");
let nextProvider: WebSearchProviderConfig;
if (existing.type === "firecrawl") {
const nextBaseUrl = patch.baseUrl ?? existing.baseUrl;
const nextApiKey = patch.apiKey !== undefined ? patch.apiKey.trim() || undefined : existing.apiKey;
const nextFallbackProviders = patch.fallbackProviders ?? existing.fallbackProviders;
const nextOptions = patch.options ?? existing.options;
nextProvider = {
name: existing.name,
type: existing.type,
...(nextApiKey ? { apiKey: nextApiKey } : {}),
...(nextBaseUrl ? { baseUrl: nextBaseUrl } : {}),
...(nextFallbackProviders ? { fallbackProviders: nextFallbackProviders } : {}),
...(nextOptions ? { options: nextOptions } : {}),
};
} else {
if (patch.apiKey !== undefined && !patch.apiKey.trim()) {
throw new Error("Provider apiKey cannot be blank.");
}
nextProvider = {
...existing,
apiKey: patch.apiKey ?? existing.apiKey,
fallbackProviders: patch.fallbackProviders ?? existing.fallbackProviders,
options: patch.options ?? existing.options,
};
}
validateProviderDraftOrThrow(nextProvider);
return {
...config,
providers: config.providers.map((provider) =>
provider.name === providerName
? {
...provider,
apiKey: patch.apiKey ?? provider.apiKey,
options: patch.options ?? provider.options,
}
: provider,
),
providers: config.providers.map((provider) => (provider.name === providerName ? nextProvider : provider)),
};
}
@@ -88,12 +147,7 @@ export function removeProviderOrThrow(config: WebSearchConfig, providerName: str
}
function upsertProviderOrThrow(config: WebSearchConfig, nextProvider: WebSearchProviderConfig): WebSearchConfig {
if (!nextProvider.name.trim()) {
throw new Error("Provider name cannot be blank.");
}
if (!nextProvider.apiKey.trim()) {
throw new Error("Provider apiKey cannot be blank.");
}
validateProviderDraftOrThrow(nextProvider);
const withoutSameName = config.providers.filter((provider) => provider.name !== nextProvider.name);
return {
@@ -107,6 +161,14 @@ async function promptProviderOptions(ctx: any, provider: WebSearchProviderConfig
`Default search limit for ${provider.name}`,
provider.options?.defaultSearchLimit !== undefined ? String(provider.options.defaultSearchLimit) : "",
);
if (provider.type === "firecrawl") {
const options = {
defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined,
};
return options.defaultSearchLimit !== undefined ? options : undefined;
}
const defaultFetchTextMaxCharacters = await ctx.ui.input(
`Default fetch text max characters for ${provider.name}`,
provider.options?.defaultFetchTextMaxCharacters !== undefined
@@ -114,19 +176,89 @@ async function promptProviderOptions(ctx: any, provider: WebSearchProviderConfig
: "",
);
if (provider.type === "tavily") {
const options = {
defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined,
defaultFetchTextMaxCharacters: defaultFetchTextMaxCharacters
? Number(defaultFetchTextMaxCharacters)
: undefined,
};
return Object.values(options).some((value) => value !== undefined) ? options : undefined;
}
const defaultFetchHighlightsMaxCharacters = await ctx.ui.input(
`Default fetch highlights max characters for ${provider.name}`,
provider.options?.defaultFetchHighlightsMaxCharacters !== undefined
? String(provider.options.defaultFetchHighlightsMaxCharacters)
: "",
);
const options = {
defaultSearchLimit: defaultSearchLimit ? Number(defaultSearchLimit) : undefined,
defaultFetchTextMaxCharacters: defaultFetchTextMaxCharacters
? Number(defaultFetchTextMaxCharacters)
: undefined,
defaultFetchHighlightsMaxCharacters: defaultFetchHighlightsMaxCharacters
? Number(defaultFetchHighlightsMaxCharacters)
: undefined,
};
return Object.values(options).some((value) => value !== undefined) ? options : undefined;
}
/**
 * Asks the user for the fallback providers of `provider`.
 *
 * The provider's current fallback list, joined with ", ", is passed as the second
 * argument of `ctx.ui.input` (presumably the initial/placeholder text — confirm
 * against the UI API).
 *
 * @returns The parsed names, or `undefined` when the answer contains no names
 *          (a nullish answer is treated as an empty string).
 */
async function promptFallbackProviders(ctx: any, provider: WebSearchProviderConfig) {
  const currentList = (provider.fallbackProviders ?? []).join(", ");
  const answer = await ctx.ui.input(
    `Fallback providers for ${provider.name} (comma-separated names)`,
    currentList,
  );
  return parseFallbackProviders(answer ?? "");
}
/**
 * Interactively collects everything needed for a new provider of the given type.
 *
 * Prompt order: name, then credentials (Firecrawl: base URL followed by API key;
 * Tavily/Exa: API key only), then fallback providers, then provider options.
 *
 * @param ctx  Command context; only `ctx.ui.input` is used here directly.
 * @param type Provider type to create.
 * @returns The assembled provider, or `undefined` when the name prompt yields a
 *          falsy value, or when a Tavily/Exa API key prompt yields a falsy value.
 *          A Firecrawl draft is returned even when both credentials are blank;
 *          it is not validated here.
 */
async function promptNewProvider(ctx: any, type: WebSearchProviderConfig["type"]) {
  let suggestedName = "firecrawl-main";
  if (type === "tavily") {
    suggestedName = "tavily-main";
  } else if (type === "exa") {
    suggestedName = "exa-fallback";
  }

  const name = await ctx.ui.input("Provider name", suggestedName);
  if (!name) {
    return undefined;
  }

  let draft: WebSearchProviderConfig;
  if (type === "firecrawl") {
    const baseUrl = await ctx.ui.input("Firecrawl base URL (blank uses cloud default)", "");
    const apiKey = await ctx.ui.input("Firecrawl API key (blank allowed when base URL is set)", "fc-...");
    // Blank answers are left out entirely instead of being stored as empty strings.
    draft = {
      name,
      type,
      ...(apiKey?.trim() ? { apiKey } : {}),
      ...(baseUrl?.trim() ? { baseUrl } : {}),
    };
  } else {
    const isTavily = type === "tavily";
    const apiKey = await ctx.ui.input(isTavily ? "Tavily API key" : "Exa API key", isTavily ? "tvly-..." : "exa_...");
    if (!apiKey) {
      return undefined;
    }
    draft = { name, type, apiKey };
  }

  const fallbackProviders = await promptFallbackProviders(ctx, draft);
  const options = await promptProviderOptions(ctx, draft);

  return {
    ...draft,
    ...(fallbackProviders ? { fallbackProviders } : {}),
    ...(options ? { options } : {}),
  };
}
export function registerWebSearchConfigCommand(pi: ExtensionAPI) {
pi.registerCommand("web-search-config", {
description: "Configure Tavily/Exa providers for web_search and web_fetch",
description: "Configure Tavily/Exa/Firecrawl providers for web_search and web_fetch",
handler: async (_args, ctx) => {
const path = getDefaultWebSearchConfigPath();
@@ -138,18 +270,34 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) {
throw error;
}
const tavilyName = await ctx.ui.input("Create Tavily provider", "tavily-main");
const tavilyApiKey = await ctx.ui.input("Tavily API key", "tvly-...");
if (!tavilyName || !tavilyApiKey) {
const createType = await ctx.ui.select("Create initial provider", [
"Add Tavily provider",
"Add Exa provider",
"Add Firecrawl provider",
]);
if (!createType) {
return;
}
config = createDefaultWebSearchConfig({ tavilyName, tavilyApiKey });
const provider = await promptNewProvider(
ctx,
createType === "Add Tavily provider"
? "tavily"
: createType === "Add Exa provider"
? "exa"
: "firecrawl",
);
if (!provider) {
return;
}
config = createDefaultWebSearchConfig({ provider });
}
const action = await ctx.ui.select("Web search config", [
"Set default provider",
"Add Tavily provider",
"Add Exa provider",
"Add Firecrawl provider",
"Edit provider",
"Remove provider",
]);
@@ -168,22 +316,15 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) {
config = setDefaultProviderOrThrow(config, nextDefault);
}
if (action === "Add Tavily provider") {
const name = await ctx.ui.input("Provider name", "tavily-main");
const apiKey = await ctx.ui.input("Tavily API key", "tvly-...");
if (!name || !apiKey) {
if (action === "Add Tavily provider" || action === "Add Exa provider" || action === "Add Firecrawl provider") {
const provider = await promptNewProvider(
ctx,
action === "Add Tavily provider" ? "tavily" : action === "Add Exa provider" ? "exa" : "firecrawl",
);
if (!provider) {
return;
}
config = upsertProviderOrThrow(config, { name, type: "tavily", apiKey });
}
if (action === "Add Exa provider") {
const name = await ctx.ui.input("Provider name", "exa-fallback");
const apiKey = await ctx.ui.input("Exa API key", "exa_...");
if (!name || !apiKey) {
return;
}
config = upsertProviderOrThrow(config, { name, type: "exa", apiKey });
config = upsertProviderOrThrow(config, provider);
}
if (action === "Edit provider") {
@@ -197,18 +338,38 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) {
const existing = config.providers.find((provider) => provider.name === providerName)!;
const nextName = await ctx.ui.input("Provider name", existing.name);
const nextApiKey = await ctx.ui.input(`API key for ${existing.name}`, existing.apiKey);
if (!nextName || !nextApiKey) {
if (!nextName) {
return;
}
config = renameProviderOrThrow(config, existing.name, nextName);
const renamed = config.providers.find((provider) => provider.name === nextName)!;
const fallbackProviders = await promptFallbackProviders(ctx, renamed);
const nextOptions = await promptProviderOptions(ctx, renamed);
config = updateProviderOrThrow(config, nextName, {
apiKey: nextApiKey,
options: nextOptions,
});
if (renamed.type === "firecrawl") {
const nextBaseUrl = await ctx.ui.input("Firecrawl base URL (blank uses cloud default)", renamed.baseUrl ?? "");
const nextApiKey = await ctx.ui.input(
`API key for ${renamed.name} (blank allowed when base URL is set)`,
renamed.apiKey ?? "",
);
config = updateProviderOrThrow(config, nextName, {
apiKey: nextApiKey,
baseUrl: nextBaseUrl,
fallbackProviders,
options: nextOptions,
});
} else {
const nextApiKey = await ctx.ui.input(`API key for ${renamed.name}`, renamed.apiKey);
if (!nextApiKey) {
return;
}
config = updateProviderOrThrow(config, nextName, {
apiKey: nextApiKey,
fallbackProviders,
options: nextOptions,
});
}
}
if (action === "Remove provider") {
@@ -222,7 +383,8 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI) {
config = removeProviderOrThrow(config, providerName);
}
await writeWebSearchConfig(path, config);
const normalizedConfig = normalizeDraftConfigOrThrow(config, path);
await writeWebSearchConfig(path, normalizedConfig);
ctx.ui.notify(`Saved web-search config to ${path}`, "info");
},
});

View File

@@ -93,3 +93,101 @@ test("loadWebSearchConfig rejects a missing file with a helpful example message"
error.message.includes('"providers"'),
);
});
// A Firecrawl entry that only sets baseUrl must load without an apiKey, and the
// trailing slash of the configured baseUrl must be removed in the loaded provider.
test("loadWebSearchConfig accepts self-hosted Firecrawl without an apiKey and normalizes its baseUrl", async () => {
const file = await writeTempConfig({
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
// Trailing slash on purpose: the assertion below expects it to be stripped.
baseUrl: "https://firecrawl.internal.example/v2/",
fallbackProviders: ["exa-fallback"],
},
{
name: "exa-fallback",
type: "exa",
apiKey: "exa-test-key",
},
],
});
const config = await loadWebSearchConfig(file);
const provider = config.providersByName.get("firecrawl-main");
assert.equal(provider?.type, "firecrawl");
assert.equal(provider?.baseUrl, "https://firecrawl.internal.example/v2");
assert.equal(provider?.apiKey, undefined);
assert.deepEqual(provider?.fallbackProviders, ["exa-fallback"]);
});
// A Firecrawl entry with neither baseUrl nor apiKey must be rejected, and the error
// must name the provider and mention apiKey.
test("loadWebSearchConfig rejects Firecrawl cloud config without an apiKey", async () => {
const file = await writeTempConfig({
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
},
],
});
await assert.rejects(
() => loadWebSearchConfig(file),
(error) =>
error instanceof WebSearchConfigError &&
/Firecrawl provider \"firecrawl-main\"/.test(error.message) &&
/apiKey/.test(error.message),
);
});
// A fallbackProviders entry that does not match any configured provider must be
// rejected, and the error must include the missing name.
test("loadWebSearchConfig rejects unknown fallback providers", async () => {
const file = await writeTempConfig({
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc-test-key",
fallbackProviders: ["missing-provider"],
},
],
});
await assert.rejects(
() => loadWebSearchConfig(file),
(error) =>
error instanceof WebSearchConfigError &&
/fallback provider/.test(error.message) &&
/missing-provider/.test(error.message),
);
});
// Two providers that fall back to each other form a cycle; loading must fail and
// the error must mention "cycle" and both provider names.
test("loadWebSearchConfig rejects fallback cycles", async () => {
const file = await writeTempConfig({
defaultProvider: "firecrawl-main",
providers: [
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc-test-key",
fallbackProviders: ["exa-fallback"],
},
{
name: "exa-fallback",
type: "exa",
apiKey: "exa-test-key",
// Points back at the first provider, closing the loop.
fallbackProviders: ["firecrawl-main"],
},
],
});
await assert.rejects(
() => loadWebSearchConfig(file),
(error) =>
error instanceof WebSearchConfigError &&
/cycle/i.test(error.message) &&
/firecrawl-main/.test(error.message) &&
/exa-fallback/.test(error.message),
);
});

View File

@@ -4,6 +4,7 @@ import { dirname, join } from "node:path";
import { Value } from "@sinclair/typebox/value";
import {
WebSearchConfigSchema,
type FirecrawlProviderConfig,
type WebSearchConfig,
type WebSearchProviderConfig,
} from "./schema.ts";
@@ -36,6 +37,7 @@ function exampleConfigSnippet() {
name: "tavily-main",
type: "tavily",
apiKey: "tvly-...",
fallbackProviders: ["exa-fallback"],
},
{
name: "exa-fallback",
@@ -49,19 +51,134 @@ function exampleConfigSnippet() {
);
}
/**
 * Validates a Firecrawl `baseUrl` and returns it without trailing slashes.
 *
 * Only absolute `http:` and `https:` URLs are accepted. The explicit scheme check
 * matters because the URL parser also accepts inputs such as `localhost:3002/v2`
 * (parsed as scheme `localhost:`) or `ftp://host/`, which cannot be used as an
 * HTTP API base.
 *
 * @param value        Raw base URL from the config.
 * @param path         Config file path, used only in the error message.
 * @param providerName Provider name, used only in the error message.
 * @returns The URL string with trailing slashes removed from its path.
 * @throws WebSearchConfigError when the value is not a valid http(s) URL.
 */
function normalizeBaseUrl(value: string, path: string, providerName: string) {
  const invalidBaseUrl = () =>
    new WebSearchConfigError(`Firecrawl provider \"${providerName}\" in ${path} has an invalid baseUrl.`);

  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    throw invalidBaseUrl();
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw invalidBaseUrl();
  }

  // Collapse trailing slashes in the path; an empty path must stay "/" for the URL setter.
  parsed.pathname = parsed.pathname.replace(/\/+$/, "") || "/";
  // Serialising a root path yields "scheme://host/"; drop that final slash too.
  return parsed.toString().replace(/\/$/, "");
}
/**
 * Trims and validates the `fallbackProviders` list of a provider.
 *
 * @param provider Provider whose fallback list is checked; it is not modified.
 * @param path     Config file path, used only in error messages.
 * @returns A new array of trimmed names, or `undefined` when the provider has no list.
 * @throws WebSearchConfigError when any name is blank after trimming, or — checked
 *         afterwards — when the trimmed list contains duplicates.
 */
function normalizeFallbackProviders(provider: WebSearchProviderConfig, path: string) {
  const rawNames = provider.fallbackProviders;
  if (!rawNames) {
    return undefined;
  }

  const trimmedNames = rawNames.map((entry) => entry.trim());

  if (trimmedNames.includes("")) {
    throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} contains a blank fallback provider name.`);
  }

  const distinctNames = new Set(trimmedNames);
  if (distinctNames.size < trimmedNames.length) {
    throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} has duplicate fallback providers.`);
  }

  return trimmedNames;
}
/**
 * Returns a normalized copy of one provider entry from the config.
 *
 * The name, apiKey and fallback names are trimmed. For Firecrawl a blank
 * apiKey/baseUrl becomes `undefined`, a non-blank baseUrl is passed through
 * `normalizeBaseUrl`, and at least one of the two must remain.
 *
 * @param provider Raw provider entry; it is not modified.
 * @param path     Config file path, used only in error messages.
 * @throws WebSearchConfigError when the name is blank, the fallback list is invalid,
 *         the Firecrawl baseUrl is invalid, a Firecrawl entry has neither apiKey nor
 *         baseUrl, or a non-Firecrawl entry has a blank apiKey.
 */
function normalizeProvider(provider: WebSearchProviderConfig, path: string): WebSearchProviderConfig {
  const name = provider.name.trim();
  if (name.length === 0) {
    throw new WebSearchConfigError(`Provider in ${path} is missing a name.`);
  }

  const fallbackProviders = normalizeFallbackProviders(provider, path);

  if (provider.type !== "firecrawl") {
    const apiKey = provider.apiKey.trim();
    if (apiKey === "") {
      throw new WebSearchConfigError(`Provider \"${name}\" in ${path} is missing a literal apiKey.`);
    }
    return {
      ...provider,
      name,
      apiKey,
      fallbackProviders,
    };
  }

  // Firecrawl: both credentials are optional on their own, but not both at once.
  const trimmedApiKey = provider.apiKey?.trim();
  const apiKey = trimmedApiKey ? trimmedApiKey : undefined;
  const trimmedBaseUrl = provider.baseUrl?.trim();
  const baseUrl = trimmedBaseUrl ? normalizeBaseUrl(trimmedBaseUrl, path, name) : undefined;

  if (!(baseUrl || apiKey)) {
    throw new WebSearchConfigError(
      `Firecrawl provider \"${name}\" in ${path} requires an apiKey when baseUrl is omitted.`,
    );
  }

  return {
    ...(provider as FirecrawlProviderConfig),
    name,
    apiKey,
    baseUrl,
    fallbackProviders,
  };
}
/**
 * Validates the fallback relations between all providers.
 *
 * Pass 1 rejects a provider that lists itself or an unknown provider as fallback.
 * Pass 2 walks the fallback edges depth-first from every provider and rejects any
 * cycle, reporting it as `a -> b -> a`.
 *
 * @param providersByName Providers keyed by name.
 * @param path            Config file path, used only in error messages.
 * @throws WebSearchConfigError on self-reference, unknown fallback, or cycle.
 */
function validateFallbackGraph(providersByName: Map<string, WebSearchProviderConfig>, path: string) {
  providersByName.forEach((provider) => {
    (provider.fallbackProviders ?? []).forEach((target) => {
      if (target === provider.name) {
        throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} cannot fall back to itself.`);
      }
      if (!providersByName.has(target)) {
        throw new WebSearchConfigError(
          `Provider \"${provider.name}\" in ${path} references unknown fallback provider \"${target}\".`,
        );
      }
    });
  });

  const seen = new Set<string>(); // every provider the walk has entered so far
  const trail: string[] = []; // providers on the current walk, in visit order
  const onTrail = new Set<string>(); // same members as `trail`, for fast lookup

  function walk(current: string): void {
    if (onTrail.has(current)) {
      // Reaching a provider that is still on the current trail closes a cycle.
      const loop = trail.slice(trail.indexOf(current));
      loop.push(current);
      throw new WebSearchConfigError(`Fallback provider cycle detected in ${path}: ${loop.join(" -> ")}`);
    }
    if (seen.has(current)) {
      return;
    }

    seen.add(current);
    onTrail.add(current);
    trail.push(current);

    const targets = providersByName.get(current)?.fallbackProviders ?? [];
    for (const target of targets) {
      walk(target);
    }

    trail.pop();
    onTrail.delete(current);
  }

  for (const start of providersByName.keys()) {
    walk(start);
  }
}
export function normalizeWebSearchConfig(config: WebSearchConfig, path: string): ResolvedWebSearchConfig {
const providersByName = new Map<string, WebSearchProviderConfig>();
for (const provider of config.providers) {
if (!provider.apiKey.trim()) {
throw new WebSearchConfigError(`Provider \"${provider.name}\" in ${path} is missing a literal apiKey.`);
}
for (const rawProvider of config.providers) {
const provider = normalizeProvider(rawProvider, path);
if (providersByName.has(provider.name)) {
throw new WebSearchConfigError(`Duplicate provider name \"${provider.name}\" in ${path}.`);
}
providersByName.set(provider.name, provider);
}
validateFallbackGraph(providersByName, path);
const defaultProvider = providersByName.get(config.defaultProvider);
if (!defaultProvider) {
throw new WebSearchConfigError(

View File

@@ -27,7 +27,17 @@ test("formatSearchOutput shows answer and fallback provider metadata", () => {
answer: "pi is a coding agent",
execution: {
actualProviderName: "exa-fallback",
failoverFromProviderName: "tavily-main",
attempts: [
{
providerName: "firecrawl-main",
status: "failed",
reason: "503 upstream unavailable",
},
{
providerName: "exa-fallback",
status: "succeeded",
},
],
},
results: [
{
@@ -39,7 +49,7 @@ test("formatSearchOutput shows answer and fallback provider metadata", () => {
} as any);
assert.match(output, /Answer: pi is a coding agent/);
assert.match(output, /Fallback: tavily-main -> exa-fallback/);
assert.match(output, /Fallback: firecrawl-main -> exa-fallback/);
});
test("truncateText shortens long fetch bodies with an ellipsis", () => {
@@ -78,7 +88,17 @@ test("formatFetchOutput shows fallback metadata and favicon/images when present"
providerName: "exa-fallback",
execution: {
actualProviderName: "exa-fallback",
failoverFromProviderName: "tavily-main",
attempts: [
{
providerName: "tavily-main",
status: "failed",
reason: "503 upstream unavailable",
},
{
providerName: "exa-fallback",
status: "succeeded",
},
],
},
results: [
{

View File

@@ -3,11 +3,25 @@ import type { NormalizedFetchResponse, NormalizedSearchResponse } from "./provid
function formatFallbackLine(execution?: {
actualProviderName?: string;
failoverFromProviderName?: string;
attempts?: Array<{
providerName?: string;
status?: string;
}>;
}) {
if (!execution?.failoverFromProviderName || !execution.actualProviderName) {
if (execution?.failoverFromProviderName && execution.actualProviderName) {
return `Fallback: ${execution.failoverFromProviderName} -> ${execution.actualProviderName}`;
}
if (!execution?.actualProviderName || !execution.attempts?.length) {
return undefined;
}
return `Fallback: ${execution.failoverFromProviderName} -> ${execution.actualProviderName}`;
const firstFailedAttempt = execution.attempts.find((attempt) => attempt.status === "failed");
if (!firstFailedAttempt?.providerName || firstFailedAttempt.providerName === execution.actualProviderName) {
return undefined;
}
return `Fallback: ${firstFailedAttempt.providerName} -> ${execution.actualProviderName}`;
}
export function truncateText(text: string, maxCharacters = 4000) {

View File

@@ -0,0 +1,170 @@
import test from "node:test";
import assert from "node:assert/strict";
import { createFirecrawlProvider } from "./firecrawl.ts";
// Shared fixture: a Firecrawl provider with an apiKey and no baseUrl, so requests
// built from it are expected to target the default cloud endpoint.
const cloudConfig = {
name: "firecrawl-main",
type: "firecrawl" as const,
apiKey: "fc-test-key",
options: {
defaultSearchLimit: 6,
},
};
// Verifies the request built for a search and the mapping of the response:
// - the call goes to <cloud base>/search with JSON content type and a bearer token;
// - includeDomains/excludeDomains are folded into the query as site:/-site: operators;
// - the explicit limit (4) is sent rather than the configured defaultSearchLimit (6);
// - firecrawl-specific options are forwarded unchanged;
// - `description` is surfaced as `content` and `markdown` as `rawContent`.
test("createFirecrawlProvider maps search requests to Firecrawl /search", async () => {
let capturedUrl = "";
let capturedInit: RequestInit | undefined;
// Stub fetch: records the outgoing request and returns a canned search response.
const provider = createFirecrawlProvider(cloudConfig, async (url, init) => {
capturedUrl = String(url);
capturedInit = init;
return new Response(
JSON.stringify({
success: true,
id: "search-1",
data: {
web: [
{
url: "https://pi.dev",
title: "Pi Docs",
description: "Pi docs summary",
markdown: "# Pi Docs",
},
],
},
}),
{ status: 200 },
);
});
const result = await provider.search({
query: "pi docs",
limit: 4,
includeDomains: ["pi.dev"],
excludeDomains: ["bad.example"],
firecrawl: {
country: "DE",
location: "Berlin, Germany",
categories: ["github"],
scrapeOptions: {
formats: ["markdown", "summary"],
},
},
});
const body = JSON.parse(String(capturedInit?.body));
assert.equal(capturedUrl, "https://api.firecrawl.dev/v2/search");
assert.deepEqual(capturedInit?.headers, {
"content-type": "application/json",
authorization: "Bearer fc-test-key",
});
assert.equal(body.query, "pi docs site:pi.dev -site:bad.example");
assert.equal(body.limit, 4);
assert.equal(body.country, "DE");
assert.equal(body.location, "Berlin, Germany");
assert.deepEqual(body.categories, ["github"]);
assert.deepEqual(body.scrapeOptions, {
formats: ["markdown", "summary"],
});
assert.equal(result.requestId, "search-1");
assert.equal(result.results[0]?.title, "Pi Docs");
assert.equal(result.results[0]?.content, "Pi docs summary");
assert.equal(result.results[0]?.rawContent, "# Pi Docs");
});
// With only a baseUrl configured, the request must go to <baseUrl>/search and must
// not carry an authorization header.
test("createFirecrawlProvider omits auth for self-hosted baseUrl when no apiKey is configured", async () => {
let capturedUrl = "";
let capturedInit: RequestInit | undefined;
const provider = createFirecrawlProvider(
{
name: "firecrawl-selfhosted",
type: "firecrawl",
baseUrl: "https://firecrawl.internal.example/v2",
},
// Stub fetch: records the outgoing request and returns an empty result set.
async (url, init) => {
capturedUrl = String(url);
capturedInit = init;
return new Response(
JSON.stringify({
success: true,
data: {
web: [],
},
}),
{ status: 200 },
);
},
);
await provider.search({
query: "pi docs",
});
assert.equal(capturedUrl, "https://firecrawl.internal.example/v2/search");
assert.deepEqual(capturedInit?.headers, {
"content-type": "application/json",
});
});
// Fetching two URLs where one scrape fails with HTTP 402: the successful URL must be
// mapped to title/text/summary/images, and the failing URL must appear in the results
// as an entry with `title: null` and an `error` string instead of failing the call.
test("createFirecrawlProvider fetches each URL via /scrape and preserves per-url failures", async () => {
const calls: Array<{ url: string; init: RequestInit | undefined }> = [];
// Stub fetch: records every call and answers based on the `url` field of the body.
const provider = createFirecrawlProvider(cloudConfig, async (url, init) => {
calls.push({ url: String(url), init });
const body = JSON.parse(String(init?.body));
if (body.url === "https://bad.example") {
return new Response(JSON.stringify({ error: "Payment required" }), {
status: 402,
statusText: "Payment Required",
});
}
return new Response(
JSON.stringify({
success: true,
data: {
metadata: {
title: "Pi",
sourceURL: body.url,
},
markdown: "Fetched body",
summary: "Short summary",
images: ["https://pi.dev/logo.png"],
},
}),
{ status: 200 },
);
});
const result = await provider.fetch({
urls: ["https://pi.dev", "https://bad.example"],
text: true,
summary: true,
firecrawl: {
formats: ["markdown", "summary", "images"],
},
});
const firstBody = JSON.parse(String(calls[0]?.init?.body));
assert.equal(calls[0]?.url, "https://api.firecrawl.dev/v2/scrape");
// The request body carries only the URL and the explicitly requested formats.
assert.deepEqual(firstBody, {
url: "https://pi.dev",
formats: ["markdown", "summary", "images"],
});
assert.deepEqual(result.results, [
{
url: "https://pi.dev",
title: "Pi",
text: "Fetched body",
summary: "Short summary",
images: ["https://pi.dev/logo.png"],
},
{
url: "https://bad.example",
title: null,
error: 'Provider "firecrawl-main" HTTP 402 Payment Required: {"error":"Payment required"}',
},
]);
});

198
src/providers/firecrawl.ts Normal file
View File

@@ -0,0 +1,198 @@
import type { FirecrawlProviderConfig } from "../schema.ts";
import { postJson, type ProviderFetchLike } from "./http.ts";
import type {
NormalizedFetchRequest,
NormalizedFetchResponse,
NormalizedSearchRequest,
NormalizedSearchResponse,
WebProvider,
} from "./types.ts";
/** Base URL used when a Firecrawl provider config does not set `baseUrl`. */
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev/v2";
/**
 * Shape this module expects of the JSON body returned by the `/search` endpoint.
 * All fields except a result's `url` are optional.
 */
type FirecrawlSearchPayload = {
// Either field may carry the request identifier; see `pickRequestId`.
id?: string;
request_id?: string;
data?: {
// Web results; other result groups are not modelled here.
web?: Array<{
url: string;
title?: string;
description?: string;
markdown?: string;
score?: number;
published_date?: string;
images?: string[];
}>;
};
};
/**
 * Shape of a scrape response body as modelled by this module; every field is optional.
 * NOTE(review): presumably the payload type for the `/scrape` endpoint — the code that
 * uses this type is outside this view, confirm there.
 */
type FirecrawlScrapePayload = {
success?: boolean;
data?: {
markdown?: string;
summary?: string;
images?: string[];
title?: string;
metadata?: {
title?: string;
sourceURL?: string;
};
};
};
/**
 * Picks the base URL for a Firecrawl provider.
 *
 * @returns `config.baseUrl` when it is set (not `null`/`undefined`), otherwise
 *          `DEFAULT_FIRECRAWL_BASE_URL`. An empty string counts as set.
 */
function resolveBaseUrl(config: FirecrawlProviderConfig) {
  const { baseUrl } = config;
  if (baseUrl === undefined || baseUrl === null) {
    return DEFAULT_FIRECRAWL_BASE_URL;
  }
  return baseUrl;
}
/**
 * Expresses domain filters as search operators appended to the query text.
 *
 * @param query          User query; surrounding whitespace is trimmed.
 * @param includeDomains At most one domain, added as `site:<domain>`. An empty
 *                       first entry is ignored.
 * @param excludeDomains Domains added as `-site:<domain>`, in the given order.
 * @returns The query and operators joined by single spaces, trimmed.
 * @throws Error when more than one include domain is given.
 */
function appendSearchOperators(query: string, includeDomains?: string[], excludeDomains?: string[]) {
  const [onlyInclude, ...extraIncludes] = includeDomains ?? [];
  if (extraIncludes.length > 0) {
    throw new Error("Firecrawl currently supports at most one includeDomains entry.");
  }

  const includeOperators = onlyInclude ? [`site:${onlyInclude}`] : [];
  const excludeOperators = (excludeDomains ?? []).map((domain) => `-site:${domain}`);

  return [query.trim(), ...includeOperators, ...excludeOperators].join(" ").trim();
}
/**
 * Determines the `categories` value to send with a Firecrawl search.
 *
 * @param request - Normalized search request.
 * @returns `firecrawl.categories` when non-empty, otherwise the top-level
 *   `category` wrapped in an array, otherwise `undefined`.
 * @throws Error when both the top-level `category` and a non-empty
 *   `firecrawl.categories` are supplied.
 */
function resolveSearchCategories(request: NormalizedSearchRequest) {
  const explicitCategories = request.firecrawl?.categories;
  if (explicitCategories?.length) {
    // The two ways of specifying categories are mutually exclusive.
    if (request.category) {
      throw new Error("Firecrawl does not accept both top-level category and firecrawl.categories.");
    }
    return explicitCategories;
  }
  if (request.category) {
    return [request.category];
  }
  return undefined;
}
/**
 * Removes duplicate format names while keeping first-occurrence order.
 *
 * @param formats - Format names, possibly with repeats.
 * @returns A new array with each format listed once.
 */
function uniqueFormats(formats: string[]) {
  return formats.filter((format, index) => formats.indexOf(format) === index);
}
/**
 * Works out which Firecrawl `formats` to request for a fetch.
 *
 * When `firecrawl.formats` is supplied and non-empty it is used as-is
 * (deduplicated), but it must cover any generic `text`/`summary` option that
 * was requested. Otherwise formats are derived from the generic options, and
 * fall back to `["markdown"]` when nothing is selected.
 *
 * @param request - Normalized fetch request.
 * @returns Deduplicated list of format names.
 * @throws Error when `highlights` is requested, or when an explicit
 *   `firecrawl.formats` omits a format a generic option depends on.
 */
function resolveFetchFormats(request: NormalizedFetchRequest) {
  if (request.highlights) {
    throw new Error('Firecrawl does not support generic fetch option "highlights".');
  }

  const explicitFormats = request.firecrawl?.formats;
  if (explicitFormats?.length) {
    const coversText = explicitFormats.includes("markdown");
    const coversSummary = explicitFormats.includes("summary");
    if (request.text && !coversText) {
      throw new Error('Firecrawl fetch option "text" requires firecrawl.formats to include "markdown".');
    }
    if (request.summary && !coversSummary) {
      throw new Error('Firecrawl fetch option "summary" requires firecrawl.formats to include "summary".');
    }
    return uniqueFormats([...explicitFormats]);
  }

  // Text defaults to on unless a summary was requested; `highlights` is
  // necessarily falsy here because it was rejected above.
  const textWanted = request.text ?? (!request.highlights && !request.summary);
  const derived: string[] = [];
  if (textWanted) {
    derived.push("markdown");
  }
  if (request.summary) {
    derived.push("summary");
  }
  if (derived.length === 0) {
    return uniqueFormats(["markdown"]);
  }
  return uniqueFormats(derived);
}
/**
 * Extracts the request identifier from a response payload.
 *
 * @param payload - Response object that may carry `id` and/or `request_id`.
 * @returns `id` when it is a string, else `request_id` when it is a string,
 *   else `undefined`.
 */
function pickRequestId(payload: { id?: string; request_id?: string }) {
  // Candidates are listed in priority order.
  for (const candidate of [payload.id, payload.request_id]) {
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}
/**
 * Creates a Firecrawl-backed web provider.
 *
 * - `search` sends one POST to `/search` and maps `data.web` hits to
 *   normalized search results.
 * - `fetch` sends one POST to `/scrape` per URL, in parallel. A failure for an
 *   individual URL is captured as a result with an `error` message instead of
 *   rejecting the whole call. Format resolution happens before any request is
 *   sent, so an invalid option combination still rejects the call.
 *
 * @param config - Firecrawl provider configuration (name, optional API key,
 *   optional base URL, optional defaults).
 * @param fetchImpl - HTTP implementation; defaults to the global `fetch`.
 * @returns A provider exposing `name`, `type`, `search` and `fetch`.
 */
export function createFirecrawlProvider(
  config: FirecrawlProviderConfig,
  fetchImpl: ProviderFetchLike = fetch,
): WebProvider {
  const baseUrl = resolveBaseUrl(config);

  // A thrown value is not guaranteed to be an Error; blindly reading
  // `.message` would yield `undefined` and make a failed URL look like an
  // empty success.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  return {
    name: config.name,
    type: config.type,

    async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
      const payload = await postJson<FirecrawlSearchPayload>({
        providerName: config.name,
        baseUrl,
        path: "/search",
        apiKey: config.apiKey,
        fetchImpl,
        body: {
          query: appendSearchOperators(request.query, request.includeDomains, request.excludeDomains),
          // Per-request limit wins, then the provider default, then 5.
          limit: request.limit ?? config.options?.defaultSearchLimit ?? 5,
          country: request.firecrawl?.country,
          location: request.firecrawl?.location,
          categories: resolveSearchCategories(request),
          scrapeOptions: request.firecrawl?.scrapeOptions,
        },
      });

      return {
        providerName: config.name,
        requestId: pickRequestId(payload),
        results: (payload.data?.web ?? []).map((item) => ({
          title: item.title ?? null,
          url: item.url,
          content: typeof item.description === "string" ? item.description : undefined,
          rawContent: typeof item.markdown === "string" ? item.markdown : undefined,
          score: item.score,
          publishedDate: item.published_date,
          images: Array.isArray(item.images) ? item.images : undefined,
        })),
      };
    },

    async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
      // Resolved outside the per-URL try/catch: an invalid option combination
      // is a caller error and should reject the whole call.
      const formats = resolveFetchFormats(request);

      const results = await Promise.all(
        request.urls.map(async (url) => {
          try {
            const payload = await postJson<FirecrawlScrapePayload>({
              providerName: config.name,
              baseUrl,
              path: "/scrape",
              apiKey: config.apiKey,
              fetchImpl,
              body: {
                url,
                formats,
              },
            });

            return {
              url: payload.data?.metadata?.sourceURL ?? url,
              title: payload.data?.metadata?.title ?? payload.data?.title ?? null,
              text: typeof payload.data?.markdown === "string" ? payload.data.markdown : undefined,
              summary: typeof payload.data?.summary === "string" ? payload.data.summary : undefined,
              images: Array.isArray(payload.data?.images) ? payload.data.images : undefined,
            };
          } catch (error: unknown) {
            // Keep the other URLs' results; report this one as failed.
            return {
              url,
              title: null,
              error: describeError(error),
            };
          }
        }),
      );

      return {
        providerName: config.name,
        results,
      };
    },
  };
}

52
src/providers/http.ts Normal file
View File

@@ -0,0 +1,52 @@
/** Minimal `fetch`-compatible signature that providers accept, so tests can inject a stub. */
export type ProviderFetchLike = (input: string, init?: RequestInit) => Promise<Response>;
/** Arguments accepted by `postJson`. */
interface PostJsonOptions {
// Provider name, included in HTTP error messages.
providerName: string;
// Base URL and path are joined with exactly one "/" between them.
baseUrl: string;
path: string;
// When set, sent as an `authorization: Bearer …` header; omitted otherwise.
apiKey?: string;
// Serialized with JSON.stringify as the request body.
body: unknown;
// HTTP implementation; `postJson` defaults this to the global `fetch`.
fetchImpl?: ProviderFetchLike;
}
/**
 * Joins a base URL and a path with exactly one slash between them.
 *
 * @param baseUrl - Base URL; any trailing slashes are removed.
 * @param path - Path to append; a leading slash is added when missing.
 * @returns The combined URL string.
 */
export function joinApiUrl(baseUrl: string, path: string) {
  // Strip every trailing "/" from the base.
  let end = baseUrl.length;
  while (end > 0 && baseUrl[end - 1] === "/") {
    end -= 1;
  }
  const trimmedBase = baseUrl.slice(0, end);
  const separator = path.startsWith("/") ? "" : "/";
  return trimmedBase + separator + path;
}
/**
 * Converts a non-OK HTTP response into a thrown `Error`.
 *
 * The message has the form
 * `Provider "<name>" HTTP <status> <statusText>: <body>` where the body is
 * limited to its first 300 characters.
 *
 * @param providerName - Provider name to include in the message.
 * @param response - The failed response; its body is read as text.
 * @returns Never resolves normally; the returned promise always rejects.
 */
export async function readHttpError(providerName: string, response: Response): Promise<never> {
  const bodyText = await response.text();
  const snippet = bodyText.slice(0, 300);
  const message = `Provider "${providerName}" HTTP ${response.status} ${response.statusText}: ${snippet}`;
  throw new Error(message);
}
/**
 * POSTs a JSON body to `<baseUrl><path>` and returns the parsed JSON response.
 *
 * A `content-type: application/json` header is always sent; an
 * `authorization: Bearer <apiKey>` header is added only when `apiKey` is truthy.
 *
 * @param options - See `PostJsonOptions`; `fetchImpl` defaults to the global `fetch`.
 * @returns The response body parsed as JSON, typed as `T` without runtime validation.
 * @throws Error (via `readHttpError`) when the response status is not OK.
 */
export async function postJson<T>(options: PostJsonOptions): Promise<T> {
  const { providerName, baseUrl, path, apiKey, body, fetchImpl = fetch } = options;

  const requestHeaders: Record<string, string> = apiKey
    ? { "content-type": "application/json", authorization: `Bearer ${apiKey}` }
    : { "content-type": "application/json" };

  const targetUrl = joinApiUrl(baseUrl, path);
  const response = await fetchImpl(targetUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(body),
  });

  if (response.ok) {
    return (await response.json()) as T;
  }

  // readHttpError always throws; `return` keeps control flow explicit.
  return readHttpError(providerName, response);
}

127
src/providers/registry.ts Normal file
View File

@@ -0,0 +1,127 @@
import type { FirecrawlProviderConfig, TavilyProviderConfig, WebSearchProviderConfig, ExaProviderConfig } from "../schema.ts";
import { createExaProvider } from "./exa.ts";
import { createFirecrawlProvider } from "./firecrawl.ts";
import { createTavilyProvider } from "./tavily.ts";
import type { NormalizedFetchRequest, NormalizedSearchRequest, WebProvider } from "./types.ts";
/** Names of the provider-specific option blocks a request may carry. */
export type ProviderOptionBlock = "tavily" | "firecrawl";
/** Declares which request features a provider type accepts; used for up-front request validation. */
interface ProviderCapabilities {
// Option blocks accepted on search requests.
searchOptionBlocks: ProviderOptionBlock[];
// Option blocks accepted on fetch requests.
fetchOptionBlocks: ProviderOptionBlock[];
// Which generic fetch options (`text`, `highlights`, `summary`) are supported.
fetchFeatures: {
text: boolean;
highlights: boolean;
summary: boolean;
};
}
/**
 * Registry entry for one provider type: its capabilities, a factory, and
 * optional extra validation hooks that run after the generic checks.
 */
interface ProviderDescriptor<TConfig extends WebSearchProviderConfig = WebSearchProviderConfig> {
type: TConfig["type"];
capabilities: ProviderCapabilities;
createProvider(config: TConfig): WebProvider;
// Optional provider-specific checks; expected to throw on an invalid request.
validateSearchRequest?(providerName: string, request: NormalizedSearchRequest): void;
validateFetchRequest?(providerName: string, request: NormalizedFetchRequest): void;
}
/**
 * Registry of every supported provider type, keyed by its config `type`.
 * The `satisfies` clause makes the compiler require an entry for each member
 * of `WebSearchProviderConfig["type"]`.
 */
const providerDescriptors = {
// Exa: accepts no provider-specific option blocks.
exa: {
type: "exa",
capabilities: {
searchOptionBlocks: [],
fetchOptionBlocks: [],
fetchFeatures: {
text: true,
highlights: true,
summary: true,
},
},
createProvider(config: ExaProviderConfig) {
return createExaProvider(config);
},
},
// Tavily: accepts the `tavily` block on both search and fetch.
tavily: {
type: "tavily",
capabilities: {
searchOptionBlocks: ["tavily"],
fetchOptionBlocks: ["tavily"],
fetchFeatures: {
text: true,
highlights: true,
summary: true,
},
},
createProvider(config: TavilyProviderConfig) {
return createTavilyProvider(config);
},
},
// Firecrawl: accepts the `firecrawl` block; generic `highlights` is not supported.
firecrawl: {
type: "firecrawl",
capabilities: {
searchOptionBlocks: ["firecrawl"],
fetchOptionBlocks: ["firecrawl"],
fetchFeatures: {
text: true,
highlights: false,
summary: true,
},
},
createProvider(config: FirecrawlProviderConfig) {
return createFirecrawlProvider(config);
},
},
} satisfies Record<WebSearchProviderConfig["type"], ProviderDescriptor>;
/**
 * Rejects a request that carries a provider-specific option block the target
 * provider does not accept.
 *
 * @param providerName - Provider name used in the error message.
 * @param acceptedOptionBlocks - Option blocks the provider accepts.
 * @param blocks - Option blocks present on the request; entries whose value is
 *   `undefined` are treated as absent.
 * @throws Error naming the first supplied block that is not accepted.
 */
function validateOptionBlocks(
  providerName: string,
  acceptedOptionBlocks: ProviderOptionBlock[],
  blocks: Partial<Record<ProviderOptionBlock, unknown>>,
) {
  const suppliedBlocks = Object.entries(blocks)
    .filter(([, value]) => value !== undefined)
    .map(([blockName]) => blockName as ProviderOptionBlock);

  const rejectedBlock = suppliedBlocks.find((blockName) => !acceptedOptionBlocks.includes(blockName));
  if (rejectedBlock !== undefined) {
    throw new Error(`Provider "${providerName}" does not accept the "${rejectedBlock}" options block.`);
  }
}
/**
 * Looks up the registry descriptor for a provider type.
 *
 * The return type is annotated as `ProviderDescriptor` on purpose: the
 * registry is declared with `satisfies`, so its inferred entry types only
 * contain the properties each literal spells out. Without the annotation,
 * callers reading the optional `validateSearchRequest` / `validateFetchRequest`
 * hooks would not type-check, because no entry currently declares them.
 *
 * @param provider - Either a provider type string or any object with a `type` field.
 * @returns The descriptor registered for that type.
 */
export function getProviderDescriptor(
  provider: Pick<WebSearchProviderConfig, "type"> | WebSearchProviderConfig["type"],
): ProviderDescriptor {
  const type = typeof provider === "string" ? provider : provider.type;
  return providerDescriptors[type];
}
/**
 * Instantiates the provider matching a configuration entry.
 *
 * @param providerConfig - A provider configuration; its `type` selects the factory.
 * @returns The provider built by the registered factory for that type.
 */
export function createProviderFromConfig(providerConfig: WebSearchProviderConfig) {
  // The cast is needed because each factory takes its own narrower config type.
  return getProviderDescriptor(providerConfig).createProvider(providerConfig as never);
}
/**
 * Validates a search request against the capabilities of the target provider.
 *
 * @param providerName - Provider name used in error messages.
 * @param providerConfig - Configuration of the provider that will run the request.
 * @param request - The normalized search request.
 * @throws Error when the request carries an option block the provider does
 *   not accept, or when the provider's own search validation hook throws.
 */
export function validateSearchRequestForProvider(
  providerName: string,
  providerConfig: WebSearchProviderConfig,
  request: NormalizedSearchRequest,
) {
  const descriptor = getProviderDescriptor(providerConfig);
  const { tavily, firecrawl } = request;
  const acceptedBlocks = descriptor.capabilities.searchOptionBlocks;

  validateOptionBlocks(providerName, acceptedBlocks, { tavily, firecrawl });

  // Optional provider-specific hook, run after the generic checks.
  descriptor.validateSearchRequest?.(providerName, request);
}
/**
 * Validates a fetch request against the capabilities of the target provider.
 *
 * Checks run in this order: option blocks, then the generic `text`,
 * `highlights` and `summary` options, then the provider's own hook.
 *
 * @param providerName - Provider name used in error messages.
 * @param providerConfig - Configuration of the provider that will run the request.
 * @param request - The normalized fetch request.
 * @throws Error on the first unsupported option block or generic option, or
 *   when the provider's own fetch validation hook throws.
 */
export function validateFetchRequestForProvider(
  providerName: string,
  providerConfig: WebSearchProviderConfig,
  request: NormalizedFetchRequest,
) {
  const descriptor = getProviderDescriptor(providerConfig);
  const { fetchOptionBlocks, fetchFeatures } = descriptor.capabilities;
  const { tavily, firecrawl } = request;

  validateOptionBlocks(providerName, fetchOptionBlocks, { tavily, firecrawl });

  const textUnsupported = Boolean(request.text) && !fetchFeatures.text;
  if (textUnsupported) {
    throw new Error(`Provider "${providerName}" does not support generic fetch option "text".`);
  }

  const highlightsUnsupported = Boolean(request.highlights) && !fetchFeatures.highlights;
  if (highlightsUnsupported) {
    throw new Error(`Provider "${providerName}" does not support generic fetch option "highlights".`);
  }

  const summaryUnsupported = Boolean(request.summary) && !fetchFeatures.summary;
  if (summaryUnsupported) {
    throw new Error(`Provider "${providerName}" does not support generic fetch option "summary".`);
  }

  // Optional provider-specific hook, run after the generic checks.
  descriptor.validateFetchRequest?.(providerName, request);
}

View File

@@ -1,4 +1,5 @@
import type { TavilyProviderConfig } from "../schema.ts";
import { postJson, type ProviderFetchLike } from "./http.ts";
import type {
NormalizedFetchRequest,
NormalizedFetchResponse,
@@ -7,29 +8,22 @@ import type {
WebProvider,
} from "./types.ts";
export type TavilyFetchLike = (input: string, init?: RequestInit) => Promise<Response>;
async function readError(response: Response) {
const text = await response.text();
throw new Error(`Tavily ${response.status} ${response.statusText}: ${text.slice(0, 300)}`);
}
export function createTavilyProvider(
config: TavilyProviderConfig,
fetchImpl: TavilyFetchLike = fetch,
fetchImpl: ProviderFetchLike = fetch,
): WebProvider {
return {
name: config.name,
type: config.type,
async search(request: NormalizedSearchRequest): Promise<NormalizedSearchResponse> {
const response = await fetchImpl("https://api.tavily.com/search", {
method: "POST",
headers: {
"content-type": "application/json",
authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify({
const data = await postJson<any>({
providerName: config.name,
baseUrl: "https://api.tavily.com",
path: "/search",
apiKey: config.apiKey,
fetchImpl,
body: {
query: request.query,
max_results: request.limit ?? config.options?.defaultSearchLimit ?? 5,
include_domains: request.includeDomains,
@@ -44,14 +38,9 @@ export function createTavilyProvider(
include_answer: request.tavily?.includeAnswer,
include_raw_content: request.tavily?.includeRawContent,
include_images: request.tavily?.includeImages,
}),
},
});
if (!response.ok) {
await readError(response);
}
const data = (await response.json()) as any;
return {
providerName: config.name,
requestId: data.request_id,
@@ -69,13 +58,13 @@ export function createTavilyProvider(
},
async fetch(request: NormalizedFetchRequest): Promise<NormalizedFetchResponse> {
const response = await fetchImpl("https://api.tavily.com/extract", {
method: "POST",
headers: {
"content-type": "application/json",
authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify({
const data = await postJson<any>({
providerName: config.name,
baseUrl: "https://api.tavily.com",
path: "/extract",
apiKey: config.apiKey,
fetchImpl,
body: {
urls: request.urls,
query: request.tavily?.query,
extract_depth: request.tavily?.extractDepth,
@@ -83,14 +72,9 @@ export function createTavilyProvider(
include_images: request.tavily?.includeImages,
include_favicon: request.tavily?.includeFavicon,
format: request.tavily?.format,
}),
},
});
if (!response.ok) {
await readError(response);
}
const data = (await response.json()) as any;
return {
providerName: config.name,
requestIds: data.request_id ? [data.request_id] : [],

View File

@@ -18,6 +18,19 @@ export interface TavilyFetchOptions {
format?: string;
}
/**
 * Firecrawl-specific search options carried in a request's `firecrawl` block.
 * The Firecrawl provider forwards each field to the `/search` request body.
 */
export interface FirecrawlSearchOptions {
country?: string;
location?: string;
// Cannot be combined with the request's top-level `category`; the provider throws if both are set.
categories?: string[];
scrapeOptions?: {
formats?: Array<"markdown" | "summary">;
};
}
/** Firecrawl-specific fetch options carried in a request's `firecrawl` block. */
export interface FirecrawlFetchOptions {
// When non-empty, replaces the formats the provider would derive from the generic `text`/`summary` options.
formats?: Array<"markdown" | "summary" | "images">;
}
export interface NormalizedSearchRequest {
query: string;
limit?: number;
@@ -28,6 +41,7 @@ export interface NormalizedSearchRequest {
category?: string;
provider?: string;
tavily?: TavilySearchOptions;
firecrawl?: FirecrawlSearchOptions;
}
export interface NormalizedSearchResult {
@@ -58,6 +72,7 @@ export interface NormalizedFetchRequest {
textMaxCharacters?: number;
provider?: string;
tavily?: TavilyFetchOptions;
firecrawl?: FirecrawlFetchOptions;
}
export interface NormalizedFetchResult {

View File

@@ -15,61 +15,54 @@ function createProvider(name: string, type: string, handlers: Partial<any>) {
};
}
test("search retries Tavily failures once with Exa", async () => {
test("search follows configured fallback chains and records every attempt", async () => {
const runtime = createWebSearchRuntime({
loadConfig: async () => ({
path: "test.json",
defaultProviderName: "tavily-main",
defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" },
defaultProviderName: "firecrawl-main",
defaultProvider: {
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["tavily-backup"],
},
providers: [
{ name: "tavily-main", type: "tavily", apiKey: "tvly" },
{
name: "firecrawl-main",
type: "firecrawl",
apiKey: "fc",
fallbackProviders: ["tavily-backup"],
},
{
name: "tavily-backup",
type: "tavily",
apiKey: "tvly",
fallbackProviders: ["exa-fallback"],
},
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
],
providersByName: new Map([
["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }],
[
"firecrawl-main",
{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["tavily-backup"] },
],
[
"tavily-backup",
{ name: "tavily-backup", type: "tavily", apiKey: "tvly", fallbackProviders: ["exa-fallback"] },
],
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
]),
}),
createProvider(providerConfig) {
if (providerConfig.type === "tavily") {
if (providerConfig.name === "exa-fallback") {
return createProvider(providerConfig.name, providerConfig.type, {
search: async () => {
throw new Error("503 upstream unavailable");
},
search: async () => ({
providerName: providerConfig.name,
results: [{ title: "Exa hit", url: "https://exa.ai" }],
}),
});
}
return createProvider(providerConfig.name, providerConfig.type, {
search: async () => ({
providerName: providerConfig.name,
results: [{ title: "Exa hit", url: "https://exa.ai" }],
}),
});
},
});
const result = await runtime.search({ query: "pi docs" });
assert.equal(result.execution.actualProviderName, "exa-fallback");
assert.equal(result.execution.failoverFromProviderName, "tavily-main");
assert.match(result.execution.failoverReason ?? "", /503/);
});
test("search does not retry when Exa was explicitly selected", async () => {
const runtime = createWebSearchRuntime({
loadConfig: async () => ({
path: "test.json",
defaultProviderName: "tavily-main",
defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" },
providers: [
{ name: "tavily-main", type: "tavily", apiKey: "tvly" },
{ name: "exa-fallback", type: "exa", apiKey: "exa" },
],
providersByName: new Map([
["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }],
["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
]),
}),
createProvider(providerConfig) {
return createProvider(providerConfig.name, providerConfig.type, {
search: async () => {
throw new Error(`boom:${providerConfig.name}`);
@@ -78,8 +71,136 @@ test("search does not retry when Exa was explicitly selected", async () => {
},
});
await assert.rejects(
() => runtime.search({ query: "pi docs", provider: "exa-fallback" }),
/boom:exa-fallback/,
);
const result = await runtime.search({ query: "pi docs" });
assert.equal(result.execution.actualProviderName, "exa-fallback");
assert.equal(result.execution.failoverFromProviderName, "firecrawl-main");
assert.deepEqual(result.execution.attempts, [
{
providerName: "firecrawl-main",
status: "failed",
reason: "boom:firecrawl-main",
},
{
providerName: "tavily-backup",
status: "failed",
reason: "boom:tavily-backup",
},
{
providerName: "exa-fallback",
status: "succeeded",
},
]);
});
// A Tavily options block sent to a Firecrawl provider must be rejected by
// validation, without the provider's search handler ever running.
test("search rejects a mismatched provider-specific options block before provider execution", async () => {
  let searchInvocations = 0;

  const runtime = createWebSearchRuntime({
    async loadConfig() {
      return {
        path: "test.json",
        defaultProviderName: "firecrawl-main",
        defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" },
        providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }],
        providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]),
      };
    },
    createProvider: (providerConfig) =>
      createProvider(providerConfig.name, providerConfig.type, {
        async search() {
          searchInvocations += 1;
          return {
            providerName: providerConfig.name,
            results: [],
          };
        },
      }),
  });

  const runSearch = () => runtime.search({ query: "pi docs", tavily: { topic: "news" } });

  await assert.rejects(runSearch, /does not accept the "tavily" options block/);
  assert.equal(searchInvocations, 0);
});
// Firecrawl does not support the generic `highlights` option, so the request
// must fail validation without the provider's fetch handler ever running.
test("fetch rejects Firecrawl highlights before provider execution", async () => {
  let fetchInvocations = 0;

  const runtime = createWebSearchRuntime({
    async loadConfig() {
      return {
        path: "test.json",
        defaultProviderName: "firecrawl-main",
        defaultProvider: { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" },
        providers: [{ name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }],
        providersByName: new Map([["firecrawl-main", { name: "firecrawl-main", type: "firecrawl", apiKey: "fc" }]]),
      };
    },
    createProvider: (providerConfig) =>
      createProvider(providerConfig.name, providerConfig.type, {
        async fetch() {
          fetchInvocations += 1;
          return {
            providerName: providerConfig.name,
            results: [],
          };
        },
      }),
  });

  const runFetch = () => runtime.fetch({ urls: ["https://pi.dev"], highlights: true });

  await assert.rejects(runFetch, /does not support generic fetch option "highlights"/);
  assert.equal(fetchInvocations, 0);
});
// Requesting a non-default provider explicitly must start there (skipping the
// default) and still walk that provider's own fallback chain on failure.
test("search starts with the explicitly requested provider and still follows its fallback chain", async () => {
  const invokedProviders: string[] = [];

  const runtime = createWebSearchRuntime({
    async loadConfig() {
      return {
        path: "test.json",
        defaultProviderName: "tavily-main",
        defaultProvider: { name: "tavily-main", type: "tavily", apiKey: "tvly" },
        providers: [
          { name: "tavily-main", type: "tavily", apiKey: "tvly" },
          {
            name: "firecrawl-main",
            type: "firecrawl",
            apiKey: "fc",
            fallbackProviders: ["exa-fallback"],
          },
          { name: "exa-fallback", type: "exa", apiKey: "exa" },
        ],
        providersByName: new Map([
          ["tavily-main", { name: "tavily-main", type: "tavily", apiKey: "tvly" }],
          [
            "firecrawl-main",
            { name: "firecrawl-main", type: "firecrawl", apiKey: "fc", fallbackProviders: ["exa-fallback"] },
          ],
          ["exa-fallback", { name: "exa-fallback", type: "exa", apiKey: "exa" }],
        ]),
      };
    },
    createProvider: (providerConfig) =>
      createProvider(providerConfig.name, providerConfig.type, {
        async search() {
          invokedProviders.push(providerConfig.name);
          // Only the last provider in the chain succeeds.
          if (providerConfig.name !== "exa-fallback") {
            throw new Error(`boom:${providerConfig.name}`);
          }
          return {
            providerName: providerConfig.name,
            results: [{ title: "Exa hit", url: "https://exa.ai" }],
          };
        },
      }),
  });

  const result = await runtime.search({ query: "pi docs", provider: "firecrawl-main" });
  const { requestedProviderName, actualProviderName } = result.execution;

  assert.deepEqual(invokedProviders, ["firecrawl-main", "exa-fallback"]);
  assert.equal(requestedProviderName, "firecrawl-main");
  assert.equal(actualProviderName, "exa-fallback");
});

View File

@@ -1,6 +1,9 @@
import { loadWebSearchConfig, type ResolvedWebSearchConfig } from "./config.ts";
import { createExaProvider } from "./providers/exa.ts";
import { createTavilyProvider } from "./providers/tavily.ts";
import {
createProviderFromConfig,
validateFetchRequestForProvider,
validateSearchRequestForProvider,
} from "./providers/registry.ts";
import type {
NormalizedFetchRequest,
NormalizedFetchResponse,
@@ -10,11 +13,18 @@ import type {
} from "./providers/types.ts";
import type { WebSearchProviderConfig } from "./schema.ts";
/** Outcome of trying one provider while executing a request. */
export interface ProviderExecutionAttempt {
providerName: string;
status: "failed" | "succeeded";
// Error message of the failure; not set on successful attempts.
reason?: string;
}
/** Describes which providers were involved in producing a runtime response. */
export interface ProviderExecutionMeta {
// Provider named on the request, if any.
requestedProviderName?: string;
// Provider that produced the response.
actualProviderName: string;
// Name and failure reason of the first failed attempt, set only when that
// provider differs from `actualProviderName`.
failoverFromProviderName?: string;
failoverReason?: string;
// Every attempt made for the request.
attempts: ProviderExecutionAttempt[];
}
export interface RuntimeSearchResponse extends NormalizedSearchResponse {
@@ -25,6 +35,36 @@ export interface RuntimeFetchResponse extends NormalizedFetchResponse {
execution: ProviderExecutionMeta;
}
/**
 * Builds the error reported when a provider name is not present in the config.
 *
 * @param config - Resolved configuration; its provider names are listed in the message.
 * @param selectedName - The provider name that could not be found.
 * @returns An `Error` (not thrown here) naming the unknown provider and all configured ones.
 */
function createUnknownProviderError(config: ResolvedWebSearchConfig, selectedName: string) {
  const configuredNames = Array.from(config.providersByName.keys()).join(", ");
  const message = `Unknown web-search provider \"${selectedName}\". Configured providers: ${configuredNames}`;
  return new Error(message);
}
/**
 * Assembles the execution metadata attached to a runtime response.
 *
 * Failover fields are filled from the first failed attempt, and only when that
 * attempt's provider differs from the provider that produced the response.
 *
 * @param requestedProviderName - Provider named on the request, if any.
 * @param actualProviderName - Provider that produced the response.
 * @param attempts - All attempts made, in order.
 * @returns The execution metadata; `attempts` is stored by reference.
 */
function buildExecutionMeta(
  requestedProviderName: string | undefined,
  actualProviderName: string,
  attempts: ProviderExecutionAttempt[],
): ProviderExecutionMeta {
  const meta: ProviderExecutionMeta = {
    requestedProviderName,
    actualProviderName,
    failoverFromProviderName: undefined,
    failoverReason: undefined,
    attempts,
  };

  const earliestFailure = attempts.find((attempt) => attempt.status === "failed");
  if (earliestFailure && earliestFailure.providerName !== actualProviderName) {
    meta.failoverFromProviderName = earliestFailure.providerName;
    meta.failoverReason = earliestFailure.reason;
  }

  return meta;
}
/**
 * Annotates an error with the list of provider attempts that led to it.
 *
 * @param error - The value about to be rethrown.
 * @param attempts - Attempts to expose as `error.execution.attempts`.
 * @returns The same `error` value; only `Error` instances are annotated,
 *   anything else is passed through untouched.
 */
function attachAttempts(error: unknown, attempts: ProviderExecutionAttempt[]) {
  if (!(error instanceof Error)) {
    return error;
  }
  const annotated = error as Error & { execution?: { attempts: ProviderExecutionAttempt[] } };
  annotated.execution = { attempts };
  return error;
}
export function createWebSearchRuntime(
deps: {
loadConfig?: () => Promise<ResolvedWebSearchConfig>;
@@ -32,14 +72,7 @@ export function createWebSearchRuntime(
} = {},
) {
const loadConfig = deps.loadConfig ?? loadWebSearchConfig;
const createProvider = deps.createProvider ?? ((providerConfig: WebSearchProviderConfig) => {
switch (providerConfig.type) {
case "tavily":
return createTavilyProvider(providerConfig);
case "exa":
return createExaProvider(providerConfig);
}
});
const createProvider = deps.createProvider ?? createProviderFromConfig;
async function resolveConfigAndProvider(providerName?: string) {
const config = await loadConfig();
@@ -47,89 +80,85 @@ export function createWebSearchRuntime(
const selectedConfig = config.providersByName.get(selectedName);
if (!selectedConfig) {
throw new Error(
`Unknown web-search provider \"${selectedName}\". Configured providers: ${[...config.providersByName.keys()].join(", ")}`,
);
throw createUnknownProviderError(config, selectedName);
}
return {
config,
selectedName,
selectedConfig,
selectedProvider: createProvider(selectedConfig),
};
}
async function search(request: NormalizedSearchRequest): Promise<RuntimeSearchResponse> {
const { config, selectedName, selectedConfig, selectedProvider } = await resolveConfigAndProvider(request.provider);
async function executeWithFailover<TResponse extends NormalizedSearchResponse | NormalizedFetchResponse>(
request: NormalizedSearchRequest | NormalizedFetchRequest,
operation: "search" | "fetch",
): Promise<TResponse & { execution: ProviderExecutionMeta }> {
const { config, selectedName } = await resolveConfigAndProvider(request.provider);
const attempts: ProviderExecutionAttempt[] = [];
const pendingProviderNames = [selectedName];
const visited = new Set<string>();
let lastError: unknown;
try {
const response = await selectedProvider.search(request);
return {
...response,
execution: {
requestedProviderName: request.provider,
actualProviderName: selectedName,
},
};
} catch (error) {
if (selectedConfig.type !== "tavily") {
throw error;
while (pendingProviderNames.length > 0) {
const providerName = pendingProviderNames.shift();
if (!providerName || visited.has(providerName)) {
continue;
}
visited.add(providerName);
const providerConfig = config.providersByName.get(providerName);
if (!providerConfig) {
throw createUnknownProviderError(config, providerName);
}
const fallbackConfig = [...config.providersByName.values()].find((provider) => provider.type === "exa");
if (!fallbackConfig) {
throw error;
if (operation === "search") {
validateSearchRequestForProvider(providerName, providerConfig, request as NormalizedSearchRequest);
} else {
validateFetchRequestForProvider(providerName, providerConfig, request as NormalizedFetchRequest);
}
const fallbackProvider = createProvider(fallbackConfig);
const fallbackResponse = await fallbackProvider.search({ ...request, provider: fallbackConfig.name });
return {
...fallbackResponse,
execution: {
requestedProviderName: request.provider,
actualProviderName: fallbackConfig.name,
failoverFromProviderName: selectedName,
failoverReason: (error as Error).message,
},
};
const provider = createProvider(providerConfig);
try {
const response = await provider[operation]({
...request,
provider: providerName,
} as never);
attempts.push({
providerName,
status: "succeeded",
});
return {
...response,
execution: buildExecutionMeta(request.provider, providerName, attempts),
} as TResponse & { execution: ProviderExecutionMeta };
} catch (error) {
attempts.push({
providerName,
status: "failed",
reason: (error as Error).message,
});
lastError = error;
for (const fallbackProviderName of providerConfig.fallbackProviders ?? []) {
if (!visited.has(fallbackProviderName)) {
pendingProviderNames.push(fallbackProviderName);
}
}
}
}
throw attachAttempts(lastError, attempts);
}
async function search(request: NormalizedSearchRequest): Promise<RuntimeSearchResponse> {
return executeWithFailover<NormalizedSearchResponse>(request, "search");
}
async function fetch(request: NormalizedFetchRequest): Promise<RuntimeFetchResponse> {
const { config, selectedName, selectedConfig, selectedProvider } = await resolveConfigAndProvider(request.provider);
try {
const response = await selectedProvider.fetch(request);
return {
...response,
execution: {
requestedProviderName: request.provider,
actualProviderName: selectedName,
},
};
} catch (error) {
if (selectedConfig.type !== "tavily") {
throw error;
}
const fallbackConfig = [...config.providersByName.values()].find((provider) => provider.type === "exa");
if (!fallbackConfig) {
throw error;
}
const fallbackProvider = createProvider(fallbackConfig);
const fallbackResponse = await fallbackProvider.fetch({ ...request, provider: fallbackConfig.name });
return {
...fallbackResponse,
execution: {
requestedProviderName: request.provider,
actualProviderName: fallbackConfig.name,
failoverFromProviderName: selectedName,
failoverReason: (error as Error).message,
},
};
}
return executeWithFailover<NormalizedFetchResponse>(request, "fetch");
}
return {

View File

@@ -1,5 +1,8 @@
import { Type, type Static } from "@sinclair/typebox";
// Shared building block: a string with at least one character.
const NonEmptyStringSchema = Type.String({ minLength: 1 });
// Optional list of fallback provider names; when present it must contain at least one name.
const FallbackProvidersSchema = Type.Optional(Type.Array(NonEmptyStringSchema, { minItems: 1 }));
export const ProviderOptionsSchema = Type.Object({
defaultSearchLimit: Type.Optional(Type.Integer({ minimum: 1 })),
defaultFetchTextMaxCharacters: Type.Optional(Type.Integer({ minimum: 1 })),
@@ -7,10 +10,11 @@ export const ProviderOptionsSchema = Type.Object({
});
export const ExaProviderConfigSchema = Type.Object({
name: Type.String({ minLength: 1 }),
name: NonEmptyStringSchema,
type: Type.Literal("exa"),
apiKey: Type.String({ minLength: 1 }),
apiKey: NonEmptyStringSchema,
options: Type.Optional(ProviderOptionsSchema),
fallbackProviders: FallbackProvidersSchema,
});
export const TavilyProviderOptionsSchema = Type.Object({
@@ -19,16 +23,34 @@ export const TavilyProviderOptionsSchema = Type.Object({
});
export const TavilyProviderConfigSchema = Type.Object({
name: Type.String({ minLength: 1 }),
name: NonEmptyStringSchema,
type: Type.Literal("tavily"),
apiKey: Type.String({ minLength: 1 }),
apiKey: NonEmptyStringSchema,
options: Type.Optional(TavilyProviderOptionsSchema),
fallbackProviders: FallbackProvidersSchema,
});
export const WebSearchProviderConfigSchema = Type.Union([ExaProviderConfigSchema, TavilyProviderConfigSchema]);
/** Per-provider defaults for Firecrawl; currently only the default search result limit. */
export const FirecrawlProviderOptionsSchema = Type.Object({
defaultSearchLimit: Type.Optional(Type.Integer({ minimum: 1 })),
});
/**
 * Configuration entry for a Firecrawl provider.
 * Unlike the Exa and Tavily entries, `apiKey` is optional here, and a custom
 * `baseUrl` may be supplied (e.g. for a self-hosted instance).
 */
export const FirecrawlProviderConfigSchema = Type.Object({
name: NonEmptyStringSchema,
type: Type.Literal("firecrawl"),
apiKey: Type.Optional(NonEmptyStringSchema),
baseUrl: Type.Optional(NonEmptyStringSchema),
options: Type.Optional(FirecrawlProviderOptionsSchema),
fallbackProviders: FallbackProvidersSchema,
});
/** Any supported provider configuration entry; the literal `type` field distinguishes the variants. */
export const WebSearchProviderConfigSchema = Type.Union([
ExaProviderConfigSchema,
TavilyProviderConfigSchema,
FirecrawlProviderConfigSchema,
]);
export const WebSearchConfigSchema = Type.Object({
defaultProvider: Type.String({ minLength: 1 }),
defaultProvider: NonEmptyStringSchema,
providers: Type.Array(WebSearchProviderConfigSchema, { minItems: 1 }),
});
@@ -52,6 +74,28 @@ export const TavilyFetchToolOptionsSchema = Type.Object({
format: Type.Optional(Type.String()),
});
/** Formats allowed in `firecrawl.scrapeOptions.formats` on a search request. */
export const FirecrawlSearchFormatSchema = Type.Union([Type.Literal("markdown"), Type.Literal("summary")]);
/** Formats allowed in `firecrawl.formats` on a fetch request; adds "images" to the search formats. */
export const FirecrawlFetchFormatSchema = Type.Union([
Type.Literal("markdown"),
Type.Literal("summary"),
Type.Literal("images"),
]);
/** Shape of the `firecrawl` options block on the `web_search` tool parameters. */
export const FirecrawlSearchToolOptionsSchema = Type.Object({
country: Type.Optional(Type.String()),
location: Type.Optional(Type.String()),
// When present, must list at least one non-empty category name.
categories: Type.Optional(Type.Array(NonEmptyStringSchema, { minItems: 1 })),
scrapeOptions: Type.Optional(
Type.Object({
formats: Type.Optional(Type.Array(FirecrawlSearchFormatSchema, { minItems: 1 })),
}),
),
});
/** Shape of the `firecrawl` options block on the `web_fetch` tool parameters. */
export const FirecrawlFetchToolOptionsSchema = Type.Object({
// When present, must list at least one format.
formats: Type.Optional(Type.Array(FirecrawlFetchFormatSchema, { minItems: 1 })),
});
export const WebSearchParamsSchema = Type.Object({
query: Type.String({ minLength: 1, description: "Search query" }),
limit: Type.Optional(Type.Integer({ minimum: 1, maximum: 25 })),
@@ -62,6 +106,7 @@ export const WebSearchParamsSchema = Type.Object({
category: Type.Optional(Type.String()),
provider: Type.Optional(Type.String()),
tavily: Type.Optional(TavilySearchToolOptionsSchema),
firecrawl: Type.Optional(FirecrawlSearchToolOptionsSchema),
});
export const WebFetchParamsSchema = Type.Object({
@@ -72,15 +117,22 @@ export const WebFetchParamsSchema = Type.Object({
textMaxCharacters: Type.Optional(Type.Integer({ minimum: 1 })),
provider: Type.Optional(Type.String()),
tavily: Type.Optional(TavilyFetchToolOptionsSchema),
firecrawl: Type.Optional(FirecrawlFetchToolOptionsSchema),
});
// Static TypeScript types derived from the schemas above, so the runtime
// schema and the compile-time type cannot drift apart.

// Provider option types.
export type ProviderOptions = Static<typeof ProviderOptionsSchema>;
export type TavilyProviderOptions = Static<typeof TavilyProviderOptionsSchema>;
export type FirecrawlProviderOptions = Static<typeof FirecrawlProviderOptionsSchema>;
// Provider configuration types.
export type ExaProviderConfig = Static<typeof ExaProviderConfigSchema>;
export type TavilyProviderConfig = Static<typeof TavilyProviderConfigSchema>;
export type FirecrawlProviderConfig = Static<typeof FirecrawlProviderConfigSchema>;
export type WebSearchProviderConfig = Static<typeof WebSearchProviderConfigSchema>;
export type WebSearchConfig = Static<typeof WebSearchConfigSchema>;
// Provider-specific tool option types.
export type TavilySearchToolOptions = Static<typeof TavilySearchToolOptionsSchema>;
export type TavilyFetchToolOptions = Static<typeof TavilyFetchToolOptionsSchema>;
export type FirecrawlSearchFormat = Static<typeof FirecrawlSearchFormatSchema>;
export type FirecrawlFetchFormat = Static<typeof FirecrawlFetchFormatSchema>;
export type FirecrawlSearchToolOptions = Static<typeof FirecrawlSearchToolOptionsSchema>;
export type FirecrawlFetchToolOptions = Static<typeof FirecrawlFetchToolOptionsSchema>;
// Tool parameter types.
export type WebSearchParams = Static<typeof WebSearchParamsSchema>;
export type WebFetchParams = Static<typeof WebFetchParamsSchema>;

View File

@@ -2,69 +2,37 @@ import test from "node:test";
import assert from "node:assert/strict";
import { createWebFetchTool } from "./web-fetch.ts";
/**
 * Checks that `prepareArguments` turns a lone `url` argument into a
 * one-element `urls` array while keeping the original `url` key.
 */
test("web_fetch prepareArguments folds a single url into urls", () => {
  // Only argument preparation is exercised, so the executor throws if it is ever invoked.
  const unusedExecuteFetch = async () => {
    throw new Error("not used");
  };
  const tool = createWebFetchTool({ executeFetch: unusedExecuteFetch });

  const singleUrl = "https://exa.ai/docs";
  const prepared = tool.prepareArguments?.({ url: singleUrl });

  assert.deepEqual(prepared, {
    url: singleUrl,
    urls: [singleUrl],
  });
});
// NOTE(review): this span is a diff hunk (-2,69 +2,37) shown without +/- markers, so
// lines of more than one version of the test file are interleaved and the span is not
// valid TypeScript as a unit. It appears to contain the Tavily forwarding test, the
// Firecrawl pass-through test, and the malformed-URL rejection test — confirm against
// the real file which of these still exist after the commit.
test("web_fetch forwards nested Tavily extract options to the runtime", async () => {
let capturedRequest: any;
// Firecrawl variant: stubs `executeFetch` to capture the request handed to the runtime.
test("createWebFetchTool passes Firecrawl fetch options through to the runtime", async () => {
let captured: any;
const tool = createWebFetchTool({
executeFetch: async (request) => {
capturedRequest = request;
async executeFetch(request) {
captured = request;
return {
providerName: "tavily-main",
results: [
{
url: "https://pi.dev",
title: "Docs",
text: "Body",
},
],
execution: { actualProviderName: "tavily-main" },
providerName: "firecrawl-main",
results: [],
};
},
});
const result = await tool.execute(
"tool-1",
{
urls: ["https://pi.dev"],
tavily: {
query: "installation",
extractDepth: "advanced",
includeImages: true,
},
await tool.execute("tool-call", {
urls: ["https://pi.dev"],
provider: "firecrawl-main",
firecrawl: {
formats: ["markdown", "summary", "images"],
},
undefined,
undefined,
undefined,
);
} as any);
assert.equal(capturedRequest.tavily.query, "installation");
assert.equal(capturedRequest.tavily.extractDepth, "advanced");
assert.equal(capturedRequest.text, true);
assert.match((result.content[0] as { text: string }).text, /Body/);
});
test("web_fetch rejects malformed URLs", async () => {
const tool = createWebFetchTool({
executeFetch: async () => {
throw new Error("should not execute fetch for invalid URLs");
// Exact-shape check of the captured request: the `firecrawl` options must arrive unchanged.
// NOTE(review): the expected URL has a trailing slash ("https://pi.dev/") while the input
// is "https://pi.dev" — presumably the tool normalizes URLs; confirm in web-fetch.ts.
assert.deepEqual(captured, {
urls: ["https://pi.dev/"],
text: true,
highlights: false,
summary: false,
textMaxCharacters: undefined,
provider: "firecrawl-main",
tavily: undefined,
firecrawl: {
formats: ["markdown", "summary", "images"],
},
});
await assert.rejects(
() => tool.execute("tool-1", { urls: ["not-a-url"] }, undefined, undefined, undefined),
/Invalid URL/,
);
});

View File

@@ -29,6 +29,7 @@ function normalizeFetchParams(params: WebFetchParams & { url?: string }) {
textMaxCharacters: params.textMaxCharacters,
provider: params.provider,
tavily: params.tavily,
firecrawl: params.firecrawl,
};
}

View File

@@ -2,54 +2,47 @@ import test from "node:test";
import assert from "node:assert/strict";
import { createWebSearchTool } from "./web-search.ts";
// NOTE(review): this span is a diff hunk (-2,54 +2,47) shown without +/- markers, so
// lines of more than one version of the test file are interleaved and the span is not
// valid TypeScript as a unit. It appears to contain the Tavily forwarding test, the
// Firecrawl pass-through test, and the blank-query rejection test — confirm against
// the real file which of these still exist after the commit.
test("web_search forwards nested Tavily options to the runtime", async () => {
let capturedRequest: any;
// Firecrawl variant: stubs `executeSearch` to capture the request handed to the runtime.
test("createWebSearchTool passes Firecrawl search options through to the runtime", async () => {
let captured: any;
const tool = createWebSearchTool({
executeSearch: async (request) => {
capturedRequest = request;
async executeSearch(request) {
captured = request;
return {
providerName: "tavily-main",
results: [
{
title: "Docs",
url: "https://pi.dev",
},
],
execution: { actualProviderName: "tavily-main" },
providerName: "firecrawl-main",
results: [],
};
},
});
const result = await tool.execute(
"tool-1",
{
query: "pi docs",
tavily: {
includeAnswer: true,
includeRawContent: true,
searchDepth: "advanced",
await tool.execute("tool-call", {
query: "pi docs",
provider: "firecrawl-main",
firecrawl: {
country: "DE",
categories: ["github"],
scrapeOptions: {
formats: ["markdown"],
},
},
undefined,
undefined,
undefined,
);
} as any);
assert.equal(capturedRequest.tavily.includeAnswer, true);
assert.equal(capturedRequest.tavily.searchDepth, "advanced");
assert.match((result.content[0] as { text: string }).text, /Docs/);
});
test("web_search rejects a blank query before resolving a provider", async () => {
const tool = createWebSearchTool({
executeSearch: async () => {
throw new Error("should not execute search for a blank query");
// Exact-shape check of the captured request: unset optional fields are expected as
// explicit `undefined` keys and the `firecrawl` options must arrive unchanged.
assert.deepEqual(captured, {
query: "pi docs",
limit: undefined,
includeDomains: undefined,
excludeDomains: undefined,
startPublishedDate: undefined,
endPublishedDate: undefined,
category: undefined,
provider: "firecrawl-main",
tavily: undefined,
firecrawl: {
country: "DE",
categories: ["github"],
scrapeOptions: {
formats: ["markdown"],
},
},
});
await assert.rejects(
() => tool.execute("tool-1", { query: " " }, undefined, undefined, undefined),
/non-empty query/,
);
});

View File

@@ -34,6 +34,7 @@ export function createWebSearchTool({ executeSearch }: SearchToolDeps) {
category: params.category,
provider: params.provider,
tavily: params.tavily,
firecrawl: params.firecrawl,
});
return {