fix(chat): stop retry spirals on tool failures

This commit is contained in:
2026-03-10 18:05:34 +00:00
parent 212ce33e36
commit dbabbdf9f0
6 changed files with 490 additions and 14 deletions

View File

@@ -1,3 +1,9 @@
---
title: AGENTS
type: note
permalink: voyage/agents
---
# Voyage Development Instructions (OpenCode)
## Project
@@ -57,7 +63,7 @@ Run in this order:
## Known Issues (Expected)
- Frontend `bun run check`: **0 errors + 6 warnings** expected (pre-existing in `CollectionRecommendationView.svelte` + `RegionCard.svelte`)
- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 9 new chat tests all pass)
- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 32 chat tests all pass)
- Docker dev setup has frontend-backend communication issues (500 errors beyond homepage)
## Key Patterns
@@ -68,9 +74,11 @@ Run in this order:
- Chat providers: dynamic catalog from `GET /api/chat/providers/`; configured in `CHAT_PROVIDER_CONFIG`
- Chat model override: dropdown selector fed by `GET /api/chat/providers/{provider}/models/`; persisted in `localStorage` key `voyage_chat_model_prefs`; backend accepts optional `model` param in `send_message`
- Chat context: collection chats inject collection UUID + multi-stop itinerary context; system prompt guides `get_trip_details`-first reasoning and confirms only before first `add_to_itinerary`; `search_places` has a deterministic context-retry fallback — when the LLM omits `location`, the backend retries using the trip destination or first itinerary stop before asking the user for clarification; a dining-intent heuristic infers `category="food"` from user messages when the LLM omits category for restaurant/dining requests
- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()`
- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`)
- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()`; tool results are deduplicated by `tool_call_id` at three layers — rebuild from persisted rows (discards server-side pre-populated `tool_results`), SSE ingestion (`appendToolResultDedup`), and render-time (`uniqueToolResultsByCallId`)
- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`); `execute_tool()` catch-all returns a generic sanitized message (never raw `str(exc)`)
- Invalid tool calls (missing required args) are detected and short-circuited with a user-visible error — not replayed into history
- Tool execution failures (`search_places`, `web_search`, catch-all errors) are classified separately from required-param validation errors; execution failures emit a bounded `tool_execution_error` SSE event and stop — they are never replayed into LLM context; `tool_iterations` increments only on successful tool calls; all-failure rounds are capped at `MAX_ALL_FAILURE_ROUNDS` (3); permanent failures (e.g. `web_search` import error with `retryable: false`) stop immediately
- Geocoding failures in `search_places` (`Could not geocode location: ...`) are eligible for the existing context-retry path (trip destination → first itinerary stop → user clarification)
- Geocoding: `background_geocode_and_assign()` runs in a thread after Location save; populates `region`, `city`, `country`, and also fills `Location.location` from reverse geocode `display_name` (truncated to field max_length) if blank or different
## Conventions

View File

@@ -295,6 +295,7 @@ def web_search(user, query: str, location_context: str | None = None) -> dict:
return {
"error": "Web search is not available (duckduckgo-search not installed)",
"results": [],
"retryable": False,
}
except Exception as exc:
error_str = str(exc).lower()
@@ -637,9 +638,9 @@ def execute_tool(tool_name, user, **kwargs):
try:
return tool_fn(user=user, **filtered_kwargs)
except Exception as exc:
except Exception:
logger.exception("Tool %s failed", tool_name)
return {"error": str(exc)}
return {"error": "Tool execution failed"}
AGENT_TOOLS = get_tool_schemas()

View File

@@ -1,4 +1,5 @@
import json
from unittest import mock
from unittest.mock import patch
from django.contrib.auth import get_user_model
@@ -6,13 +7,64 @@ from django.test import TestCase
from rest_framework.test import APITransactionTestCase
from adventures.models import Collection, CollectionItineraryItem, Location
from chat.agent_tools import add_to_itinerary, get_trip_details
from chat.agent_tools import (
add_to_itinerary,
execute_tool,
get_trip_details,
web_search,
)
from chat.views import ChatViewSet
User = get_user_model()
class WebSearchToolFailureClassificationTests(TestCase):
def test_web_search_import_error_sets_retryable_false(self):
import builtins
original_import = builtins.__import__
def controlled_import(name, *args, **kwargs):
if name == "duckduckgo_search":
raise ImportError("missing dependency")
return original_import(name, *args, **kwargs)
user = User.objects.create_user(
username="chat-web-search-user",
email="chat-web-search-user@example.com",
password="password123",
)
with mock.patch("builtins.__import__", side_effect=controlled_import):
result = web_search(user, query="best restaurants")
self.assertEqual(
result.get("error"),
"Web search is not available (duckduckgo-search not installed)",
)
self.assertEqual(result.get("retryable"), False)
class ExecuteToolErrorSanitizationTests(TestCase):
def test_execute_tool_catch_all_returns_sanitized_error_message(self):
def raising_tool(user):
raise RuntimeError("sensitive backend detail")
user = User.objects.create_user(
username="chat-tool-sanitize-user",
email="chat-tool-sanitize-user@example.com",
password="password123",
)
with mock.patch.dict(
"chat.agent_tools._REGISTERED_TOOLS", {"boom": raising_tool}
):
result = execute_tool("boom", user)
self.assertEqual(result, {"error": "Tool execution failed"})
class ChatAgentToolSharedTripAccessTests(TestCase):
def setUp(self):
self.owner = User.objects.create_user(
@@ -173,6 +225,40 @@ class ChatViewSetToolValidationBoundaryTests(TestCase):
)
)
def test_execution_failure_error_detected(self):
self.assertTrue(
ChatViewSet._is_execution_failure_tool_error(
{"error": "Web search failed. Please try again."}
)
)
def test_required_error_not_treated_as_execution_failure(self):
self.assertFalse(
ChatViewSet._is_execution_failure_tool_error(
{"error": "location is required"}
)
)
def test_search_places_geocode_error_detected_for_location_retry(self):
self.assertTrue(
ChatViewSet._is_search_places_location_retry_candidate_error(
"search_places",
{"error": "Could not geocode location: ???"},
)
)
def test_retryable_execution_failure_defaults_true(self):
self.assertTrue(
ChatViewSet._is_retryable_execution_failure({"error": "Temporary outage"})
)
def test_retryable_execution_failure_honors_false_flag(self):
self.assertFalse(
ChatViewSet._is_retryable_execution_failure(
{"error": "Not installed", "retryable": False}
)
)
def test_likely_location_reply_heuristic_positive_case(self):
self.assertTrue(ChatViewSet._is_likely_location_reply("london"))
@@ -557,3 +643,217 @@ class ChatViewSetSearchPlacesClarificationTests(APITransactionTestCase):
self.assertEqual(mock_execute_tool.call_count, 2)
second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs
self.assertEqual(second_call_kwargs.get("location"), "Paris, France")
@patch("chat.views.execute_tool")
@patch("chat.views.stream_chat_completion")
@patch("integrations.utils.auto_profile.update_auto_preference_profile")
def test_geocode_failure_retries_with_trip_context_location(
self,
_mock_auto_profile,
mock_stream_chat_completion,
mock_execute_tool,
):
user = User.objects.create_user(
username="chat-geocode-retry-user",
email="chat-geocode-retry-user@example.com",
password="password123",
)
self.client.force_authenticate(user=user)
conversation_response = self.client.post(
"/api/chat/conversations/",
{"title": "Geocode Retry Test"},
format="json",
)
self.assertEqual(conversation_response.status_code, 201)
conversation_id = conversation_response.json()["id"]
async def first_stream(*args, **kwargs):
yield 'data: {"tool_calls": [{"index": 0, "id": "call_1", "type": "function", "function": {"name": "search_places", "arguments": "{}"}}]}\n\n'
yield "data: [DONE]\n\n"
async def second_stream(*args, **kwargs):
yield 'data: {"content": "Here are options in Lisbon."}\n\n'
yield "data: [DONE]\n\n"
mock_stream_chat_completion.side_effect = [first_stream(), second_stream()]
mock_execute_tool.side_effect = [
{"error": "Could not geocode location: invalid"},
{"results": [{"name": "Time Out Market"}]},
]
response = self.client.post(
f"/api/chat/conversations/{conversation_id}/send_message/",
{
"message": "Find restaurants",
"destination": "Lisbon, Portugal",
},
format="json",
)
self.assertEqual(response.status_code, 200)
chunks = [
chunk.decode("utf-8")
if isinstance(chunk, (bytes, bytearray))
else str(chunk)
for chunk in response.streaming_content
]
payload_lines = [
chunk.strip()[len("data: ") :]
for chunk in chunks
if chunk.strip().startswith("data: ")
]
json_payloads = [
json.loads(payload) for payload in payload_lines if payload != "[DONE]"
]
self.assertTrue(any("tool_result" in payload for payload in json_payloads))
self.assertTrue(
any(
payload.get("content") == "Here are options in Lisbon."
for payload in json_payloads
)
)
self.assertEqual(mock_execute_tool.call_count, 2)
second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs
self.assertEqual(second_call_kwargs.get("location"), "Lisbon, Portugal")
class ChatViewSetToolExecutionFailureLoopTests(APITransactionTestCase):
@patch("chat.views.execute_tool")
@patch("chat.views.stream_chat_completion")
@patch("integrations.utils.auto_profile.update_auto_preference_profile")
def test_all_failure_rounds_stop_with_execution_error_before_tool_cap(
self,
_mock_auto_profile,
mock_stream_chat_completion,
mock_execute_tool,
):
user = User.objects.create_user(
username="chat-loop-failure-user",
email="chat-loop-failure-user@example.com",
password="password123",
)
self.client.force_authenticate(user=user)
conversation_response = self.client.post(
"/api/chat/conversations/",
{"title": "Failure Loop Test"},
format="json",
)
self.assertEqual(conversation_response.status_code, 201)
conversation_id = conversation_response.json()["id"]
async def failing_stream(*args, **kwargs):
yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n'
yield "data: [DONE]\n\n"
mock_stream_chat_completion.side_effect = failing_stream
mock_execute_tool.return_value = {
"error": "Web search failed. Please try again.",
"results": [],
}
response = self.client.post(
f"/api/chat/conversations/{conversation_id}/send_message/",
{"message": "Find restaurants near me"},
format="json",
)
self.assertEqual(response.status_code, 200)
chunks = [
chunk.decode("utf-8")
if isinstance(chunk, (bytes, bytearray))
else str(chunk)
for chunk in response.streaming_content
]
payload_lines = [
chunk.strip()[len("data: ") :]
for chunk in chunks
if chunk.strip().startswith("data: ")
]
json_payloads = [
json.loads(payload) for payload in payload_lines if payload != "[DONE]"
]
self.assertTrue(
any(
payload.get("error_category") == "tool_execution_error"
for payload in json_payloads
)
)
self.assertFalse(
any(
payload.get("error_category") == "tool_loop_limit"
for payload in json_payloads
)
)
self.assertFalse(any("tool_result" in payload for payload in json_payloads))
self.assertEqual(mock_stream_chat_completion.call_count, 3)
self.assertEqual(mock_execute_tool.call_count, 3)
@patch("chat.views.execute_tool")
@patch("chat.views.stream_chat_completion")
@patch("integrations.utils.auto_profile.update_auto_preference_profile")
def test_permanent_execution_failure_stops_immediately(
self,
_mock_auto_profile,
mock_stream_chat_completion,
mock_execute_tool,
):
user = User.objects.create_user(
username="chat-permanent-failure-user",
email="chat-permanent-failure-user@example.com",
password="password123",
)
self.client.force_authenticate(user=user)
conversation_response = self.client.post(
"/api/chat/conversations/",
{"title": "Permanent Failure Test"},
format="json",
)
self.assertEqual(conversation_response.status_code, 201)
conversation_id = conversation_response.json()["id"]
async def failing_stream(*args, **kwargs):
yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n'
yield "data: [DONE]\n\n"
mock_stream_chat_completion.side_effect = failing_stream
mock_execute_tool.return_value = {
"error": "Web search is not available (duckduckgo-search not installed)",
"results": [],
"retryable": False,
}
response = self.client.post(
f"/api/chat/conversations/{conversation_id}/send_message/",
{"message": "Find restaurants near me"},
format="json",
)
self.assertEqual(response.status_code, 200)
chunks = [
chunk.decode("utf-8")
if isinstance(chunk, (bytes, bytearray))
else str(chunk)
for chunk in response.streaming_content
]
payload_lines = [
chunk.strip()[len("data: ") :]
for chunk in chunks
if chunk.strip().startswith("data: ")
]
json_payloads = [
json.loads(payload) for payload in payload_lines if payload != "[DONE]"
]
self.assertTrue(
any(
payload.get("error_category") == "tool_execution_error"
for payload in json_payloads
)
)
self.assertEqual(mock_stream_chat_completion.call_count, 1)
self.assertEqual(mock_execute_tool.call_count, 1)

View File

@@ -62,6 +62,9 @@ class ChatViewSet(viewsets.ModelViewSet):
if message.role == "tool"
and message.tool_call_id
and not self._is_required_param_tool_error_message_content(message.content)
and not self._is_execution_failure_tool_error_message_content(
message.content
)
}
messages = [
@@ -76,6 +79,13 @@ class ChatViewSet(viewsets.ModelViewSet):
and self._is_required_param_tool_error_message_content(message.content)
):
continue
if (
message.role == "tool"
and self._is_execution_failure_tool_error_message_content(
message.content
)
):
continue
payload = {
"role": message.role,
@@ -152,6 +162,24 @@ class ChatViewSet(viewsets.ModelViewSet):
)
)
@classmethod
def _is_execution_failure_tool_error(cls, result):
if not isinstance(result, dict):
return False
error_text = result.get("error")
if not isinstance(error_text, str) or not error_text.strip():
return False
return not cls._is_required_param_tool_error(result)
@staticmethod
def _is_retryable_execution_failure(result):
if not isinstance(result, dict):
return False
return result.get("retryable", True) is not False
@classmethod
def _is_required_param_tool_error_message_content(cls, content):
if not isinstance(content, str):
@@ -164,6 +192,18 @@ class ChatViewSet(viewsets.ModelViewSet):
return cls._is_required_param_tool_error(parsed)
@classmethod
def _is_execution_failure_tool_error_message_content(cls, content):
if not isinstance(content, str):
return False
try:
parsed = json.loads(content)
except json.JSONDecodeError:
return False
return cls._is_execution_failure_tool_error(parsed)
@staticmethod
def _build_required_param_error_event(tool_name, result):
tool_error = result.get("error") if isinstance(result, dict) else ""
@@ -190,6 +230,24 @@ class ChatViewSet(viewsets.ModelViewSet):
normalized_error = error_text.strip().lower()
return "location" in normalized_error
@staticmethod
def _is_search_places_geocode_error(tool_name, result):
if tool_name != "search_places" or not isinstance(result, dict):
return False
error_text = result.get("error")
if not isinstance(error_text, str):
return False
return error_text.strip().lower().startswith("could not geocode location")
@classmethod
def _is_search_places_location_retry_candidate_error(cls, tool_name, result):
return cls._is_search_places_missing_location_required_error(
tool_name,
result,
) or cls._is_search_places_geocode_error(tool_name, result)
@staticmethod
def _build_search_places_location_clarification_message():
return (
@@ -198,6 +256,21 @@ class ChatViewSet(viewsets.ModelViewSet):
"activities, or lodging."
)
@staticmethod
def _build_tool_execution_error_event(tool_name, result):
tool_error = (
(result or {}).get("error")
if isinstance(result, dict)
else "Tool execution failed"
)
return {
"error": (
f"The assistant could not complete '{tool_name}' ({tool_error}). "
"Please try again in a moment or adjust your request."
),
"error_category": "tool_execution_error",
}
@staticmethod
def _normalize_trip_context_destination(destination):
destination_text = (destination or "").strip()
@@ -420,11 +493,13 @@ class ChatViewSet(viewsets.ModelViewSet):
)
MAX_TOOL_ITERATIONS = 10
MAX_ALL_FAILURE_ROUNDS = 3
async def event_stream():
current_messages = list(llm_messages)
encountered_error = False
tool_iterations = 0
all_failure_rounds = 0
while tool_iterations < MAX_TOOL_ITERATIONS:
content_chunks = []
@@ -472,10 +547,11 @@ class ChatViewSet(viewsets.ModelViewSet):
assistant_content = "".join(content_chunks)
if tool_calls_accumulator:
tool_iterations += 1
successful_tool_calls = []
successful_tool_messages = []
successful_tool_chat_entries = []
first_execution_failure = None
encountered_permanent_failure = False
for tool_call in tool_calls_accumulator:
function_payload = tool_call.get("function") or {}
@@ -519,7 +595,7 @@ class ChatViewSet(viewsets.ModelViewSet):
**prepared_arguments,
)
if self._is_search_places_missing_location_required_error(
if self._is_search_places_location_retry_candidate_error(
function_name,
result,
):
@@ -552,7 +628,11 @@ class ChatViewSet(viewsets.ModelViewSet):
**retry_arguments,
)
if not self._is_required_param_tool_error(retry_result):
if not self._is_required_param_tool_error(
retry_result
) and not self._is_execution_failure_tool_error(
retry_result
):
result = retry_result
tool_call_for_history = {
**tool_call,
@@ -630,6 +710,13 @@ class ChatViewSet(viewsets.ModelViewSet):
yield "data: [DONE]\n\n"
return
if self._is_execution_failure_tool_error(result):
if first_execution_failure is None:
first_execution_failure = (function_name, result)
if not self._is_retryable_execution_failure(result):
encountered_permanent_failure = True
continue
result_content = serialize_tool_result(result)
successful_tool_calls.append(tool_call_for_history)
@@ -659,6 +746,41 @@ class ChatViewSet(viewsets.ModelViewSet):
}
yield f"data: {json.dumps(tool_event)}\n\n"
if not successful_tool_calls and first_execution_failure:
if encountered_permanent_failure:
all_failure_rounds = MAX_ALL_FAILURE_ROUNDS
else:
all_failure_rounds += 1
if all_failure_rounds >= MAX_ALL_FAILURE_ROUNDS:
failed_tool_name, failed_tool_result = (
first_execution_failure
)
error_event = self._build_tool_execution_error_event(
failed_tool_name,
failed_tool_result,
)
await sync_to_async(
ChatMessage.objects.create,
thread_sensitive=True,
)(
conversation=conversation,
role="assistant",
content=error_event["error"],
)
await sync_to_async(
conversation.save,
thread_sensitive=True,
)(update_fields=["updated_at"])
yield f"data: {json.dumps(error_event)}\n\n"
yield "data: [DONE]\n\n"
return
continue
all_failure_rounds = 0
tool_iterations += 1
assistant_with_tools = {
"role": "assistant",
"content": assistant_content,

View File

@@ -30,7 +30,7 @@ You can set a **default AI provider and model** in **Settings** (under "Default
Day suggestions (the AI-generated place recommendations for specific itinerary days) also respect your saved default provider and model. If no defaults are saved, the instance-level provider configured by the server admin is used.
Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping.
Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping. Tool execution failures (e.g., a place search that can't geocode, or an unavailable web search service) also stop with a single error message — the assistant won't keep retrying a broken tool.
#### Collections

View File

@@ -6,6 +6,7 @@
import { addToast } from '$lib/toasts';
type ToolResultEntry = {
tool_call_id?: string;
name: string;
result: unknown;
};
@@ -319,21 +320,58 @@
try {
return {
tool_call_id: msg.tool_call_id,
name: msg.name || 'tool',
result: JSON.parse(msg.content)
};
} catch {
return {
tool_call_id: msg.tool_call_id,
name: msg.name || 'tool',
result: msg.content
};
}
}
function appendToolResultDedup(
toolResults: ToolResultEntry[] | undefined,
toolResult: ToolResultEntry
): ToolResultEntry[] {
const next = toolResults || [];
if (
toolResult.tool_call_id &&
next.some((existing) => existing.tool_call_id === toolResult.tool_call_id)
) {
return next;
}
return [...next, toolResult];
}
function uniqueToolResultsByCallId(toolResults: ToolResultEntry[] | undefined): ToolResultEntry[] {
if (!toolResults) {
return [];
}
const seen = new Set<string>();
const unique: ToolResultEntry[] = [];
for (const result of toolResults) {
if (result.tool_call_id) {
if (seen.has(result.tool_call_id)) {
continue;
}
seen.add(result.tool_call_id);
}
unique.push(result);
}
return unique;
}
function rebuildConversationMessages(rawMessages: ChatMessage[]): ChatMessage[] {
const rebuilt = rawMessages.map((msg) => ({
...msg,
tool_results: msg.tool_results ? [...msg.tool_results] : undefined
tool_results: undefined
}));
let activeAssistant: ChatMessage | null = null;
@@ -361,7 +399,10 @@
continue;
}
activeAssistant.tool_results = [...(activeAssistant.tool_results || []), parsedResult];
activeAssistant.tool_results = appendToolResultDedup(
activeAssistant.tool_results,
parsedResult
);
if (
toolCallIds.length > 0 &&
@@ -466,10 +507,14 @@
if (parsed.tool_result) {
const toolResult: ToolResultEntry = {
tool_call_id: parsed.tool_result.tool_call_id,
name: parsed.tool_result.name || parsed.tool_result.tool || 'tool',
result: parsed.tool_result.result
};
assistantMsg.tool_results = [...(assistantMsg.tool_results || []), toolResult];
assistantMsg.tool_results = appendToolResultDedup(
assistantMsg.tool_results,
toolResult
);
messages = [...messages];
}
} catch {
@@ -891,7 +936,7 @@
<div class="whitespace-pre-wrap">{msg.content}</div>
{#if msg.role === 'assistant' && msg.tool_results}
<div class="mt-2 space-y-2">
{#each msg.tool_results as result}
{#each uniqueToolResultsByCallId(msg.tool_results) as result}
{#if hasPlaceResults(result)}
<div class="grid gap-2">
{#each getPlaceResults(result) as place}