fix(chat): stop retry spirals on tool failures

2026-03-10 18:05:34 +00:00
parent 212ce33e36
commit dbabbdf9f0
6 changed files with 490 additions and 14 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,3 +1,9 @@
+---
+title: AGENTS
+type: note
+permalink: voyage/agents
+---
+
 # Voyage Development Instructions (OpenCode)

 ## Project
@@ -57,7 +63,7 @@ Run in this order:

 ## Known Issues (Expected)
 - Frontend `bun run check`: **0 errors + 6 warnings** expected (pre-existing in `CollectionRecommendationView.svelte` + `RegionCard.svelte`)
- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 9 new chat tests all pass)
+- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 32 chat tests all pass)
 - Docker dev setup has frontend-backend communication issues (500 errors beyond homepage)

 ## Key Patterns
@@ -68,9 +74,11 @@ Run in this order:
 - Chat providers: dynamic catalog from `GET /api/chat/providers/`; configured in `CHAT_PROVIDER_CONFIG`
 - Chat model override: dropdown selector fed by `GET /api/chat/providers/{provider}/models/`; persisted in `localStorage` key `voyage_chat_model_prefs`; backend accepts optional `model` param in `send_message`
 - Chat context: collection chats inject collection UUID + multi-stop itinerary context; system prompt guides `get_trip_details`-first reasoning and confirms only before first `add_to_itinerary`; `search_places` has a deterministic context-retry fallback — when the LLM omits `location`, the backend retries using the trip destination or first itinerary stop before asking the user for clarification; a dining-intent heuristic infers `category="food"` from user messages when the LLM omits category for restaurant/dining requests
- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()`
- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`)
+- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()`; tool results are deduplicated by `tool_call_id` at three layers — rebuild from persisted rows (discards server-side pre-populated `tool_results`), SSE ingestion (`appendToolResultDedup`), and render-time (`uniqueToolResultsByCallId`)
+- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`); `execute_tool()` catch-all returns a generic sanitized message (never raw `str(exc)`)
 - Invalid tool calls (missing required args) are detected and short-circuited with a user-visible error — not replayed into history
+- Tool execution failures (`search_places`, `web_search`, catch-all errors) are classified separately from required-param validation errors; execution failures emit a bounded `tool_execution_error` SSE event and stop — they are never replayed into LLM context; `tool_iterations` increments only on successful tool calls; all-failure rounds are capped at `MAX_ALL_FAILURE_ROUNDS` (3); permanent failures (e.g. `web_search` import error with `retryable: false`) stop immediately
+- Geocoding failures in `search_places` (`Could not geocode location: ...`) are eligible for the existing context-retry path (trip destination → first itinerary stop → user clarification)
 - Geocoding: `background_geocode_and_assign()` runs in a thread after Location save; populates `region`, `city`, `country`, and also fills `Location.location` from reverse geocode `display_name` (truncated to field max_length) if blank or different

 ## Conventions
--- a/backend/server/chat/agent_tools.py
+++ b/backend/server/chat/agent_tools.py
@@ -295,6 +295,7 @@ def web_search(user, query: str, location_context: str | None = None) -> dict:
        return {
            "error": "Web search is not available (duckduckgo-search not installed)",
            "results": [],
+            "retryable": False,
        }
    except Exception as exc:
        error_str = str(exc).lower()
@@ -637,9 +638,9 @@ def execute_tool(tool_name, user, **kwargs):

    try:
        return tool_fn(user=user, **filtered_kwargs)
-    except Exception as exc:
+    except Exception:
        logger.exception("Tool %s failed", tool_name)
-        return {"error": str(exc)}
+        return {"error": "Tool execution failed"}


 AGENT_TOOLS = get_tool_schemas()
--- a/backend/server/chat/tests.py
+++ b/backend/server/chat/tests.py
@@ -1,4 +1,5 @@
 import json
+from unittest import mock
 from unittest.mock import patch

 from django.contrib.auth import get_user_model
@@ -6,13 +7,64 @@ from django.test import TestCase
 from rest_framework.test import APITransactionTestCase

 from adventures.models import Collection, CollectionItineraryItem, Location
-from chat.agent_tools import add_to_itinerary, get_trip_details
+from chat.agent_tools import (
+    add_to_itinerary,
+    execute_tool,
+    get_trip_details,
+    web_search,
+)
 from chat.views import ChatViewSet


 User = get_user_model()


+class WebSearchToolFailureClassificationTests(TestCase):
+    def test_web_search_import_error_sets_retryable_false(self):
+        import builtins
+
+        original_import = builtins.__import__
+
+        def controlled_import(name, *args, **kwargs):
+            if name == "duckduckgo_search":
+                raise ImportError("missing dependency")
+            return original_import(name, *args, **kwargs)
+
+        user = User.objects.create_user(
+            username="chat-web-search-user",
+            email="chat-web-search-user@example.com",
+            password="password123",
+        )
+
+        with mock.patch("builtins.__import__", side_effect=controlled_import):
+            result = web_search(user, query="best restaurants")
+
+        self.assertEqual(
+            result.get("error"),
+            "Web search is not available (duckduckgo-search not installed)",
+        )
+        self.assertEqual(result.get("retryable"), False)
+
+
+class ExecuteToolErrorSanitizationTests(TestCase):
+    def test_execute_tool_catch_all_returns_sanitized_error_message(self):
+        def raising_tool(user):
+            raise RuntimeError("sensitive backend detail")
+
+        user = User.objects.create_user(
+            username="chat-tool-sanitize-user",
+            email="chat-tool-sanitize-user@example.com",
+            password="password123",
+        )
+
+        with mock.patch.dict(
+            "chat.agent_tools._REGISTERED_TOOLS", {"boom": raising_tool}
+        ):
+            result = execute_tool("boom", user)
+
+        self.assertEqual(result, {"error": "Tool execution failed"})
+
+
 class ChatAgentToolSharedTripAccessTests(TestCase):
    def setUp(self):
        self.owner = User.objects.create_user(
@@ -173,6 +225,40 @@ class ChatViewSetToolValidationBoundaryTests(TestCase):
            )
        )

+    def test_execution_failure_error_detected(self):
+        self.assertTrue(
+            ChatViewSet._is_execution_failure_tool_error(
+                {"error": "Web search failed. Please try again."}
+            )
+        )
+
+    def test_required_error_not_treated_as_execution_failure(self):
+        self.assertFalse(
+            ChatViewSet._is_execution_failure_tool_error(
+                {"error": "location is required"}
+            )
+        )
+
+    def test_search_places_geocode_error_detected_for_location_retry(self):
+        self.assertTrue(
+            ChatViewSet._is_search_places_location_retry_candidate_error(
+                "search_places",
+                {"error": "Could not geocode location: ???"},
+            )
+        )
+
+    def test_retryable_execution_failure_defaults_true(self):
+        self.assertTrue(
+            ChatViewSet._is_retryable_execution_failure({"error": "Temporary outage"})
+        )
+
+    def test_retryable_execution_failure_honors_false_flag(self):
+        self.assertFalse(
+            ChatViewSet._is_retryable_execution_failure(
+                {"error": "Not installed", "retryable": False}
+            )
+        )
+
    def test_likely_location_reply_heuristic_positive_case(self):
        self.assertTrue(ChatViewSet._is_likely_location_reply("london"))

@@ -557,3 +643,217 @@ class ChatViewSetSearchPlacesClarificationTests(APITransactionTestCase):
        self.assertEqual(mock_execute_tool.call_count, 2)
        second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs
        self.assertEqual(second_call_kwargs.get("location"), "Paris, France")
+
+    @patch("chat.views.execute_tool")
+    @patch("chat.views.stream_chat_completion")
+    @patch("integrations.utils.auto_profile.update_auto_preference_profile")
+    def test_geocode_failure_retries_with_trip_context_location(
+        self,
+        _mock_auto_profile,
+        mock_stream_chat_completion,
+        mock_execute_tool,
+    ):
+        user = User.objects.create_user(
+            username="chat-geocode-retry-user",
+            email="chat-geocode-retry-user@example.com",
+            password="password123",
+        )
+        self.client.force_authenticate(user=user)
+
+        conversation_response = self.client.post(
+            "/api/chat/conversations/",
+            {"title": "Geocode Retry Test"},
+            format="json",
+        )
+        self.assertEqual(conversation_response.status_code, 201)
+        conversation_id = conversation_response.json()["id"]
+
+        async def first_stream(*args, **kwargs):
+            yield 'data: {"tool_calls": [{"index": 0, "id": "call_1", "type": "function", "function": {"name": "search_places", "arguments": "{}"}}]}\n\n'
+            yield "data: [DONE]\n\n"
+
+        async def second_stream(*args, **kwargs):
+            yield 'data: {"content": "Here are options in Lisbon."}\n\n'
+            yield "data: [DONE]\n\n"
+
+        mock_stream_chat_completion.side_effect = [first_stream(), second_stream()]
+        mock_execute_tool.side_effect = [
+            {"error": "Could not geocode location: invalid"},
+            {"results": [{"name": "Time Out Market"}]},
+        ]
+
+        response = self.client.post(
+            f"/api/chat/conversations/{conversation_id}/send_message/",
+            {
+                "message": "Find restaurants",
+                "destination": "Lisbon, Portugal",
+            },
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        chunks = [
+            chunk.decode("utf-8")
+            if isinstance(chunk, (bytes, bytearray))
+            else str(chunk)
+            for chunk in response.streaming_content
+        ]
+        payload_lines = [
+            chunk.strip()[len("data: ") :]
+            for chunk in chunks
+            if chunk.strip().startswith("data: ")
+        ]
+        json_payloads = [
+            json.loads(payload) for payload in payload_lines if payload != "[DONE]"
+        ]
+
+        self.assertTrue(any("tool_result" in payload for payload in json_payloads))
+        self.assertTrue(
+            any(
+                payload.get("content") == "Here are options in Lisbon."
+                for payload in json_payloads
+            )
+        )
+        self.assertEqual(mock_execute_tool.call_count, 2)
+        second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs
+        self.assertEqual(second_call_kwargs.get("location"), "Lisbon, Portugal")
+
+
+class ChatViewSetToolExecutionFailureLoopTests(APITransactionTestCase):
+    @patch("chat.views.execute_tool")
+    @patch("chat.views.stream_chat_completion")
+    @patch("integrations.utils.auto_profile.update_auto_preference_profile")
+    def test_all_failure_rounds_stop_with_execution_error_before_tool_cap(
+        self,
+        _mock_auto_profile,
+        mock_stream_chat_completion,
+        mock_execute_tool,
+    ):
+        user = User.objects.create_user(
+            username="chat-loop-failure-user",
+            email="chat-loop-failure-user@example.com",
+            password="password123",
+        )
+        self.client.force_authenticate(user=user)
+
+        conversation_response = self.client.post(
+            "/api/chat/conversations/",
+            {"title": "Failure Loop Test"},
+            format="json",
+        )
+        self.assertEqual(conversation_response.status_code, 201)
+        conversation_id = conversation_response.json()["id"]
+
+        async def failing_stream(*args, **kwargs):
+            yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n'
+            yield "data: [DONE]\n\n"
+
+        mock_stream_chat_completion.side_effect = failing_stream
+        mock_execute_tool.return_value = {
+            "error": "Web search failed. Please try again.",
+            "results": [],
+        }
+
+        response = self.client.post(
+            f"/api/chat/conversations/{conversation_id}/send_message/",
+            {"message": "Find restaurants near me"},
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        chunks = [
+            chunk.decode("utf-8")
+            if isinstance(chunk, (bytes, bytearray))
+            else str(chunk)
+            for chunk in response.streaming_content
+        ]
+        payload_lines = [
+            chunk.strip()[len("data: ") :]
+            for chunk in chunks
+            if chunk.strip().startswith("data: ")
+        ]
+        json_payloads = [
+            json.loads(payload) for payload in payload_lines if payload != "[DONE]"
+        ]
+
+        self.assertTrue(
+            any(
+                payload.get("error_category") == "tool_execution_error"
+                for payload in json_payloads
+            )
+        )
+        self.assertFalse(
+            any(
+                payload.get("error_category") == "tool_loop_limit"
+                for payload in json_payloads
+            )
+        )
+        self.assertFalse(any("tool_result" in payload for payload in json_payloads))
+        self.assertEqual(mock_stream_chat_completion.call_count, 3)
+        self.assertEqual(mock_execute_tool.call_count, 3)
+
+    @patch("chat.views.execute_tool")
+    @patch("chat.views.stream_chat_completion")
+    @patch("integrations.utils.auto_profile.update_auto_preference_profile")
+    def test_permanent_execution_failure_stops_immediately(
+        self,
+        _mock_auto_profile,
+        mock_stream_chat_completion,
+        mock_execute_tool,
+    ):
+        user = User.objects.create_user(
+            username="chat-permanent-failure-user",
+            email="chat-permanent-failure-user@example.com",
+            password="password123",
+        )
+        self.client.force_authenticate(user=user)
+
+        conversation_response = self.client.post(
+            "/api/chat/conversations/",
+            {"title": "Permanent Failure Test"},
+            format="json",
+        )
+        self.assertEqual(conversation_response.status_code, 201)
+        conversation_id = conversation_response.json()["id"]
+
+        async def failing_stream(*args, **kwargs):
+            yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n'
+            yield "data: [DONE]\n\n"
+
+        mock_stream_chat_completion.side_effect = failing_stream
+        mock_execute_tool.return_value = {
+            "error": "Web search is not available (duckduckgo-search not installed)",
+            "results": [],
+            "retryable": False,
+        }
+
+        response = self.client.post(
+            f"/api/chat/conversations/{conversation_id}/send_message/",
+            {"message": "Find restaurants near me"},
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        chunks = [
+            chunk.decode("utf-8")
+            if isinstance(chunk, (bytes, bytearray))
+            else str(chunk)
+            for chunk in response.streaming_content
+        ]
+        payload_lines = [
+            chunk.strip()[len("data: ") :]
+            for chunk in chunks
+            if chunk.strip().startswith("data: ")
+        ]
+        json_payloads = [
+            json.loads(payload) for payload in payload_lines if payload != "[DONE]"
+        ]
+
+        self.assertTrue(
+            any(
+                payload.get("error_category") == "tool_execution_error"
+                for payload in json_payloads
+            )
+        )
+        self.assertEqual(mock_stream_chat_completion.call_count, 1)
+        self.assertEqual(mock_execute_tool.call_count, 1)
--- a/backend/server/chat/views/init.py
+++ b/backend/server/chat/views/init.py
@@ -62,6 +62,9 @@ class ChatViewSet(viewsets.ModelViewSet):
            if message.role == "tool"
            and message.tool_call_id
            and not self._is_required_param_tool_error_message_content(message.content)
+            and not self._is_execution_failure_tool_error_message_content(
+                message.content
+            )
        }

        messages = [
@@ -76,6 +79,13 @@ class ChatViewSet(viewsets.ModelViewSet):
                and self._is_required_param_tool_error_message_content(message.content)
            ):
                continue
+            if (
+                message.role == "tool"
+                and self._is_execution_failure_tool_error_message_content(
+                    message.content
+                )
+            ):
+                continue

            payload = {
                "role": message.role,
@@ -152,6 +162,24 @@ class ChatViewSet(viewsets.ModelViewSet):
            )
        )

+    @classmethod
+    def _is_execution_failure_tool_error(cls, result):
+        if not isinstance(result, dict):
+            return False
+
+        error_text = result.get("error")
+        if not isinstance(error_text, str) or not error_text.strip():
+            return False
+
+        return not cls._is_required_param_tool_error(result)
+
+    @staticmethod
+    def _is_retryable_execution_failure(result):
+        if not isinstance(result, dict):
+            return False
+
+        return result.get("retryable", True) is not False
+
    @classmethod
    def _is_required_param_tool_error_message_content(cls, content):
        if not isinstance(content, str):
@@ -164,6 +192,18 @@ class ChatViewSet(viewsets.ModelViewSet):

        return cls._is_required_param_tool_error(parsed)

+    @classmethod
+    def _is_execution_failure_tool_error_message_content(cls, content):
+        if not isinstance(content, str):
+            return False
+
+        try:
+            parsed = json.loads(content)
+        except json.JSONDecodeError:
+            return False
+
+        return cls._is_execution_failure_tool_error(parsed)
+
    @staticmethod
    def _build_required_param_error_event(tool_name, result):
        tool_error = result.get("error") if isinstance(result, dict) else ""
@@ -190,6 +230,24 @@ class ChatViewSet(viewsets.ModelViewSet):
        normalized_error = error_text.strip().lower()
        return "location" in normalized_error

+    @staticmethod
+    def _is_search_places_geocode_error(tool_name, result):
+        if tool_name != "search_places" or not isinstance(result, dict):
+            return False
+
+        error_text = result.get("error")
+        if not isinstance(error_text, str):
+            return False
+
+        return error_text.strip().lower().startswith("could not geocode location")
+
+    @classmethod
+    def _is_search_places_location_retry_candidate_error(cls, tool_name, result):
+        return cls._is_search_places_missing_location_required_error(
+            tool_name,
+            result,
+        ) or cls._is_search_places_geocode_error(tool_name, result)
+
    @staticmethod
    def _build_search_places_location_clarification_message():
        return (
@@ -198,6 +256,21 @@ class ChatViewSet(viewsets.ModelViewSet):
            "activities, or lodging."
        )

+    @staticmethod
+    def _build_tool_execution_error_event(tool_name, result):
+        tool_error = (
+            (result or {}).get("error")
+            if isinstance(result, dict)
+            else "Tool execution failed"
+        )
+        return {
+            "error": (
+                f"The assistant could not complete '{tool_name}' ({tool_error}). "
+                "Please try again in a moment or adjust your request."
+            ),
+            "error_category": "tool_execution_error",
+        }
+
    @staticmethod
    def _normalize_trip_context_destination(destination):
        destination_text = (destination or "").strip()
@@ -420,11 +493,13 @@ class ChatViewSet(viewsets.ModelViewSet):
        )

        MAX_TOOL_ITERATIONS = 10
+        MAX_ALL_FAILURE_ROUNDS = 3

        async def event_stream():
            current_messages = list(llm_messages)
            encountered_error = False
            tool_iterations = 0
+            all_failure_rounds = 0

            while tool_iterations < MAX_TOOL_ITERATIONS:
                content_chunks = []
@@ -472,10 +547,11 @@ class ChatViewSet(viewsets.ModelViewSet):
                assistant_content = "".join(content_chunks)

                if tool_calls_accumulator:
-                    tool_iterations += 1
                    successful_tool_calls = []
                    successful_tool_messages = []
                    successful_tool_chat_entries = []
+                    first_execution_failure = None
+                    encountered_permanent_failure = False

                    for tool_call in tool_calls_accumulator:
                        function_payload = tool_call.get("function") or {}
@@ -519,7 +595,7 @@ class ChatViewSet(viewsets.ModelViewSet):
                            **prepared_arguments,
                        )

-                        if self._is_search_places_missing_location_required_error(
+                        if self._is_search_places_location_retry_candidate_error(
                            function_name,
                            result,
                        ):
@@ -552,7 +628,11 @@ class ChatViewSet(viewsets.ModelViewSet):
                                    **retry_arguments,
                                )

-                                if not self._is_required_param_tool_error(retry_result):
+                                if not self._is_required_param_tool_error(
+                                    retry_result
+                                ) and not self._is_execution_failure_tool_error(
+                                    retry_result
+                                ):
                                    result = retry_result
                                    tool_call_for_history = {
                                        **tool_call,
@@ -630,6 +710,13 @@ class ChatViewSet(viewsets.ModelViewSet):
                            yield "data: [DONE]\n\n"
                            return

+                        if self._is_execution_failure_tool_error(result):
+                            if first_execution_failure is None:
+                                first_execution_failure = (function_name, result)
+                            if not self._is_retryable_execution_failure(result):
+                                encountered_permanent_failure = True
+                            continue
+
                        result_content = serialize_tool_result(result)

                        successful_tool_calls.append(tool_call_for_history)
@@ -659,6 +746,41 @@ class ChatViewSet(viewsets.ModelViewSet):
                        }
                        yield f"data: {json.dumps(tool_event)}\n\n"

+                    if not successful_tool_calls and first_execution_failure:
+                        if encountered_permanent_failure:
+                            all_failure_rounds = MAX_ALL_FAILURE_ROUNDS
+                        else:
+                            all_failure_rounds += 1
+
+                        if all_failure_rounds >= MAX_ALL_FAILURE_ROUNDS:
+                            failed_tool_name, failed_tool_result = (
+                                first_execution_failure
+                            )
+                            error_event = self._build_tool_execution_error_event(
+                                failed_tool_name,
+                                failed_tool_result,
+                            )
+                            await sync_to_async(
+                                ChatMessage.objects.create,
+                                thread_sensitive=True,
+                            )(
+                                conversation=conversation,
+                                role="assistant",
+                                content=error_event["error"],
+                            )
+                            await sync_to_async(
+                                conversation.save,
+                                thread_sensitive=True,
+                            )(update_fields=["updated_at"])
+                            yield f"data: {json.dumps(error_event)}\n\n"
+                            yield "data: [DONE]\n\n"
+                            return
+
+                        continue
+
+                    all_failure_rounds = 0
+                    tool_iterations += 1
+
                    assistant_with_tools = {
                        "role": "assistant",
                        "content": assistant_content,
--- a/docs/docs/usage/usage.md
+++ b/docs/docs/usage/usage.md
@@ -30,7 +30,7 @@ You can set a **default AI provider and model** in **Settings** (under "Default

 Day suggestions (the AI-generated place recommendations for specific itinerary days) also respect your saved default provider and model. If no defaults are saved, the instance-level provider configured by the server admin is used.

-Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping.
+Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping. Tool execution failures (e.g., a place search that can't geocode, or an unavailable web search service) also stop with a single error message — the assistant won't keep retrying a broken tool.

 #### Collections

--- a/frontend/src/lib/components/AITravelChat.svelte
+++ b/frontend/src/lib/components/AITravelChat.svelte
@@ -6,6 +6,7 @@
 	import { addToast } from '$lib/toasts';

 	type ToolResultEntry = {
+		tool_call_id?: string;
 		name: string;
 		result: unknown;
 	};
@@ -319,21 +320,58 @@

 		try {
 			return {
+				tool_call_id: msg.tool_call_id,
 				name: msg.name || 'tool',
 				result: JSON.parse(msg.content)
 			};
 		} catch {
 			return {
+				tool_call_id: msg.tool_call_id,
 				name: msg.name || 'tool',
 				result: msg.content
 			};
 		}
 	}

+	function appendToolResultDedup(
+		toolResults: ToolResultEntry[] | undefined,
+		toolResult: ToolResultEntry
+	): ToolResultEntry[] {
+		const next = toolResults || [];
+		if (
+			toolResult.tool_call_id &&
+			next.some((existing) => existing.tool_call_id === toolResult.tool_call_id)
+		) {
+			return next;
+		}
+
+		return [...next, toolResult];
+	}
+
+	function uniqueToolResultsByCallId(toolResults: ToolResultEntry[] | undefined): ToolResultEntry[] {
+		if (!toolResults) {
+			return [];
+		}
+
+		const seen = new Set<string>();
+		const unique: ToolResultEntry[] = [];
+		for (const result of toolResults) {
+			if (result.tool_call_id) {
+				if (seen.has(result.tool_call_id)) {
+					continue;
+				}
+				seen.add(result.tool_call_id);
+			}
+			unique.push(result);
+		}
+
+		return unique;
+	}
+
 	function rebuildConversationMessages(rawMessages: ChatMessage[]): ChatMessage[] {
 		const rebuilt = rawMessages.map((msg) => ({
 			...msg,
-			tool_results: msg.tool_results ? [...msg.tool_results] : undefined
+			tool_results: undefined
 		}));

 		let activeAssistant: ChatMessage | null = null;
@@ -361,7 +399,10 @@
 				continue;
 			}

-			activeAssistant.tool_results = [...(activeAssistant.tool_results || []), parsedResult];
+			activeAssistant.tool_results = appendToolResultDedup(
+				activeAssistant.tool_results,
+				parsedResult
+			);

 			if (
 				toolCallIds.length > 0 &&
@@ -466,10 +507,14 @@

 						if (parsed.tool_result) {
 							const toolResult: ToolResultEntry = {
+								tool_call_id: parsed.tool_result.tool_call_id,
 								name: parsed.tool_result.name || parsed.tool_result.tool || 'tool',
 								result: parsed.tool_result.result
 							};
-							assistantMsg.tool_results = [...(assistantMsg.tool_results || []), toolResult];
+							assistantMsg.tool_results = appendToolResultDedup(
+								assistantMsg.tool_results,
+								toolResult
+							);
 							messages = [...messages];
 						}
 					} catch {
@@ -891,7 +936,7 @@
 											<div class="whitespace-pre-wrap">{msg.content}</div>
 											{#if msg.role === 'assistant' && msg.tool_results}
 												<div class="mt-2 space-y-2">
-													{#each msg.tool_results as result}
+													{#each uniqueToolResultsByCallId(msg.tool_results) as result}
 														{#if hasPlaceResults(result)}
 															<div class="grid gap-2">
 																{#each getPlaceResults(result) as place}