diff --git a/AGENTS.md b/AGENTS.md index 48994c4c..c1986740 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,3 +1,9 @@ +--- +title: AGENTS +type: note +permalink: voyage/agents +--- + # Voyage Development Instructions (OpenCode) ## Project @@ -57,7 +63,7 @@ Run in this order: ## Known Issues (Expected) - Frontend `bun run check`: **0 errors + 6 warnings** expected (pre-existing in `CollectionRecommendationView.svelte` + `RegionCard.svelte`) -- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 9 new chat tests all pass) +- Backend tests: **6/39 fail** (pre-existing: 2 user email key errors + 4 geocoding API mocks; 32 chat tests all pass) - Docker dev setup has frontend-backend communication issues (500 errors beyond homepage) ## Key Patterns @@ -68,9 +74,13 @@ Run in this order: - Chat providers: dynamic catalog from `GET /api/chat/providers/`; configured in `CHAT_PROVIDER_CONFIG` - Chat model override: dropdown selector fed by `GET /api/chat/providers/{provider}/models/`; persisted in `localStorage` key `voyage_chat_model_prefs`; backend accepts optional `model` param in `send_message` - Chat context: collection chats inject collection UUID + multi-stop itinerary context; system prompt guides `get_trip_details`-first reasoning and confirms only before first `add_to_itinerary`; `search_places` has a deterministic context-retry fallback — when the LLM omits `location`, the backend retries using the trip destination or first itinerary stop before asking the user for clarification; a dining-intent heuristic infers `category="food"` from user messages when the LLM omits category for restaurant/dining requests -- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()` -- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`) +- `itinerary_stops` extraction: when `city`/`country` FK is null, city is extracted from the last non-numeric segment of a comma-separated address (e.g. `"Little Turnstile 6, London"` → `"London"`); ensures `trip_context_location` is always a geocodable city name, not a street address. +- After itinerary context retries: if all retries fail, result is converted to an execution failure rather than triggering a clarification prompt — the user is never asked for a location they already implied via collection context. +- Chat tool output: `role=tool` messages hidden from display; tool outputs render as concise summaries; persisted tool rows reconstructed on reload via `rebuildConversationMessages()`; tool results are deduplicated by `tool_call_id` at three layers — rebuild from persisted rows (discards server-side pre-populated `tool_results`), SSE ingestion (`appendToolResultDedup`), and render-time (`uniqueToolResultsByCallId`) +- Chat error surfacing: `_safe_error_payload()` maps LiteLLM exceptions to sanitized user-safe categories (never forwards raw `exc.message`); `execute_tool()` catch-all returns a generic sanitized message (never raw `str(exc)`) - Invalid tool calls (missing required args) are detected and short-circuited with a user-visible error — not replayed into history +- Tool execution failures (`search_places`, `web_search`, catch-all errors) are classified separately from required-param validation errors; execution failures emit a bounded `tool_execution_error` SSE event and stop — they are never replayed into LLM context; `tool_iterations` increments only on successful tool calls; all-failure rounds are capped at `MAX_ALL_FAILURE_ROUNDS` (3); permanent failures (e.g. `web_search` import error with `retryable: false`) stop immediately +- Geocoding failures in `search_places` (`Could not geocode location: ...`) are eligible for the existing context-retry path (trip destination → first itinerary stop → user clarification) - Geocoding: `background_geocode_and_assign()` runs in a thread after Location save; populates `region`, `city`, `country`, and also fills `Location.location` from reverse geocode `display_name` (truncated to field max_length) if blank or different ## Conventions diff --git a/backend/server/chat/agent_tools.py b/backend/server/chat/agent_tools.py index 881d6395..76b7e319 100644 --- a/backend/server/chat/agent_tools.py +++ b/backend/server/chat/agent_tools.py @@ -295,6 +295,7 @@ def web_search(user, query: str, location_context: str | None = None) -> dict: return { "error": "Web search is not available (duckduckgo-search not installed)", "results": [], + "retryable": False, } except Exception as exc: error_str = str(exc).lower() @@ -637,9 +638,9 @@ def execute_tool(tool_name, user, **kwargs): try: return tool_fn(user=user, **filtered_kwargs) - except Exception as exc: + except Exception: logger.exception("Tool %s failed", tool_name) - return {"error": str(exc)} + return {"error": "Tool execution failed"} AGENT_TOOLS = get_tool_schemas() diff --git a/backend/server/chat/tests.py b/backend/server/chat/tests.py index 7b3aa43a..ab0142e5 100644 --- a/backend/server/chat/tests.py +++ b/backend/server/chat/tests.py @@ -1,4 +1,5 @@ import json +from unittest import mock from unittest.mock import patch from django.contrib.auth import get_user_model @@ -6,13 +7,64 @@ from django.test import TestCase from rest_framework.test import APITransactionTestCase from adventures.models import Collection, CollectionItineraryItem, Location -from chat.agent_tools import add_to_itinerary, get_trip_details +from chat.agent_tools import ( + add_to_itinerary, + execute_tool, + get_trip_details, + web_search, +) from chat.views import ChatViewSet User = get_user_model() +class WebSearchToolFailureClassificationTests(TestCase): + def test_web_search_import_error_sets_retryable_false(self): + import builtins + + original_import = builtins.__import__ + + def controlled_import(name, *args, **kwargs): + if name == "duckduckgo_search": + raise ImportError("missing dependency") + return original_import(name, *args, **kwargs) + + user = User.objects.create_user( + username="chat-web-search-user", + email="chat-web-search-user@example.com", + password="password123", + ) + + with mock.patch("builtins.__import__", side_effect=controlled_import): + result = web_search(user, query="best restaurants") + + self.assertEqual( + result.get("error"), + "Web search is not available (duckduckgo-search not installed)", + ) + self.assertEqual(result.get("retryable"), False) + + +class ExecuteToolErrorSanitizationTests(TestCase): + def test_execute_tool_catch_all_returns_sanitized_error_message(self): + def raising_tool(user): + raise RuntimeError("sensitive backend detail") + + user = User.objects.create_user( + username="chat-tool-sanitize-user", + email="chat-tool-sanitize-user@example.com", + password="password123", + ) + + with mock.patch.dict( + "chat.agent_tools._REGISTERED_TOOLS", {"boom": raising_tool} + ): + result = execute_tool("boom", user) + + self.assertEqual(result, {"error": "Tool execution failed"}) + + class ChatAgentToolSharedTripAccessTests(TestCase): def setUp(self): self.owner = User.objects.create_user( @@ -173,6 +225,40 @@ class ChatViewSetToolValidationBoundaryTests(TestCase): ) ) + def test_execution_failure_error_detected(self): + self.assertTrue( + ChatViewSet._is_execution_failure_tool_error( + {"error": "Web search failed. Please try again."} + ) + ) + + def test_required_error_not_treated_as_execution_failure(self): + self.assertFalse( + ChatViewSet._is_execution_failure_tool_error( + {"error": "location is required"} + ) + ) + + def test_search_places_geocode_error_detected_for_location_retry(self): + self.assertTrue( + ChatViewSet._is_search_places_location_retry_candidate_error( + "search_places", + {"error": "Could not geocode location: ???"}, + ) + ) + + def test_retryable_execution_failure_defaults_true(self): + self.assertTrue( + ChatViewSet._is_retryable_execution_failure({"error": "Temporary outage"}) + ) + + def test_retryable_execution_failure_honors_false_flag(self): + self.assertFalse( + ChatViewSet._is_retryable_execution_failure( + {"error": "Not installed", "retryable": False} + ) + ) + def test_likely_location_reply_heuristic_positive_case(self): self.assertTrue(ChatViewSet._is_likely_location_reply("london")) @@ -477,6 +563,70 @@ class ChatViewSetSearchPlacesClarificationTests(APITransactionTestCase): self.assertEqual(second_call_kwargs.get("category"), "food") self.assertEqual(second_call_kwargs.get("location"), "Rome, Italy") + @patch("chat.views.execute_tool") + @patch("chat.views.stream_chat_completion") + @patch("integrations.utils.auto_profile.update_auto_preference_profile") + def test_collection_context_retry_extracts_city_from_fallback_address( + self, + _mock_auto_profile, + mock_stream_chat_completion, + mock_execute_tool, + ): + user = User.objects.create_user( + username="chat-city-extract-user", + email="chat-city-extract-user@example.com", + password="password123", + ) + self.client.force_authenticate(user=user) + + collection = Collection.objects.create(user=user, name="London Food Trip") + trip_stop = Location.objects.create( + user=user, + name="Turnstile", + location="Little Turnstile 6, London", + ) + collection.locations.add(trip_stop) + + conversation_response = self.client.post( + "/api/chat/conversations/", + {"title": "Fallback City Extraction Test"}, + format="json", + ) + self.assertEqual(conversation_response.status_code, 201) + conversation_id = conversation_response.json()["id"] + + async def first_stream(*args, **kwargs): + yield 'data: {"tool_calls": [{"index": 0, "id": "call_1", "type": "function", "function": {"name": "search_places", "arguments": "{}"}}]}\n\n' + yield "data: [DONE]\n\n" + + async def second_stream(*args, **kwargs): + yield 'data: {"content": "Here are options in London."}\n\n' + yield "data: [DONE]\n\n" + + mock_stream_chat_completion.side_effect = [first_stream(), second_stream()] + mock_execute_tool.side_effect = [ + {"error": "location is required"}, + {"results": [{"name": "Rules Restaurant"}]}, + ] + + response = self.client.post( + f"/api/chat/conversations/{conversation_id}/send_message/", + { + "message": "Find restaurants", + "collection_id": str(collection.id), + "collection_name": collection.name, + }, + format="json", + ) + + self.assertEqual(response.status_code, 200) + # Consume the streaming response before checking mock call counts; + # the event_stream generator only runs when streaming_content is iterated. + list(response.streaming_content) + self.assertEqual(mock_execute_tool.call_count, 2) + second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs + self.assertEqual(second_call_kwargs.get("location"), "London") + @patch("chat.views.execute_tool") @patch("chat.views.stream_chat_completion") @patch("integrations.utils.auto_profile.update_auto_preference_profile") @@ -557,3 +707,293 @@ class ChatViewSetSearchPlacesClarificationTests(APITransactionTestCase): self.assertEqual(mock_execute_tool.call_count, 2) second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs self.assertEqual(second_call_kwargs.get("location"), "Paris, France") + + @patch("chat.views.execute_tool") + @patch("chat.views.stream_chat_completion") + @patch("integrations.utils.auto_profile.update_auto_preference_profile") + def test_geocode_failure_retries_with_trip_context_location( + self, + _mock_auto_profile, + mock_stream_chat_completion, + mock_execute_tool, + ): + user = User.objects.create_user( + username="chat-geocode-retry-user", + email="chat-geocode-retry-user@example.com", + password="password123", + ) + self.client.force_authenticate(user=user) + + conversation_response = self.client.post( + "/api/chat/conversations/", + {"title": "Geocode Retry Test"}, + format="json", + ) + self.assertEqual(conversation_response.status_code, 201) + conversation_id = conversation_response.json()["id"] + + async def first_stream(*args, **kwargs): + yield 'data: {"tool_calls": [{"index": 0, "id": "call_1", "type": "function", "function": {"name": "search_places", "arguments": "{}"}}]}\n\n' + yield "data: [DONE]\n\n" + + async def second_stream(*args, **kwargs): + yield 'data: {"content": "Here are options in Lisbon."}\n\n' + yield "data: [DONE]\n\n" + + mock_stream_chat_completion.side_effect = [first_stream(), second_stream()] + mock_execute_tool.side_effect = [ + {"error": "Could not geocode location: invalid"}, + {"results": [{"name": "Time Out Market"}]}, + ] + + response = self.client.post( + f"/api/chat/conversations/{conversation_id}/send_message/", + { + "message": "Find restaurants", + "destination": "Lisbon, Portugal", + }, + format="json", + ) + + self.assertEqual(response.status_code, 200) + chunks = [ + chunk.decode("utf-8") + if isinstance(chunk, (bytes, bytearray)) + else str(chunk) + for chunk in response.streaming_content + ] + payload_lines = [ + chunk.strip()[len("data: ") :] + for chunk in chunks + if chunk.strip().startswith("data: ") + ] + json_payloads = [ + json.loads(payload) for payload in payload_lines if payload != "[DONE]" + ] + + self.assertTrue(any("tool_result" in payload for payload in json_payloads)) + self.assertTrue( + any( + payload.get("content") == "Here are options in Lisbon." + for payload in json_payloads + ) + ) + self.assertEqual(mock_execute_tool.call_count, 2) + second_call_kwargs = mock_execute_tool.call_args_list[1].kwargs + self.assertEqual(second_call_kwargs.get("location"), "Lisbon, Portugal") + + +class ChatViewSetToolExecutionFailureLoopTests(APITransactionTestCase): + @patch("chat.views.execute_tool") + @patch("chat.views.stream_chat_completion") + @patch("integrations.utils.auto_profile.update_auto_preference_profile") + def test_all_failure_rounds_stop_with_execution_error_before_tool_cap( + self, + _mock_auto_profile, + mock_stream_chat_completion, + mock_execute_tool, + ): + user = User.objects.create_user( + username="chat-loop-failure-user", + email="chat-loop-failure-user@example.com", + password="password123", + ) + self.client.force_authenticate(user=user) + + conversation_response = self.client.post( + "/api/chat/conversations/", + {"title": "Failure Loop Test"}, + format="json", + ) + self.assertEqual(conversation_response.status_code, 201) + conversation_id = conversation_response.json()["id"] + + async def failing_stream(*args, **kwargs): + yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n' + yield "data: [DONE]\n\n" + + mock_stream_chat_completion.side_effect = failing_stream + mock_execute_tool.return_value = { + "error": "Web search failed. Please try again.", + "results": [], + } + + response = self.client.post( + f"/api/chat/conversations/{conversation_id}/send_message/", + {"message": "Find restaurants near me"}, + format="json", + ) + + self.assertEqual(response.status_code, 200) + chunks = [ + chunk.decode("utf-8") + if isinstance(chunk, (bytes, bytearray)) + else str(chunk) + for chunk in response.streaming_content + ] + payload_lines = [ + chunk.strip()[len("data: ") :] + for chunk in chunks + if chunk.strip().startswith("data: ") + ] + json_payloads = [ + json.loads(payload) for payload in payload_lines if payload != "[DONE]" + ] + + self.assertTrue( + any( + payload.get("error_category") == "tool_execution_error" + for payload in json_payloads + ) + ) + self.assertFalse( + any( + payload.get("error_category") == "tool_loop_limit" + for payload in json_payloads + ) + ) + self.assertFalse(any("tool_result" in payload for payload in json_payloads)) + self.assertEqual(mock_stream_chat_completion.call_count, 3) + self.assertEqual(mock_execute_tool.call_count, 3) + + @patch("chat.views.execute_tool") + @patch("chat.views.stream_chat_completion") + @patch("integrations.utils.auto_profile.update_auto_preference_profile") + def test_permanent_execution_failure_stops_immediately( + self, + _mock_auto_profile, + mock_stream_chat_completion, + mock_execute_tool, + ): + user = User.objects.create_user( + username="chat-permanent-failure-user", + email="chat-permanent-failure-user@example.com", + password="password123", + ) + self.client.force_authenticate(user=user) + + conversation_response = self.client.post( + "/api/chat/conversations/", + {"title": "Permanent Failure Test"}, + format="json", + ) + self.assertEqual(conversation_response.status_code, 201) + conversation_id = conversation_response.json()["id"] + + async def failing_stream(*args, **kwargs): + yield 'data: {"tool_calls": [{"index": 0, "id": "call_w", "type": "function", "function": {"name": "web_search", "arguments": "{\\"query\\":\\"restaurants\\"}"}}]}\n\n' + yield "data: [DONE]\n\n" + + mock_stream_chat_completion.side_effect = failing_stream + mock_execute_tool.return_value = { + "error": "Web search is not available (duckduckgo-search not installed)", + "results": [], + "retryable": False, + } + + response = self.client.post( + f"/api/chat/conversations/{conversation_id}/send_message/", + {"message": "Find restaurants near me"}, + format="json", + ) + + self.assertEqual(response.status_code, 200) + chunks = [ + chunk.decode("utf-8") + if isinstance(chunk, (bytes, bytearray)) + else str(chunk) + for chunk in response.streaming_content + ] + payload_lines = [ + chunk.strip()[len("data: ") :] + for chunk in chunks + if chunk.strip().startswith("data: ") + ] + json_payloads = [ + json.loads(payload) for payload in payload_lines if payload != "[DONE]" + ] + + self.assertTrue( + any( + payload.get("error_category") == "tool_execution_error" + for payload in json_payloads + ) + ) + self.assertEqual(mock_stream_chat_completion.call_count, 1) + self.assertEqual(mock_execute_tool.call_count, 1) + + @patch("chat.views.execute_tool") + @patch("chat.views.stream_chat_completion") + @patch("integrations.utils.auto_profile.update_auto_preference_profile") + def test_context_retry_failure_does_not_emit_location_clarification( + self, + _mock_auto_profile, + mock_stream_chat_completion, + mock_execute_tool, + ): + user = User.objects.create_user( + username="chat-retry-no-clarify-user", + email="chat-retry-no-clarify-user@example.com", + password="password123", + ) + self.client.force_authenticate(user=user) + + conversation_response = self.client.post( + "/api/chat/conversations/", + {"title": "Retry Failure No Clarify Test"}, + format="json", + ) + self.assertEqual(conversation_response.status_code, 201) + conversation_id = conversation_response.json()["id"] + + async def failing_stream(*args, **kwargs): + yield 'data: {"tool_calls": [{"index": 0, "id": "call_s", "type": "function", "function": {"name": "search_places", "arguments": "{}"}}]}\n\n' + yield "data: [DONE]\n\n" + + mock_stream_chat_completion.side_effect = failing_stream + mock_execute_tool.side_effect = [ + {"error": "location is required"}, + {"error": "location is required"}, + {"error": "location is required"}, + {"error": "location is required"}, + {"error": "location is required"}, + {"error": "location is required"}, + ] + + response = self.client.post( + f"/api/chat/conversations/{conversation_id}/send_message/", + { + "message": "Find restaurants", + "destination": "Lisbon, Portugal", + }, + format="json", + ) + + self.assertEqual(response.status_code, 200) + chunks = [ + chunk.decode("utf-8") + if isinstance(chunk, (bytes, bytearray)) + else str(chunk) + for chunk in response.streaming_content + ] + payload_lines = [ + chunk.strip()[len("data: ") :] + for chunk in chunks + if chunk.strip().startswith("data: ") + ] + json_payloads = [ + json.loads(payload) for payload in payload_lines if payload != "[DONE]" + ] + + self.assertTrue( + any( + payload.get("error_category") == "tool_execution_error" + for payload in json_payloads + ) + ) + self.assertFalse( + any( + "specific location" in payload.get("content", "").lower() + for payload in json_payloads + ) + ) diff --git a/backend/server/chat/views/__init__.py b/backend/server/chat/views/__init__.py index 92186082..bea0627a 100644 --- a/backend/server/chat/views/__init__.py +++ b/backend/server/chat/views/__init__.py @@ -62,6 +62,9 @@ class ChatViewSet(viewsets.ModelViewSet): if message.role == "tool" and message.tool_call_id and not self._is_required_param_tool_error_message_content(message.content) + and not self._is_execution_failure_tool_error_message_content( + message.content + ) } messages = [ @@ -76,6 +79,13 @@ class ChatViewSet(viewsets.ModelViewSet): and self._is_required_param_tool_error_message_content(message.content) ): continue + if ( + message.role == "tool" + and self._is_execution_failure_tool_error_message_content( + message.content + ) + ): + continue payload = { "role": message.role, @@ -152,6 +162,24 @@ class ChatViewSet(viewsets.ModelViewSet): ) ) + @classmethod + def _is_execution_failure_tool_error(cls, result): + if not isinstance(result, dict): + return False + + error_text = result.get("error") + if not isinstance(error_text, str) or not error_text.strip(): + return False + + return not cls._is_required_param_tool_error(result) + + @staticmethod + def _is_retryable_execution_failure(result): + if not isinstance(result, dict): + return False + + return result.get("retryable", True) is not False + @classmethod def _is_required_param_tool_error_message_content(cls, content): if not isinstance(content, str): @@ -164,6 +192,18 @@ class ChatViewSet(viewsets.ModelViewSet): return cls._is_required_param_tool_error(parsed) + @classmethod + def _is_execution_failure_tool_error_message_content(cls, content): + if not isinstance(content, str): + return False + + try: + parsed = json.loads(content) + except json.JSONDecodeError: + return False + + return cls._is_execution_failure_tool_error(parsed) + @staticmethod def _build_required_param_error_event(tool_name, result): tool_error = result.get("error") if isinstance(result, dict) else "" @@ -190,6 +230,24 @@ class ChatViewSet(viewsets.ModelViewSet): normalized_error = error_text.strip().lower() return "location" in normalized_error + @staticmethod + def _is_search_places_geocode_error(tool_name, result): + if tool_name != "search_places" or not isinstance(result, dict): + return False + + error_text = result.get("error") + if not isinstance(error_text, str): + return False + + return error_text.strip().lower().startswith("could not geocode location") + + @classmethod + def _is_search_places_location_retry_candidate_error(cls, tool_name, result): + return cls._is_search_places_missing_location_required_error( + tool_name, + result, + ) or cls._is_search_places_geocode_error(tool_name, result) + @staticmethod def _build_search_places_location_clarification_message(): return ( @@ -198,6 +256,21 @@ class ChatViewSet(viewsets.ModelViewSet): "activities, or lodging." ) + @staticmethod + def _build_tool_execution_error_event(tool_name, result): + tool_error = ( + (result or {}).get("error") + if isinstance(result, dict) + else "Tool execution failed" + ) + return { + "error": ( + f"The assistant could not complete '{tool_name}' ({tool_error}). " + "Please try again in a moment or adjust your request." + ), + "error_category": "tool_execution_error", + } + @staticmethod def _normalize_trip_context_destination(destination): destination_text = (destination or "").strip() @@ -253,8 +326,35 @@ class ChatViewSet(viewsets.ModelViewSet): return None - @staticmethod - def _is_likely_location_reply(user_content): + # Verbs that indicate a command/request rather than a location reply. + _COMMAND_VERBS = frozenset( + [ + "find", + "search", + "show", + "get", + "look", + "give", + "tell", + "help", + "recommend", + "suggest", + "list", + "fetch", + "what", + "where", + "which", + "who", + "how", + "can", + "could", + "would", + "please", + ] + ) + + @classmethod + def _is_likely_location_reply(cls, user_content): if not isinstance(user_content, str): return False @@ -272,6 +372,12 @@ class ChatViewSet(viewsets.ModelViewSet): if len(parts) > 6: return False + # Exclude messages that start with a command/request verb — those are + # original requests, not replies to a "where?" clarification prompt. + first_word = parts[0].lower().rstrip(".,!;:") + if first_word in cls._COMMAND_VERBS: + return False + return bool(re.search(r"[a-z0-9]", normalized, re.IGNORECASE)) @action(detail=True, methods=["post"]) @@ -372,8 +478,21 @@ class ChatViewSet(viewsets.ModelViewSet): fallback_name = (location.location or location.name or "").strip() if not fallback_name: continue - stop_label = fallback_name - stop_key = f"name:{fallback_name.lower()}" + # When city/country FKs are not set, try to extract a geocodable + # city name from a comma-separated address string. + # e.g. "Little Turnstile 6, London" → "London" + # e.g. "Kingsway 58, London" → "London" + if "," in fallback_name: + parts = [p.strip() for p in fallback_name.split(",")] + # Last non-empty, non-purely-numeric segment is typically the city + city_hint = next( + (p for p in reversed(parts) if p and not p.isdigit()), + None, + ) + stop_label = city_hint if city_hint else fallback_name + else: + stop_label = fallback_name + stop_key = f"name:{stop_label.lower()}" if stop_key in seen_stops: continue @@ -420,11 +539,13 @@ class ChatViewSet(viewsets.ModelViewSet): ) MAX_TOOL_ITERATIONS = 10 + MAX_ALL_FAILURE_ROUNDS = 3 async def event_stream(): current_messages = list(llm_messages) encountered_error = False tool_iterations = 0 + all_failure_rounds = 0 while tool_iterations < MAX_TOOL_ITERATIONS: content_chunks = [] @@ -472,10 +593,11 @@ class ChatViewSet(viewsets.ModelViewSet): assistant_content = "".join(content_chunks) if tool_calls_accumulator: - tool_iterations += 1 successful_tool_calls = [] successful_tool_messages = [] successful_tool_chat_entries = [] + first_execution_failure = None + encountered_permanent_failure = False for tool_call in tool_calls_accumulator: function_payload = tool_call.get("function") or {} @@ -519,7 +641,8 @@ class ChatViewSet(viewsets.ModelViewSet): **prepared_arguments, ) - if self._is_search_places_missing_location_required_error( + attempted_location_retry = False + if self._is_search_places_location_retry_candidate_error( function_name, result, ): @@ -540,6 +663,7 @@ class ChatViewSet(viewsets.ModelViewSet): ): continue seen_retry_locations.add(normalized_retry_location) + attempted_location_retry = True retry_arguments = dict(prepared_arguments) retry_arguments["location"] = retry_location @@ -552,7 +676,11 @@ class ChatViewSet(viewsets.ModelViewSet): **retry_arguments, ) - if not self._is_required_param_tool_error(retry_result): + if not self._is_required_param_tool_error( + retry_result + ) and not self._is_execution_failure_tool_error( + retry_result + ): result = retry_result tool_call_for_history = { **tool_call, @@ -564,6 +692,17 @@ class ChatViewSet(viewsets.ModelViewSet): } break + # If we attempted context retries but all failed, convert + # to an execution failure so we never ask the user for a + # location they already implied via itinerary context. + if ( + attempted_location_retry + and self._is_required_param_tool_error(result) + ): + result = { + "error": "Could not search places at the provided itinerary locations" + } + if self._is_required_param_tool_error(result): assistant_message_kwargs = { "conversation": conversation, @@ -585,9 +724,12 @@ class ChatViewSet(viewsets.ModelViewSet): thread_sensitive=True, )(**tool_message) - if self._is_search_places_missing_location_required_error( - function_name, - result, + if ( + not attempted_location_retry + and self._is_search_places_missing_location_required_error( + function_name, + result, + ) ): clarification_content = self._build_search_places_location_clarification_message() await sync_to_async( @@ -630,6 +772,13 @@ class ChatViewSet(viewsets.ModelViewSet): yield "data: [DONE]\n\n" return + if self._is_execution_failure_tool_error(result): + if first_execution_failure is None: + first_execution_failure = (function_name, result) + if not self._is_retryable_execution_failure(result): + encountered_permanent_failure = True + continue + result_content = serialize_tool_result(result) successful_tool_calls.append(tool_call_for_history) @@ -659,6 +808,41 @@ class ChatViewSet(viewsets.ModelViewSet): } yield f"data: {json.dumps(tool_event)}\n\n" + if not successful_tool_calls and first_execution_failure: + if encountered_permanent_failure: + all_failure_rounds = MAX_ALL_FAILURE_ROUNDS + else: + all_failure_rounds += 1 + + if all_failure_rounds >= MAX_ALL_FAILURE_ROUNDS: + failed_tool_name, failed_tool_result = ( + first_execution_failure + ) + error_event = self._build_tool_execution_error_event( + failed_tool_name, + failed_tool_result, + ) + await sync_to_async( + ChatMessage.objects.create, + thread_sensitive=True, + )( + conversation=conversation, + role="assistant", + content=error_event["error"], + ) + await sync_to_async( + conversation.save, + thread_sensitive=True, + )(update_fields=["updated_at"]) + yield f"data: {json.dumps(error_event)}\n\n" + yield "data: [DONE]\n\n" + return + + continue + + all_failure_rounds = 0 + tool_iterations += 1 + assistant_with_tools = { "role": "assistant", "content": assistant_content, diff --git a/docs/docs/usage/usage.md b/docs/docs/usage/usage.md index 1d56c6f7..62247ef2 100644 --- a/docs/docs/usage/usage.md +++ b/docs/docs/usage/usage.md @@ -30,7 +30,7 @@ You can set a **default AI provider and model** in **Settings** (under "Default Day suggestions (the AI-generated place recommendations for specific itinerary days) also respect your saved default provider and model. If no defaults are saved, the instance-level provider configured by the server admin is used. -Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping. +Provider errors (authentication, model not found, rate limits, invalid tool calls) are displayed as clear, actionable messages in the chat. If the AI attempts to use a tool incorrectly (e.g., missing required parameters), the error is surfaced once and the conversation stops cleanly rather than looping. Tool execution failures (e.g., a place search that can't geocode, or an unavailable web search service) also stop with a single error message — the assistant won't keep retrying a broken tool. #### Collections diff --git a/frontend/src/lib/components/AITravelChat.svelte b/frontend/src/lib/components/AITravelChat.svelte index da95be44..00808d5d 100644 --- a/frontend/src/lib/components/AITravelChat.svelte +++ b/frontend/src/lib/components/AITravelChat.svelte @@ -6,6 +6,7 @@ import { addToast } from '$lib/toasts'; type ToolResultEntry = { + tool_call_id?: string; name: string; result: unknown; }; @@ -319,21 +320,58 @@ try { return { + tool_call_id: msg.tool_call_id, name: msg.name || 'tool', result: JSON.parse(msg.content) }; } catch { return { + tool_call_id: msg.tool_call_id, name: msg.name || 'tool', result: msg.content }; } } + function appendToolResultDedup( + toolResults: ToolResultEntry[] | undefined, + toolResult: ToolResultEntry + ): ToolResultEntry[] { + const next = toolResults || []; + if ( + toolResult.tool_call_id && + next.some((existing) => existing.tool_call_id === toolResult.tool_call_id) + ) { + return next; + } + + return [...next, toolResult]; + } + + function uniqueToolResultsByCallId(toolResults: ToolResultEntry[] | undefined): ToolResultEntry[] { + if (!toolResults) { + return []; + } + + const seen = new Set(); + const unique: ToolResultEntry[] = []; + for (const result of toolResults) { + if (result.tool_call_id) { + if (seen.has(result.tool_call_id)) { + continue; + } + seen.add(result.tool_call_id); + } + unique.push(result); + } + + return unique; + } + function rebuildConversationMessages(rawMessages: ChatMessage[]): ChatMessage[] { const rebuilt = rawMessages.map((msg) => ({ ...msg, - tool_results: msg.tool_results ? [...msg.tool_results] : undefined + tool_results: undefined })); let activeAssistant: ChatMessage | null = null; @@ -361,7 +399,10 @@ continue; } - activeAssistant.tool_results = [...(activeAssistant.tool_results || []), parsedResult]; + activeAssistant.tool_results = appendToolResultDedup( + activeAssistant.tool_results, + parsedResult + ); if ( toolCallIds.length > 0 && @@ -466,10 +507,14 @@ if (parsed.tool_result) { const toolResult: ToolResultEntry = { + tool_call_id: parsed.tool_result.tool_call_id, name: parsed.tool_result.name || parsed.tool_result.tool || 'tool', result: parsed.tool_result.result }; - assistantMsg.tool_results = [...(assistantMsg.tool_results || []), toolResult]; + assistantMsg.tool_results = appendToolResultDedup( + assistantMsg.tool_results, + toolResult + ); messages = [...messages]; } } catch { @@ -891,7 +936,7 @@
{msg.content}
{#if msg.role === 'assistant' && msg.tool_results}
- {#each msg.tool_results as result} + {#each uniqueToolResultsByCallId(msg.tool_results) as result} {#if hasPlaceResults(result)}
{#each getPlaceResults(result) as place}