Skip to content

Commit 29b2127

Browse files
fix(event_loop): ensure tool_use content blocks are valid after max_tokens to prevent unrecoverable state (#607)
1 parent ec5304c commit 29b2127

File tree

6 files changed

+420
-45
lines changed

6 files changed

+420
-45
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Message recovery utilities for handling max token limit scenarios.
2+
3+
This module provides functionality to recover and clean up incomplete messages that occur
4+
when model responses are truncated due to maximum token limits being reached. It specifically
5+
handles cases where tool use blocks are incomplete or malformed due to truncation.
6+
"""
7+
8+
import logging
9+
10+
from ..types.content import ContentBlock, Message
11+
from ..types.tools import ToolUse
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def recover_message_on_max_tokens_reached(message: Message) -> Message:
    """Sanitize a model message that was truncated by a max token limit.

    A response cut off at the token limit may contain tool use blocks that are
    incomplete or otherwise unreliable. Rather than letting such blocks reach the
    tool executor, this function rewrites the message so the conversation can
    continue gracefully:

    1. Every tool use block (valid-looking or not) is swapped for a short text
       block explaining that the tool use was cut off by the token limit.
    2. Every other content block (text, images, etc.) is kept untouched.
    3. The message role is preserved.

    Args:
        message: The potentially truncated message returned by the model when the
            max token limit was hit.

    Returns:
        A Message with the same role whose tool uses have been replaced by
        explanatory text blocks, safe to append to conversation history.

    Example:
        A content block such as:
        ```
        {"toolUse": {"name": "calculator", "input": {"expression": "2+2"}, "toolUseId": "123"}}
        ```

        becomes:
        ```
        {"text": "The selected tool calculator's tool use was incomplete due to maximum token limits being reached."}
        ```
    """
    logger.info("handling max_tokens stop reason - replacing all tool uses with error messages")

    recovered_blocks: list[ContentBlock] = []
    for block in message["content"] or []:
        truncated_tool_use: ToolUse | None = block.get("toolUse")
        if truncated_tool_use:
            # Any tool use present after max_tokens truncation is suspect; replace it.
            shown_name = truncated_tool_use.get("name") or "<unknown>"
            logger.warning(
                "tool_name=<%s> | replacing with error message due to max_tokens truncation.", shown_name
            )
            recovered_blocks.append(
                {
                    "text": f"The selected tool {shown_name}'s tool use was incomplete due "
                    f"to maximum token limits being reached."
                }
            )
        else:
            # Non-tool content (and falsy/empty toolUse blocks) pass through unchanged.
            recovered_blocks.append(block)

    return {"role": message["role"], "content": recovered_blocks}

src/strands/event_loop/event_loop.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
)
3737
from ..types.streaming import Metrics, StopReason
3838
from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse
39+
from ._recover_message_on_max_tokens_reached import recover_message_on_max_tokens_reached
3940
from .streaming import stream_messages
4041

4142
if TYPE_CHECKING:
@@ -156,6 +157,9 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
156157
)
157158
)
158159

160+
if stop_reason == "max_tokens":
161+
message = recover_message_on_max_tokens_reached(message)
162+
159163
if model_invoke_span:
160164
tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
161165
break # Success! Break out of retry loop
@@ -192,6 +196,19 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
192196
raise e
193197

194198
try:
199+
# Add message in trace and mark the end of the stream messages trace
200+
stream_trace.add_message(message)
201+
stream_trace.end()
202+
203+
# Add the response message to the conversation
204+
agent.messages.append(message)
205+
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
206+
yield {"callback": {"message": message}}
207+
208+
# Update metrics
209+
agent.event_loop_metrics.update_usage(usage)
210+
agent.event_loop_metrics.update_metrics(metrics)
211+
195212
if stop_reason == "max_tokens":
196213
"""
197214
Handle max_tokens limit reached by the model.
@@ -205,21 +222,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
205222
"Agent has reached an unrecoverable state due to max_tokens limit. "
206223
"For more information see: "
207224
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
208-
),
209-
incomplete_message=message,
225+
)
210226
)
211-
# Add message in trace and mark the end of the stream messages trace
212-
stream_trace.add_message(message)
213-
stream_trace.end()
214-
215-
# Add the response message to the conversation
216-
agent.messages.append(message)
217-
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
218-
yield {"callback": {"message": message}}
219-
220-
# Update metrics
221-
agent.event_loop_metrics.update_usage(usage)
222-
agent.event_loop_metrics.update_metrics(metrics)
223227

224228
# If the model is requesting to use tools
225229
if stop_reason == "tool_use":

src/strands/types/exceptions.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
from typing import Any
44

5-
from strands.types.content import Message
6-
75

86
class EventLoopException(Exception):
97
"""Exception raised by the event loop."""
@@ -28,14 +26,12 @@ class MaxTokensReachedException(Exception):
2826
the complexity of the response, or when the model naturally reaches its configured output limit during generation.
2927
"""
3028

31-
def __init__(self, message: str, incomplete_message: Message):
29+
def __init__(self, message: str):
3230
"""Initialize the exception with an error message.
3331
3432
Args:
3533
message: The error message describing the token limit issue
36-
incomplete_message: The valid Message object with incomplete content due to token limits
3734
"""
38-
self.incomplete_message = incomplete_message
3935
super().__init__(message)
4036

4137

tests/strands/event_loop/test_event_loop.py

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,10 @@ async def test_event_loop_cycle_text_response_error(
305305
await alist(stream)
306306

307307

308+
@patch("strands.event_loop.event_loop.recover_message_on_max_tokens_reached")
308309
@pytest.mark.asyncio
309310
async def test_event_loop_cycle_tool_result(
311+
mock_recover_message,
310312
agent,
311313
model,
312314
system_prompt,
@@ -339,6 +341,9 @@ async def test_event_loop_cycle_tool_result(
339341

340342
assert tru_stop_reason == exp_stop_reason and tru_message == exp_message and tru_request_state == exp_request_state
341343

344+
# Verify that recover_message_on_max_tokens_reached was NOT called for tool_use stop reason
345+
mock_recover_message.assert_not_called()
346+
342347
model.stream.assert_called_with(
343348
[
344349
{"role": "user", "content": [{"text": "Hello"}]},
@@ -568,42 +573,44 @@ async def test_event_loop_cycle_max_tokens_exception(
568573
agenerator,
569574
alist,
570575
):
571-
"""Test that max_tokens stop reason raises MaxTokensReachedException."""
576+
"""Test that max_tokens stop reason calls recover_message_on_max_tokens_reached and then raises MaxTokensReachedException."""
572577

573-
# Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495
574-
model.stream.return_value = agenerator(
575-
[
576-
{
577-
"contentBlockStart": {
578-
"start": {
579-
"toolUse": {},
578+
model.stream.side_effect = [
579+
agenerator(
580+
[
581+
{
582+
"contentBlockStart": {
583+
"start": {
584+
"toolUse": {
585+
"toolUseId": "t1",
586+
"name": "asdf",
587+
"input": {}, # empty
588+
},
589+
},
580590
},
581591
},
582-
},
583-
{"contentBlockStop": {}},
584-
{"messageStop": {"stopReason": "max_tokens"}},
585-
]
586-
)
592+
{"contentBlockStop": {}},
593+
{"messageStop": {"stopReason": "max_tokens"}},
594+
]
595+
),
596+
]
587597

588598
# Call event_loop_cycle, expecting it to raise MaxTokensReachedException
589-
with pytest.raises(MaxTokensReachedException) as exc_info:
599+
expected_message = (
600+
"Agent has reached an unrecoverable state due to max_tokens limit. "
601+
"For more information see: "
602+
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
603+
)
604+
with pytest.raises(MaxTokensReachedException, match=expected_message):
590605
stream = strands.event_loop.event_loop.event_loop_cycle(
591606
agent=agent,
592607
invocation_state={},
593608
)
594609
await alist(stream)
595610

596611
# Verify the exception message contains the expected content
597-
expected_message = (
598-
"Agent has reached an unrecoverable state due to max_tokens limit. "
599-
"For more information see: "
600-
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
601-
)
602-
assert str(exc_info.value) == expected_message
603-
604-
# Verify that the message has not been appended to the messages array
605-
assert len(agent.messages) == 1
606-
assert exc_info.value.incomplete_message not in agent.messages
612+
assert len(agent.messages) == 2
613+
assert "tool use was incomplete due" in agent.messages[1]["content"][0]["text"]
607614

608615

609616
@patch("strands.event_loop.event_loop.get_tracer")

0 commit comments

Comments
 (0)