Skip to content

Commit 4b5c5a7

Browse files
committed
feat: recover from max tokens reached by default
1 parent 66c4c07 commit 4b5c5a7

16 files changed

+417
-569
lines changed

src/strands/agent/agent.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from ..tools.registry import ToolRegistry
3838
from ..tools.watcher import ToolWatcher
3939
from ..types.content import ContentBlock, Message, Messages
40-
from ..types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
40+
from ..types.exceptions import ContextWindowOverflowException
4141
from ..types.tools import ToolResult, ToolUse
4242
from ..types.traces import AttributeValue
4343
from .agent_result import AgentResult
@@ -582,21 +582,18 @@ async def _execute_event_loop_cycle(self, invocation_state: dict[str, Any]) -> A
582582
)
583583
async for event in events:
584584
yield event
585-
return
585+
586586
except ContextWindowOverflowException as e:
587587
# Try reducing the context size and retrying
588-
self.conversation_manager.reduce_context(agent=self, e=e)
589-
except MaxTokensReachedException as e:
590-
# Recover conversation state after token limit exceeded, then continue with next cycle
591-
await self.conversation_manager.handle_token_limit_reached(agent=self, e=e)
588+
self.conversation_manager.reduce_context(self, e=e)
592589

593-
# Sync agent after handling exception to keep conversation_manager_state up to date in the session
594-
if self._session_manager:
595-
self._session_manager.sync_agent(self)
590+
# Sync agent after reduce_context to keep conversation_manager_state up to date in the session
591+
if self._session_manager:
592+
self._session_manager.sync_agent(self)
596593

597-
events = self._execute_event_loop_cycle(invocation_state)
598-
async for event in events:
599-
yield event
594+
events = self._execute_event_loop_cycle(invocation_state)
595+
async for event in events:
596+
yield event
600597

601598
def _record_tool_execution(
602599
self,

src/strands/agent/conversation_manager/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,12 @@
1515

1616
from .conversation_manager import ConversationManager
1717
from .null_conversation_manager import NullConversationManager
18-
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached
1918
from .sliding_window_conversation_manager import SlidingWindowConversationManager
2019
from .summarizing_conversation_manager import SummarizingConversationManager
2120

2221
__all__ = [
2322
"ConversationManager",
2423
"NullConversationManager",
25-
"recover_tool_use_on_max_tokens_reached",
2624
"SlidingWindowConversationManager",
2725
"SummarizingConversationManager",
2826
]

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from typing import TYPE_CHECKING, Any, Optional
55

66
from ...types.content import Message
7-
from ...types.exceptions import MaxTokensReachedException
87

98
if TYPE_CHECKING:
109
from ...agent.agent import Agent
@@ -87,17 +86,3 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
8786
**kwargs: Additional keyword arguments for future extensibility.
8887
"""
8988
pass
90-
91-
async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
92-
"""Called when MaxTokensReachedException is thrown to recover conversation state.
93-
94-
This method should implement recovery strategies when the token limit is exceeded and the message array
95-
may be in a broken state.
96-
97-
Args:
98-
agent: The agent whose conversation state will be recovered.
99-
This list is modified in-place.
100-
e: The MaxTokensReachedException that triggered the recovery.
101-
**kwargs: Additional keyword arguments for future extensibility.
102-
"""
103-
raise e

src/strands/agent/conversation_manager/recover_tool_use_on_max_tokens_reached.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
from ...agent.agent import Agent
88

99
from ...types.content import Messages
10-
from ...types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
10+
from ...types.exceptions import ContextWindowOverflowException
1111
from .conversation_manager import ConversationManager
12-
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached
1312

1413
logger = logging.getLogger(__name__)
1514

@@ -113,16 +112,6 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
113112
# Overwrite message history
114113
messages[:] = messages[trim_index:]
115114

116-
async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
117-
"""Apply sliding window strategy for token limit recovery.
118-
119-
Args:
120-
agent: The agent whose conversation state will be recovered.
121-
e: The MaxTokensReachedException that triggered the recovery.
122-
**kwargs: Additional keyword arguments for future extensibility.
123-
"""
124-
await recover_tool_use_on_max_tokens_reached(agent, e)
125-
126115
def _truncate_tool_results(self, messages: Messages, msg_idx: int) -> bool:
127116
"""Truncate tool results in a message to reduce context size.
128117

src/strands/agent/conversation_manager/summarizing_conversation_manager.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
from typing_extensions import override
77

88
from ...types.content import Message
9-
from ...types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
9+
from ...types.exceptions import ContextWindowOverflowException
1010
from .conversation_manager import ConversationManager
11-
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached
1211

1312
if TYPE_CHECKING:
1413
from ..agent import Agent
@@ -167,16 +166,6 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
167166
logger.error("Summarization failed: %s", summarization_error)
168167
raise summarization_error from e
169168

170-
async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
171-
"""Apply summarization strategy for token limit recovery.
172-
173-
Args:
174-
agent: The agent whose conversation state will be recovered.
175-
e: The MaxTokensReachedException that triggered the recovery.
176-
**kwargs: Additional keyword arguments for future extensibility.
177-
"""
178-
await recover_tool_use_on_max_tokens_reached(agent, e)
179-
180169
def _generate_summary(self, messages: List[Message], agent: "Agent") -> Message:
181170
"""Generate a summary of the provided messages.
182171
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""Message recovery utilities for handling max token limit scenarios.
2+
3+
This module provides functionality to recover and clean up incomplete messages that occur
4+
when model responses are truncated due to maximum token limits being reached. It specifically
5+
handles cases where tool use blocks are incomplete or malformed due to truncation.
6+
"""
7+
8+
import logging
9+
10+
from ..types.content import ContentBlock, Message
11+
from ..types.tools import ToolUse
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def recover_message_on_max_tokens_reached(message: Message) -> Message:
    """Sanitize a model message that was truncated by a max-token limit.

    A response cut off at the token limit can contain tool use blocks that are
    missing required fields (``name``, ``input``, or ``toolUseId``). This function
    walks the message content and:

    1. Keeps every non-tool-use block (e.g. plain text) untouched.
    2. Keeps tool use blocks whose ``name``, ``input``, and ``toolUseId`` are all
       present and non-empty.
    3. Swaps each incomplete tool use for a text block explaining that the tool
       use was cut short by the token limit.

    Args:
        message: The possibly truncated message produced by the model.

    Returns:
        A new ``Message`` with the same role, whose content contains only valid
        blocks plus explanatory text for any dropped tool uses.

    Example:
        An incomplete tool use such as:
        ```
        {"toolUse": {"name": "calculator"}}  # missing input and toolUseId
        ```

        becomes:
        ```
        {"text": "The selected tool calculator's tool use was incomplete due to maximum token limits being reached."}
        ```
    """
    logger.info("handling max_tokens stop reason - inspecting incomplete message for invalid tool uses")

    cleaned: list[ContentBlock] = []
    for block in message["content"] or []:
        maybe_tool_use: ToolUse | None = block.get("toolUse")

        # Anything that is not a tool use (or is an empty one-falsy value) is
        # outside this recovery's scope when falsy: keep non-tool-use blocks.
        if not maybe_tool_use:
            cleaned.append(block)
            continue

        name = maybe_tool_use.get("name")
        # A tool use counts as complete only when all three required fields are
        # present and truthy; that is the best completeness signal available here.
        if name and maybe_tool_use.get("input") and maybe_tool_use.get("toolUseId"):
            cleaned.append(block)
            continue

        # Truncation left this tool use unusable - replace it with a readable note.
        shown = name or "<unknown>"
        logger.warning("tool_name=<%s> | replacing with error message due to max_tokens truncation.", shown)

        cleaned.append(
            {
                "text": f"The selected tool {shown}'s tool use was incomplete due "
                "to maximum token limits being reached."
            }
        )

    return {"content": cleaned, "role": message["role"]}

src/strands/event_loop/event_loop.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
)
3737
from ..types.streaming import Metrics, StopReason
3838
from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse
39+
from ._recover_message_on_max_tokens_reached import recover_message_on_max_tokens_reached
3940
from .streaming import stream_messages
4041

4142
if TYPE_CHECKING:
@@ -156,6 +157,9 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
156157
)
157158
)
158159

160+
if stop_reason == "max_tokens":
161+
message = recover_message_on_max_tokens_reached(message)
162+
159163
if model_invoke_span:
160164
tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
161165
break # Success! Break out of retry loop
@@ -192,6 +196,19 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
192196
raise e
193197

194198
try:
199+
# Add message in trace and mark the end of the stream messages trace
200+
stream_trace.add_message(message)
201+
stream_trace.end()
202+
203+
# Add the response message to the conversation
204+
agent.messages.append(message)
205+
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
206+
yield {"callback": {"message": message}}
207+
208+
# Update metrics
209+
agent.event_loop_metrics.update_usage(usage)
210+
agent.event_loop_metrics.update_metrics(metrics)
211+
195212
if stop_reason == "max_tokens":
196213
"""
197214
Handle max_tokens limit reached by the model.
@@ -205,21 +222,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
205222
"Agent has reached an unrecoverable state due to max_tokens limit. "
206223
"For more information see: "
207224
"https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
208-
),
209-
incomplete_message=message,
225+
)
210226
)
211-
# Add message in trace and mark the end of the stream messages trace
212-
stream_trace.add_message(message)
213-
stream_trace.end()
214-
215-
# Add the response message to the conversation
216-
agent.messages.append(message)
217-
agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=message))
218-
yield {"callback": {"message": message}}
219-
220-
# Update metrics
221-
agent.event_loop_metrics.update_usage(usage)
222-
agent.event_loop_metrics.update_metrics(metrics)
223227

224228
# If the model is requesting to use tools
225229
if stop_reason == "tool_use":

src/strands/types/exceptions.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
from typing import Any
44

5-
from strands.types.content import Message
6-
75

86
class EventLoopException(Exception):
97
"""Exception raised by the event loop."""
@@ -28,14 +26,12 @@ class MaxTokensReachedException(Exception):
2826
the complexity of the response, or when the model naturally reaches its configured output limit during generation.
2927
"""
3028

31-
def __init__(self, message: str, incomplete_message: Message):
29+
def __init__(self, message: str):
3230
"""Initialize the exception with an error message and the incomplete message object.
3331
3432
Args:
3533
message: The error message describing the token limit issue
36-
incomplete_message: The valid Message object with incomplete content due to token limits
3734
"""
38-
self.incomplete_message = incomplete_message
3935
super().__init__(message)
4036

4137

tests/strands/agent/conversation_manager/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)