|
11 | 11 |
|
12 | 12 | from ..telemetry import metrics_constants as constants
|
13 | 13 | from ..types.content import Message
|
14 |
| -from ..types.streaming import Metrics, Usage |
| 14 | +from ..types.event_loop import Metrics, Usage |
15 | 15 | from ..types.tools import ToolUse
|
16 | 16 |
|
17 | 17 | logger = logging.getLogger(__name__)
|
@@ -264,6 +264,21 @@ def update_usage(self, usage: Usage) -> None:
|
264 | 264 | self.accumulated_usage["outputTokens"] += usage["outputTokens"]
|
265 | 265 | self.accumulated_usage["totalTokens"] += usage["totalTokens"]
|
266 | 266 |
|
| 267 | + # Handle optional cached token metrics |
| 268 | + if "cacheReadInputTokens" in usage: |
| 269 | + cache_read_tokens = usage["cacheReadInputTokens"] |
| 270 | + self._metrics_client.event_loop_cache_read_input_tokens.record(cache_read_tokens) |
| 271 | + self.accumulated_usage["cacheReadInputTokens"] = ( |
| 272 | + self.accumulated_usage.get("cacheReadInputTokens", 0) + cache_read_tokens |
| 273 | + ) |
| 274 | + |
| 275 | + if "cacheWriteInputTokens" in usage: |
| 276 | + cache_write_tokens = usage["cacheWriteInputTokens"] |
| 277 | + self._metrics_client.event_loop_cache_write_input_tokens.record(cache_write_tokens) |
| 278 | + self.accumulated_usage["cacheWriteInputTokens"] = ( |
| 279 | + self.accumulated_usage.get("cacheWriteInputTokens", 0) + cache_write_tokens |
| 280 | + ) |
| 281 | + |
267 | 282 | def update_metrics(self, metrics: Metrics) -> None:
|
268 | 283 | """Update the accumulated performance metrics with new metrics data.
|
269 | 284 |
|
@@ -325,11 +340,21 @@ def _metrics_summary_to_lines(event_loop_metrics: EventLoopMetrics, allowed_name
|
325 | 340 | f"├─ Cycles: total={summary['total_cycles']}, avg_time={summary['average_cycle_time']:.3f}s, "
|
326 | 341 | f"total_time={summary['total_duration']:.3f}s"
|
327 | 342 | )
|
328 |
| - yield ( |
329 |
| - f"├─ Tokens: in={summary['accumulated_usage']['inputTokens']}, " |
330 |
| - f"out={summary['accumulated_usage']['outputTokens']}, " |
331 |
| - f"total={summary['accumulated_usage']['totalTokens']}" |
332 |
| - ) |
| 343 | + |
| 344 | + # Build token display with optional cached tokens |
| 345 | + token_parts = [ |
| 346 | + f"in={summary['accumulated_usage']['inputTokens']}", |
| 347 | + f"out={summary['accumulated_usage']['outputTokens']}", |
| 348 | + f"total={summary['accumulated_usage']['totalTokens']}", |
| 349 | + ] |
| 350 | + |
| 351 | + # Add cached token info if present |
| 352 | + if summary["accumulated_usage"].get("cacheReadInputTokens"): |
| 353 | + token_parts.append(f"cache_read={summary['accumulated_usage']['cacheReadInputTokens']}") |
| 354 | + if summary["accumulated_usage"].get("cacheWriteInputTokens"): |
| 355 | + token_parts.append(f"cache_write={summary['accumulated_usage']['cacheWriteInputTokens']}") |
| 356 | + |
| 357 | + yield f"├─ Tokens: {', '.join(token_parts)}" |
333 | 358 | yield f"├─ Bedrock Latency: {summary['accumulated_metrics']['latencyMs']}ms"
|
334 | 359 |
|
335 | 360 | yield "├─ Tool Usage:"
|
@@ -421,6 +446,8 @@ class MetricsClient:
|
421 | 446 | event_loop_latency: Histogram
|
422 | 447 | event_loop_input_tokens: Histogram
|
423 | 448 | event_loop_output_tokens: Histogram
|
| 449 | + event_loop_cache_read_input_tokens: Histogram |
| 450 | + event_loop_cache_write_input_tokens: Histogram |
424 | 451 |
|
425 | 452 | tool_call_count: Counter
|
426 | 453 | tool_success_count: Counter
|
@@ -474,3 +501,9 @@ def create_instruments(self) -> None:
|
474 | 501 | self.event_loop_output_tokens = self.meter.create_histogram(
|
475 | 502 | name=constants.STRANDS_EVENT_LOOP_OUTPUT_TOKENS, unit="token"
|
476 | 503 | )
|
| 504 | + self.event_loop_cache_read_input_tokens = self.meter.create_histogram( |
| 505 | + name=constants.STRANDS_EVENT_LOOP_CACHE_READ_INPUT_TOKENS, unit="token" |
| 506 | + ) |
| 507 | + self.event_loop_cache_write_input_tokens = self.meter.create_histogram( |
| 508 | + name=constants.STRANDS_EVENT_LOOP_CACHE_WRITE_INPUT_TOKENS, unit="token" |
| 509 | + ) |
0 commit comments