Skip to content

Realtime: fix item parsing #1119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 38 additions & 27 deletions examples/realtime/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ def get_weather(city: str) -> str:
)


def _truncate_str(s: str, max_length: int) -> str:
if len(s) > max_length:
return s[:max_length] + "..."
return s


class Example:
def __init__(self) -> None:
self.ui = AppUI()
Expand Down Expand Up @@ -70,33 +76,38 @@ async def on_audio_recorded(self, audio_bytes: bytes) -> None:
await self.session.send_audio(audio_bytes)

async def _on_event(self, event: RealtimeSessionEvent) -> None:
if event.type == "agent_start":
self.ui.add_transcript(f"Agent started: {event.agent.name}")
elif event.type == "agent_end":
self.ui.add_transcript(f"Agent ended: {event.agent.name}")
elif event.type == "handoff":
self.ui.add_transcript(f"Handoff from {event.from_agent.name} to {event.to_agent.name}")
elif event.type == "tool_start":
self.ui.add_transcript(f"Tool started: {event.tool.name}")
elif event.type == "tool_end":
self.ui.add_transcript(f"Tool ended: {event.tool.name}; output: {event.output}")
elif event.type == "audio_end":
self.ui.add_transcript("Audio ended")
elif event.type == "audio":
np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
self.ui.play_audio(np_audio)
elif event.type == "audio_interrupted":
self.ui.add_transcript("Audio interrupted")
elif event.type == "error":
self.ui.add_transcript(f"Error: {event.error}")
elif event.type == "history_updated":
pass
elif event.type == "history_added":
pass
elif event.type == "raw_model_event":
self.ui.log_message(f"Raw model event: {event.data}")
else:
self.ui.log_message(f"Unknown event type: {event.type}")
try:
if event.type == "agent_start":
self.ui.add_transcript(f"Agent started: {event.agent.name}")
elif event.type == "agent_end":
self.ui.add_transcript(f"Agent ended: {event.agent.name}")
elif event.type == "handoff":
self.ui.add_transcript(
f"Handoff from {event.from_agent.name} to {event.to_agent.name}"
)
elif event.type == "tool_start":
self.ui.add_transcript(f"Tool started: {event.tool.name}")
elif event.type == "tool_end":
self.ui.add_transcript(f"Tool ended: {event.tool.name}; output: {event.output}")
elif event.type == "audio_end":
self.ui.add_transcript("Audio ended")
elif event.type == "audio":
np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
self.ui.play_audio(np_audio)
elif event.type == "audio_interrupted":
self.ui.add_transcript("Audio interrupted")
elif event.type == "error":
pass
elif event.type == "history_updated":
pass
elif event.type == "history_added":
pass
elif event.type == "raw_model_event":
self.ui.log_message(f"Raw model event: {_truncate_str(str(event.data), 50)}")
else:
self.ui.log_message(f"Unknown event type: {event.type}")
except Exception as e:
self.ui.log_message(f"Error processing event: {_truncate_str(str(e), 50)}")


if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions src/agents/realtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
RealtimeToolStart,
)
from .items import (
AssistantAudio,
AssistantMessageItem,
AssistantText,
InputAudio,
Expand Down Expand Up @@ -123,7 +122,6 @@
"RealtimeToolEnd",
"RealtimeToolStart",
# Items
"AssistantAudio",
"AssistantMessageItem",
"AssistantText",
"InputAudio",
Expand Down
17 changes: 4 additions & 13 deletions src/agents/realtime/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class InputText(BaseModel):
type: Literal["input_text"] = "input_text"
text: str
text: str | None = None

# Allow extra data
model_config = ConfigDict(extra="allow")
Expand All @@ -24,16 +24,7 @@ class InputAudio(BaseModel):

class AssistantText(BaseModel):
type: Literal["text"] = "text"
text: str

# Allow extra data
model_config = ConfigDict(extra="allow")


class AssistantAudio(BaseModel):
type: Literal["audio"] = "audio"
audio: str | None = None
transcript: str | None = None
text: str | None = None

# Allow extra data
model_config = ConfigDict(extra="allow")
Expand All @@ -55,7 +46,7 @@ class UserMessageItem(BaseModel):
previous_item_id: str | None = None
type: Literal["message"] = "message"
role: Literal["user"] = "user"
content: list[InputText | InputAudio]
content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]

# Allow extra data
model_config = ConfigDict(extra="allow")
Expand All @@ -67,7 +58,7 @@ class AssistantMessageItem(BaseModel):
type: Literal["message"] = "message"
role: Literal["assistant"] = "assistant"
status: Literal["in_progress", "completed", "incomplete"] | None = None
content: list[AssistantText | AssistantAudio]
content: list[AssistantText]

# Allow extra data
model_config = ConfigDict(extra="allow")
Expand Down
35 changes: 25 additions & 10 deletions src/agents/realtime/openai_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,15 +388,8 @@ async def _handle_conversation_item(
self, item: ConversationItem, previous_item_id: str | None
) -> None:
"""Handle conversation item creation/retrieval events."""
message_item: RealtimeMessageItem = TypeAdapter(RealtimeMessageItem).validate_python(
{
"item_id": item.id or "",
"previous_item_id": previous_item_id,
"type": item.type,
"role": item.role,
"content": item.content,
"status": "in_progress",
}
message_item = _ConversionHelper.conversation_item_to_realtime_message_item(
item, previous_item_id
)
await self._emit_event(RealtimeModelItemUpdatedEvent(item=message_item))

Expand All @@ -418,6 +411,8 @@ async def _cancel_response(self) -> None:

async def _handle_ws_event(self, event: dict[str, Any]):
try:
if "previous_item_id" in event and event["previous_item_id"] is None:
event["previous_item_id"] = "" # TODO (rm) remove
parsed: OpenAIRealtimeServerEvent = TypeAdapter(
OpenAIRealtimeServerEvent
).validate_python(event)
Expand Down Expand Up @@ -465,7 +460,8 @@ async def _handle_ws_event(self, event: dict[str, Any]):
previous_item_id = (
parsed.previous_item_id if parsed.type == "conversation.item.created" else None
)
await self._handle_conversation_item(parsed.item, previous_item_id)
if parsed.item.type == "message":
await self._handle_conversation_item(parsed.item, previous_item_id)
elif (
parsed.type == "conversation.item.input_audio_transcription.completed"
or parsed.type == "conversation.item.truncated"
Expand Down Expand Up @@ -567,3 +563,22 @@ def _tools_to_session_tools(self, tools: list[Tool]) -> list[OpenAISessionTool]:
)
)
return converted_tools


class _ConversionHelper:
@classmethod
def conversation_item_to_realtime_message_item(
cls, item: ConversationItem, previous_item_id: str | None
) -> RealtimeMessageItem:
return TypeAdapter(RealtimeMessageItem).validate_python(
{
"item_id": item.id or "",
"previous_item_id": previous_item_id,
"type": item.type,
"role": item.role,
"content": (
[content.model_dump() for content in item.content] if item.content else []
),
"status": "in_progress",
},
)
80 changes: 80 additions & 0 deletions tests/realtime/test_item_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from openai.types.beta.realtime.conversation_item import ConversationItem
from openai.types.beta.realtime.conversation_item_content import ConversationItemContent

from agents.realtime.items import (
AssistantMessageItem,
RealtimeMessageItem,
SystemMessageItem,
UserMessageItem,
)
from agents.realtime.openai_realtime import _ConversionHelper


def test_user_message_conversion() -> None:
item = ConversationItem(
id="123",
type="message",
role="user",
content=[
ConversationItemContent(
id=None, audio=None, text=None, transcript=None, type="input_text"
)
],
)

converted: RealtimeMessageItem = _ConversionHelper.conversation_item_to_realtime_message_item(
item, None
)

assert isinstance(converted, UserMessageItem)

item = ConversationItem(
id="123",
type="message",
role="user",
content=[
ConversationItemContent(
id=None, audio=None, text=None, transcript=None, type="input_audio"
)
],
)

converted = _ConversionHelper.conversation_item_to_realtime_message_item(item, None)

assert isinstance(converted, UserMessageItem)


def test_assistant_message_conversion() -> None:
item = ConversationItem(
id="123",
type="message",
role="assistant",
content=[
ConversationItemContent(id=None, audio=None, text=None, transcript=None, type="text")
],
)

converted: RealtimeMessageItem = _ConversionHelper.conversation_item_to_realtime_message_item(
item, None
)

assert isinstance(converted, AssistantMessageItem)


def test_system_message_conversion() -> None:
item = ConversationItem(
id="123",
type="message",
role="system",
content=[
ConversationItemContent(
id=None, audio=None, text=None, transcript=None, type="input_text"
)
],
)

converted: RealtimeMessageItem = _ConversionHelper.conversation_item_to_realtime_message_item(
item, None
)

assert isinstance(converted, SystemMessageItem)
14 changes: 7 additions & 7 deletions tests/realtime/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ async def test_item_updated_event_updates_existing_item(self, mock_model, mock_a
# Check that item was updated
assert len(session._history) == 1
updated_item = cast(AssistantMessageItem, session._history[0])
assert cast(AssistantText, updated_item.content[0]).text == "Updated"
assert updated_item.content[0].text == "Updated"

# Should have 2 events: raw + history updated (not added)
assert session._event_queue.qsize() == 2
Expand Down Expand Up @@ -524,7 +524,7 @@ def test_update_existing_item_by_id(self):
# Item should be updated
result_item = cast(AssistantMessageItem, new_history[0])
assert result_item.item_id == "item_1"
assert cast(AssistantText, result_item.content[0]).text == "Updated"
assert result_item.content[0].text == "Updated"

def test_update_existing_item_preserves_order(self):
"""Test that updating existing item preserves its position in history"""
Expand Down Expand Up @@ -557,13 +557,13 @@ def test_update_existing_item_preserves_order(self):

# Middle item should be updated
updated_result = cast(AssistantMessageItem, new_history[1])
assert cast(AssistantText, updated_result.content[0]).text == "Updated Second"
assert updated_result.content[0].text == "Updated Second"

# Other items should be unchanged
item1_result = cast(AssistantMessageItem, new_history[0])
item3_result = cast(AssistantMessageItem, new_history[2])
assert cast(AssistantText, item1_result.content[0]).text == "First"
assert cast(AssistantText, item3_result.content[0]).text == "Third"
assert item1_result.content[0].text == "First"
assert item3_result.content[0].text == "Third"

def test_insert_new_item_after_previous_item(self):
"""Test inserting new item after specified previous_item_id"""
Expand Down Expand Up @@ -598,7 +598,7 @@ def test_insert_new_item_after_previous_item(self):

# Content should be correct
item2_result = cast(AssistantMessageItem, new_history[1])
assert cast(AssistantText, item2_result.content[0]).text == "Second"
assert item2_result.content[0].text == "Second"

def test_insert_new_item_after_nonexistent_previous_item(self):
"""Test that item with nonexistent previous_item_id gets added to end"""
Expand Down Expand Up @@ -701,7 +701,7 @@ def test_complex_insertion_scenario(self):
assert len(history) == 4
assert [item.item_id for item in history] == ["A", "B", "D", "C"]
itemB_result = cast(AssistantMessageItem, history[1])
assert cast(AssistantText, itemB_result.content[0]).text == "Updated B"
assert itemB_result.content[0].text == "Updated B"


# Test 3: Tool call execution flow (_handle_tool_call method)
Expand Down