Add tests for NullObservation with cause > 0 and clarify event flow (#7315)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Engel Nyst 2025-03-18 16:21:09 +01:00 committed by GitHub
parent 3150af1ad7
commit 259140ffc9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 127 additions and 12 deletions

View File

@ -271,9 +271,9 @@ class AgentController:
await self._react_to_exception(reported)
def should_step(self, event: Event) -> bool:
"""
Whether the agent should take a step based on an event. In general,
the agent should take a step if it receives a message from the user,
"""Whether the agent should take a step based on an event.
In general, the agent should take a step if it receives a message from the user,
or observes something in the environment (after acting).
"""
# it might be the delegate's day in the sun
@ -296,7 +296,8 @@ class AgentController:
if (
isinstance(event, NullObservation)
and event.cause is not None
and event.cause > 0
and event.cause
> 0 # NullObservation has cause > 0 (RecallAction), not 0 (user message)
):
return True
if isinstance(event, AgentStateChangedObservation) or isinstance(
@ -312,7 +313,6 @@ class AgentController:
Args:
event (Event): The incoming event to process.
"""
# If we have a delegate that is not finished or errored, forward events to it
if self.delegate is not None:
delegate_state = self.delegate.get_agent_state()
@ -469,7 +469,7 @@ class AgentController:
await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
def _reset(self) -> None:
"""Resets the agent controller"""
"""Resets the agent controller."""
# Runnable actions need an Observation
# make sure there is an Observation with the tool call metadata to be recognized by the agent
# otherwise the pending action is found in history, but it's incomplete without an obs with tool result
@ -621,7 +621,8 @@ class AgentController:
)
def end_delegate(self) -> None:
"""Ends the currently active delegate (e.g., if it is finished or errored)
"""Ends the currently active delegate (e.g., if it is finished or errored).
so that this controller can resume normal operation.
"""
if self.delegate is None:
@ -1029,8 +1030,9 @@ class AgentController:
)
def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
"""Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
and ensuring the first user message is always included.
"""Cuts history roughly in half when context window is exceeded.
It preserves action-observation pairs and ensures that the first user message is always included.
The algorithm:
1. Cut history in half
@ -1183,8 +1185,7 @@ class AgentController:
return False
def _first_user_message(self) -> MessageAction | None:
"""
Get the first user message for this agent.
"""Get the first user message for this agent.
For regular agents, this is the first user message from the beginning (start_id=0).
For delegate agents, this is the first user message after the delegate's start_id.

View File

@ -20,6 +20,7 @@ from openhands.events.observation import (
ErrorObservation,
)
from openhands.events.observation.agent import RecallObservation
from openhands.events.observation.empty import NullObservation
from openhands.events.serialization import event_to_dict
from openhands.llm import LLM
from openhands.llm.metrics import Metrics, TokenUsage
@ -1038,3 +1039,117 @@ async def test_first_user_message_with_identical_content():
) # This should be False, but may be True if there's a bug
await controller.close()
async def test_agent_controller_processes_null_observation_with_cause():
"""Test that AgentController processes NullObservation events with a cause value.
And that the agent's step method is called as a result.
"""
# Create an in-memory file store and real event stream
file_store = InMemoryFileStore()
event_stream = EventStream(sid='test-session', file_store=file_store)
# Create a Memory instance - not used directly in this test but needed for setup
Memory(event_stream=event_stream, sid='test-session')
# Create a mock agent with necessary attributes
mock_agent = MagicMock(spec=Agent)
mock_agent.llm = MagicMock(spec=LLM)
mock_agent.llm.metrics = Metrics()
mock_agent.llm.config = AppConfig().get_llm_config()
# Create a controller with the mock agent
controller = AgentController(
agent=mock_agent,
event_stream=event_stream,
max_iterations=10,
sid='test-session',
)
# Patch the controller's step method to track calls
with patch.object(controller, 'step') as mock_step:
# Create and add the first user message (will have ID 0)
user_message = MessageAction(content='First user message')
user_message._source = EventSource.USER # type: ignore[attr-defined]
event_stream.add_event(user_message, EventSource.USER)
# Give it a little time to process
await asyncio.sleep(0.3)
# Get all events from the stream
events = list(event_stream.get_events())
# Events in the stream:
# Event 0: MessageAction, ID: 0, Cause: None, Source: EventSource.USER, Content: First user message
# Event 1: RecallAction, ID: 1, Cause: None, Source: EventSource.USER, Content: N/A
# Event 2: NullObservation, ID: 2, Cause: 1, Source: EventSource.ENVIRONMENT, Content:
# Event 3: AgentStateChangedObservation, ID: 3, Cause: None, Source: EventSource.ENVIRONMENT, Content:
# Find the RecallAction event (should be automatically created)
recall_actions = [event for event in events if isinstance(event, RecallAction)]
assert len(recall_actions) > 0, 'No RecallAction was created'
recall_action = recall_actions[0]
# Find any NullObservation events
null_obs_events = [
event for event in events if isinstance(event, NullObservation)
]
assert len(null_obs_events) > 0, 'No NullObservation was created'
null_observation = null_obs_events[0]
# Verify the NullObservation has a cause that points to the RecallAction
assert null_observation.cause is not None, 'NullObservation cause is None'
assert (
null_observation.cause == recall_action.id
), f'Expected cause={recall_action.id}, got cause={null_observation.cause}'
# Verify the controller's should_step method returns True for this observation
assert controller.should_step(
null_observation
), 'should_step should return True for this NullObservation'
# Verify the controller's step method was called
# This means the controller processed the NullObservation
assert mock_step.called, "Controller's step method was not called"
# Now test with a NullObservation that has cause=0
# Create a NullObservation with cause = 0 (pointing to the first user message)
null_observation_zero = NullObservation(content='Test observation with cause=0')
null_observation_zero._cause = 0 # type: ignore[attr-defined]
# Verify the controller's should_step method would return False for this observation
assert not controller.should_step(
null_observation_zero
), 'should_step should return False for NullObservation with cause=0'
def test_agent_controller_should_step_with_null_observation_cause_zero():
"""Test that AgentController's should_step method returns False for NullObservation with cause = 0."""
# Create a mock event stream
file_store = InMemoryFileStore()
event_stream = EventStream(sid='test-session', file_store=file_store)
# Create a mock agent
mock_agent = MagicMock(spec=Agent)
# Create an agent controller
controller = AgentController(
agent=mock_agent,
event_stream=event_stream,
max_iterations=10,
sid='test-session',
)
# Create a NullObservation with cause = 0
# This should not happen, but if it does, the controller shouldn't step.
null_observation = NullObservation(content='Test observation')
null_observation._cause = 0 # type: ignore[attr-defined]
# Check if should_step returns False for this observation
result = controller.should_step(null_observation)
# It should return False since we only want to step on NullObservation with cause > 0
assert (
result is False
), 'should_step should return False for NullObservation with cause = 0'

View File

@ -61,7 +61,6 @@ def prompt_dir(tmp_path):
@pytest.mark.asyncio
async def test_memory_on_event_exception_handling(memory, event_stream):
"""Test that exceptions in Memory.on_event are properly handled via status callback."""
# Create a dummy agent for the controller
agent = MagicMock(spec=Agent)
agent.llm = MagicMock(spec=LLM)