From 259140ffc9ead19d36292d2a0cecef1a06e70798 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 18 Mar 2025 16:21:09 +0100 Subject: [PATCH] Add tests for NullObservation with cause > 0 and clarify event flow (#7315) Co-authored-by: openhands --- openhands/controller/agent_controller.py | 23 ++--- tests/unit/test_agent_controller.py | 115 +++++++++++++++++++++++ tests/unit/test_memory.py | 1 - 3 files changed, 127 insertions(+), 12 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index b4571c5ff4..c3d82f2c93 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -271,9 +271,9 @@ class AgentController: await self._react_to_exception(reported) def should_step(self, event: Event) -> bool: - """ - Whether the agent should take a step based on an event. In general, - the agent should take a step if it receives a message from the user, + """Whether the agent should take a step based on an event. + + In general, the agent should take a step if it receives a message from the user, or observes something in the environment (after acting). """ # it might be the delegate's day in the sun @@ -296,7 +296,8 @@ class AgentController: if ( isinstance(event, NullObservation) and event.cause is not None - and event.cause > 0 + and event.cause + > 0 # NullObservation has cause > 0 (RecallAction), not 0 (user message) ): return True if isinstance(event, AgentStateChangedObservation) or isinstance( @@ -312,7 +313,6 @@ class AgentController: Args: event (Event): The incoming event to process. """ - # If we have a delegate that is not finished or errored, forward events to it if self.delegate is not None: delegate_state = self.delegate.get_agent_state() @@ -469,7 +469,7 @@ class AgentController: await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT) def _reset(self) -> None: - """Resets the agent controller""" + """Resets the agent controller.""" # Runnable actions need an Observation # make sure there is an Observation with the tool call metadata to be recognized by the agent # otherwise the pending action is found in history, but it's incomplete without an obs with tool result @@ -621,7 +621,8 @@ class AgentController: ) def end_delegate(self) -> None: - """Ends the currently active delegate (e.g., if it is finished or errored) + """Ends the currently active delegate (e.g., if it is finished or errored). + so that this controller can resume normal operation. """ if self.delegate is None: @@ -1029,8 +1030,9 @@ class AgentController: ) def _apply_conversation_window(self, events: list[Event]) -> list[Event]: - """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs - and ensuring the first user message is always included. + """Cuts history roughly in half when context window is exceeded. + + It preserves action-observation pairs and ensures that the first user message is always included. The algorithm: 1. Cut history in half @@ -1183,8 +1185,7 @@ class AgentController: return False def _first_user_message(self) -> MessageAction | None: - """ - Get the first user message for this agent. + """Get the first user message for this agent. For regular agents, this is the first user message from the beginning (start_id=0). For delegate agents, this is the first user message after the delegate's start_id. diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index dda09da893..fc59069bc9 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -20,6 +20,7 @@ from openhands.events.observation import ( ErrorObservation, ) from openhands.events.observation.agent import RecallObservation +from openhands.events.observation.empty import NullObservation from openhands.events.serialization import event_to_dict from openhands.llm import LLM from openhands.llm.metrics import Metrics, TokenUsage @@ -1038,3 +1039,117 @@ async def test_first_user_message_with_identical_content(): ) # This should be False, but may be True if there's a bug await controller.close() + + +async def test_agent_controller_processes_null_observation_with_cause(): + """Test that AgentController processes NullObservation events with a cause value. + + And that the agent's step method is called as a result. + """ + # Create an in-memory file store and real event stream + file_store = InMemoryFileStore() + event_stream = EventStream(sid='test-session', file_store=file_store) + + # Create a Memory instance - not used directly in this test but needed for setup + Memory(event_stream=event_stream, sid='test-session') + + # Create a mock agent with necessary attributes + mock_agent = MagicMock(spec=Agent) + mock_agent.llm = MagicMock(spec=LLM) + mock_agent.llm.metrics = Metrics() + mock_agent.llm.config = AppConfig().get_llm_config() + + # Create a controller with the mock agent + controller = AgentController( + agent=mock_agent, + event_stream=event_stream, + max_iterations=10, + sid='test-session', + ) + + # Patch the controller's step method to track calls + with patch.object(controller, 'step') as mock_step: + # Create and add the first user message (will have ID 0) + user_message = MessageAction(content='First user message') + user_message._source = EventSource.USER # type: ignore[attr-defined] + event_stream.add_event(user_message, EventSource.USER) + + # Give it a little time to process + await asyncio.sleep(0.3) + + # Get all events from the stream + events = list(event_stream.get_events()) + + # Events in the stream: + # Event 0: MessageAction, ID: 0, Cause: None, Source: EventSource.USER, Content: First user message + # Event 1: RecallAction, ID: 1, Cause: None, Source: EventSource.USER, Content: N/A + # Event 2: NullObservation, ID: 2, Cause: 1, Source: EventSource.ENVIRONMENT, Content: + # Event 3: AgentStateChangedObservation, ID: 3, Cause: None, Source: EventSource.ENVIRONMENT, Content: + + # Find the RecallAction event (should be automatically created) + recall_actions = [event for event in events if isinstance(event, RecallAction)] + assert len(recall_actions) > 0, 'No RecallAction was created' + recall_action = recall_actions[0] + + # Find any NullObservation events + null_obs_events = [ + event for event in events if isinstance(event, NullObservation) + ] + assert len(null_obs_events) > 0, 'No NullObservation was created' + null_observation = null_obs_events[0] + + # Verify the NullObservation has a cause that points to the RecallAction + assert null_observation.cause is not None, 'NullObservation cause is None' + assert ( + null_observation.cause == recall_action.id + ), f'Expected cause={recall_action.id}, got cause={null_observation.cause}' + + # Verify the controller's should_step method returns True for this observation + assert controller.should_step( + null_observation + ), 'should_step should return True for this NullObservation' + + # Verify the controller's step method was called + # This means the controller processed the NullObservation + assert mock_step.called, "Controller's step method was not called" + + # Now test with a NullObservation that has cause=0 + # Create a NullObservation with cause = 0 (pointing to the first user message) + null_observation_zero = NullObservation(content='Test observation with cause=0') + null_observation_zero._cause = 0 # type: ignore[attr-defined] + + # Verify the controller's should_step method would return False for this observation + assert not controller.should_step( + null_observation_zero + ), 'should_step should return False for NullObservation with cause=0' + + +def test_agent_controller_should_step_with_null_observation_cause_zero(): + """Test that AgentController's should_step method returns False for NullObservation with cause = 0.""" + # Create a mock event stream + file_store = InMemoryFileStore() + event_stream = EventStream(sid='test-session', file_store=file_store) + + # Create a mock agent + mock_agent = MagicMock(spec=Agent) + + # Create an agent controller + controller = AgentController( + agent=mock_agent, + event_stream=event_stream, + max_iterations=10, + sid='test-session', + ) + + # Create a NullObservation with cause = 0 + # This should not happen, but if it does, the controller shouldn't step. + null_observation = NullObservation(content='Test observation') + null_observation._cause = 0 # type: ignore[attr-defined] + + # Check if should_step returns False for this observation + result = controller.should_step(null_observation) + + # It should return False since we only want to step on NullObservation with cause > 0 + assert ( + result is False + ), 'should_step should return False for NullObservation with cause = 0' diff --git a/tests/unit/test_memory.py b/tests/unit/test_memory.py index 5bf1ceda80..663d528eb1 100644 --- a/tests/unit/test_memory.py +++ b/tests/unit/test_memory.py @@ -61,7 +61,6 @@ def prompt_dir(tmp_path): @pytest.mark.asyncio async def test_memory_on_event_exception_handling(memory, event_stream): """Test that exceptions in Memory.on_event are properly handled via status callback.""" - # Create a dummy agent for the controller agent = MagicMock(spec=Agent) agent.llm = MagicMock(spec=LLM)