From 259140ffc9ead19d36292d2a0cecef1a06e70798 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Tue, 18 Mar 2025 16:21:09 +0100
Subject: [PATCH] Add tests for NullObservation with cause > 0 and clarify
 event flow (#7315)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/controller/agent_controller.py |  23 ++---
 tests/unit/test_agent_controller.py      | 115 +++++++++++++++++++++++
 tests/unit/test_memory.py                |   1 -
 3 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index b4571c5ff4..c3d82f2c93 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -271,9 +271,9 @@ class AgentController:
             await self._react_to_exception(reported)
 
     def should_step(self, event: Event) -> bool:
-        """
-        Whether the agent should take a step based on an event. In general,
-        the agent should take a step if it receives a message from the user,
+        """Whether the agent should take a step based on an event.
+
+        In general, the agent should take a step if it receives a message from the user,
         or observes something in the environment (after acting).
         """
         # it might be the delegate's day in the sun
@@ -296,7 +296,8 @@ class AgentController:
             if (
                 isinstance(event, NullObservation)
                 and event.cause is not None
-                and event.cause > 0
+                and event.cause
+                > 0  # NullObservation has cause > 0 (RecallAction), not 0 (user message)
             ):
                 return True
             if isinstance(event, AgentStateChangedObservation) or isinstance(
@@ -312,7 +313,6 @@ class AgentController:
         Args:
             event (Event): The incoming event to process.
         """
-
         # If we have a delegate that is not finished or errored, forward events to it
         if self.delegate is not None:
             delegate_state = self.delegate.get_agent_state()
@@ -469,7 +469,7 @@ class AgentController:
             await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
 
     def _reset(self) -> None:
-        """Resets the agent controller"""
+        """Resets the agent controller."""
         # Runnable actions need an Observation
         # make sure there is an Observation with the tool call metadata to be recognized by the agent
         # otherwise the pending action is found in history, but it's incomplete without an obs with tool result
@@ -621,7 +621,8 @@ class AgentController:
         )
 
     def end_delegate(self) -> None:
-        """Ends the currently active delegate (e.g., if it is finished or errored)
+        """Ends the currently active delegate (e.g., if it is finished or errored).
+
         so that this controller can resume normal operation.
         """
         if self.delegate is None:
@@ -1029,8 +1030,9 @@ class AgentController:
         )
 
     def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
-        """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
-        and ensuring the first user message is always included.
+        """Cuts history roughly in half when context window is exceeded.
+
+        It preserves action-observation pairs and ensures that the first user message is always included.
 
         The algorithm:
         1. Cut history in half
@@ -1183,8 +1185,7 @@ class AgentController:
         return False
 
     def _first_user_message(self) -> MessageAction | None:
-        """
-        Get the first user message for this agent.
+        """Get the first user message for this agent.
 
         For regular agents, this is the first user message from the beginning (start_id=0).
         For delegate agents, this is the first user message after the delegate's start_id.
diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index dda09da893..fc59069bc9 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -20,6 +20,7 @@ from openhands.events.observation import (
     ErrorObservation,
 )
 from openhands.events.observation.agent import RecallObservation
+from openhands.events.observation.empty import NullObservation
 from openhands.events.serialization import event_to_dict
 from openhands.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
@@ -1038,3 +1039,117 @@ async def test_first_user_message_with_identical_content():
     )  # This should be False, but may be True if there's a bug
 
     await controller.close()
+
+
+async def test_agent_controller_processes_null_observation_with_cause():
+    """Test that AgentController processes NullObservation events with a cause value.
+
+    And that the agent's step method is called as a result.
+    """
+    # Create an in-memory file store and real event stream
+    file_store = InMemoryFileStore()
+    event_stream = EventStream(sid='test-session', file_store=file_store)
+
+    # Create a Memory instance - not used directly in this test but needed for setup
+    Memory(event_stream=event_stream, sid='test-session')
+
+    # Create a mock agent with necessary attributes
+    mock_agent = MagicMock(spec=Agent)
+    mock_agent.llm = MagicMock(spec=LLM)
+    mock_agent.llm.metrics = Metrics()
+    mock_agent.llm.config = AppConfig().get_llm_config()
+
+    # Create a controller with the mock agent
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test-session',
+    )
+
+    # Patch the controller's step method to track calls
+    with patch.object(controller, 'step') as mock_step:
+        # Create and add the first user message (will have ID 0)
+        user_message = MessageAction(content='First user message')
+        user_message._source = EventSource.USER  # type: ignore[attr-defined]
+        event_stream.add_event(user_message, EventSource.USER)
+
+        # Give it a little time to process
+        await asyncio.sleep(0.3)
+
+        # Get all events from the stream
+        events = list(event_stream.get_events())
+
+        # Events in the stream:
+        # Event 0: MessageAction, ID: 0, Cause: None, Source: EventSource.USER, Content: First user message
+        # Event 1: RecallAction, ID: 1, Cause: None, Source: EventSource.USER, Content: N/A
+        # Event 2: NullObservation, ID: 2, Cause: 1, Source: EventSource.ENVIRONMENT, Content:
+        # Event 3: AgentStateChangedObservation, ID: 3, Cause: None, Source: EventSource.ENVIRONMENT, Content:
+
+        # Find the RecallAction event (should be automatically created)
+        recall_actions = [event for event in events if isinstance(event, RecallAction)]
+        assert len(recall_actions) > 0, 'No RecallAction was created'
+        recall_action = recall_actions[0]
+
+        # Find any NullObservation events
+        null_obs_events = [
+            event for event in events if isinstance(event, NullObservation)
+        ]
+        assert len(null_obs_events) > 0, 'No NullObservation was created'
+        null_observation = null_obs_events[0]
+
+        # Verify the NullObservation has a cause that points to the RecallAction
+        assert null_observation.cause is not None, 'NullObservation cause is None'
+        assert (
+            null_observation.cause == recall_action.id
+        ), f'Expected cause={recall_action.id}, got cause={null_observation.cause}'
+
+        # Verify the controller's should_step method returns True for this observation
+        assert controller.should_step(
+            null_observation
+        ), 'should_step should return True for this NullObservation'
+
+        # Verify the controller's step method was called
+        # This means the controller processed the NullObservation
+        assert mock_step.called, "Controller's step method was not called"
+
+        # Now test with a NullObservation that has cause=0
+        # Create a NullObservation with cause = 0 (pointing to the first user message)
+        null_observation_zero = NullObservation(content='Test observation with cause=0')
+        null_observation_zero._cause = 0  # type: ignore[attr-defined]
+
+        # Verify the controller's should_step method would return False for this observation
+        assert not controller.should_step(
+            null_observation_zero
+        ), 'should_step should return False for NullObservation with cause=0'
+
+
+def test_agent_controller_should_step_with_null_observation_cause_zero():
+    """Test that AgentController's should_step method returns False for NullObservation with cause = 0."""
+    # Create a mock event stream
+    file_store = InMemoryFileStore()
+    event_stream = EventStream(sid='test-session', file_store=file_store)
+
+    # Create a mock agent
+    mock_agent = MagicMock(spec=Agent)
+
+    # Create an agent controller
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test-session',
+    )
+
+    # Create a NullObservation with cause = 0
+    # This should not happen, but if it does, the controller shouldn't step.
+    null_observation = NullObservation(content='Test observation')
+    null_observation._cause = 0  # type: ignore[attr-defined]
+
+    # Check if should_step returns False for this observation
+    result = controller.should_step(null_observation)
+
+    # It should return False since we only want to step on NullObservation with cause > 0
+    assert (
+        result is False
+    ), 'should_step should return False for NullObservation with cause = 0'
diff --git a/tests/unit/test_memory.py b/tests/unit/test_memory.py
index 5bf1ceda80..663d528eb1 100644
--- a/tests/unit/test_memory.py
+++ b/tests/unit/test_memory.py
@@ -61,7 +61,6 @@ def prompt_dir(tmp_path):
 @pytest.mark.asyncio
 async def test_memory_on_event_exception_handling(memory, event_stream):
     """Test that exceptions in Memory.on_event are properly handled via status callback."""
-
     # Create a dummy agent for the controller
     agent = MagicMock(spec=Agent)
     agent.llm = MagicMock(spec=LLM)