Add tests for NullObservation with cause > 0 and clarify event flow (#7315)

Co-authored-by: openhands <openhands@all-hands.dev>
2025-12-26 05:48:36 +08:00 · 2025-03-18 16:21:09 +01:00 · 2025-03-18 16:21:09 +01:00 · 259140ffc9
commit 259140ffc9
parent 3150af1ad7
3 changed files with 127 additions and 12 deletions
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@ -271,9 +271,9 @@ class AgentController:
            await self._react_to_exception(reported)

    def should_step(self, event: Event) -> bool:
-        """
-        Whether the agent should take a step based on an event. In general,
-        the agent should take a step if it receives a message from the user,
+        """Whether the agent should take a step based on an event.
+
+        In general, the agent should take a step if it receives a message from the user,
        or observes something in the environment (after acting).
        """
        # it might be the delegate's day in the sun
@ -296,7 +296,8 @@ class AgentController:
            if (
                isinstance(event, NullObservation)
                and event.cause is not None
-                and event.cause > 0
+                and event.cause
+                > 0  # NullObservation has cause > 0 (RecallAction), not 0 (user message)
            ):
                return True
            if isinstance(event, AgentStateChangedObservation) or isinstance(
@ -312,7 +313,6 @@ class AgentController:
        Args:
            event (Event): The incoming event to process.
        """
-
        # If we have a delegate that is not finished or errored, forward events to it
        if self.delegate is not None:
            delegate_state = self.delegate.get_agent_state()
@ -469,7 +469,7 @@ class AgentController:
            await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)

    def _reset(self) -> None:
-        """Resets the agent controller"""
+        """Resets the agent controller."""
        # Runnable actions need an Observation
        # make sure there is an Observation with the tool call metadata to be recognized by the agent
        # otherwise the pending action is found in history, but it's incomplete without an obs with tool result
@ -621,7 +621,8 @@ class AgentController:
        )

    def end_delegate(self) -> None:
-        """Ends the currently active delegate (e.g., if it is finished or errored)
+        """Ends the currently active delegate (e.g., if it is finished or errored).
+
        so that this controller can resume normal operation.
        """
        if self.delegate is None:
@ -1029,8 +1030,9 @@ class AgentController:
        )

    def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
-        """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
-        and ensuring the first user message is always included.
+        """Cuts history roughly in half when context window is exceeded.
+
+        It preserves action-observation pairs and ensures that the first user message is always included.

        The algorithm:
        1. Cut history in half
@ -1183,8 +1185,7 @@ class AgentController:
        return False

    def _first_user_message(self) -> MessageAction | None:
-        """
-        Get the first user message for this agent.
+        """Get the first user message for this agent.

        For regular agents, this is the first user message from the beginning (start_id=0).
        For delegate agents, this is the first user message after the delegate's start_id.
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@ -20,6 +20,7 @@ from openhands.events.observation import (
    ErrorObservation,
 )
 from openhands.events.observation.agent import RecallObservation
+from openhands.events.observation.empty import NullObservation
 from openhands.events.serialization import event_to_dict
 from openhands.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
@ -1038,3 +1039,117 @@ async def test_first_user_message_with_identical_content():
    )  # This should be False, but may be True if there's a bug

    await controller.close()
+
+
+async def test_agent_controller_processes_null_observation_with_cause():
+    """Test that AgentController processes NullObservation events with a cause value.
+
+    And that the agent's step method is called as a result.
+    """
+    # Create an in-memory file store and real event stream
+    file_store = InMemoryFileStore()
+    event_stream = EventStream(sid='test-session', file_store=file_store)
+
+    # Create a Memory instance - not used directly in this test but needed for setup
+    Memory(event_stream=event_stream, sid='test-session')
+
+    # Create a mock agent with necessary attributes
+    mock_agent = MagicMock(spec=Agent)
+    mock_agent.llm = MagicMock(spec=LLM)
+    mock_agent.llm.metrics = Metrics()
+    mock_agent.llm.config = AppConfig().get_llm_config()
+
+    # Create a controller with the mock agent
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test-session',
+    )
+
+    # Patch the controller's step method to track calls
+    with patch.object(controller, 'step') as mock_step:
+        # Create and add the first user message (will have ID 0)
+        user_message = MessageAction(content='First user message')
+        user_message._source = EventSource.USER  # type: ignore[attr-defined]
+        event_stream.add_event(user_message, EventSource.USER)
+
+        # Give it a little time to process
+        await asyncio.sleep(0.3)
+
+        # Get all events from the stream
+        events = list(event_stream.get_events())
+
+        # Events in the stream:
+        # Event 0: MessageAction, ID: 0, Cause: None, Source: EventSource.USER, Content: First user message
+        # Event 1: RecallAction, ID: 1, Cause: None, Source: EventSource.USER, Content: N/A
+        # Event 2: NullObservation, ID: 2, Cause: 1, Source: EventSource.ENVIRONMENT, Content:
+        # Event 3: AgentStateChangedObservation, ID: 3, Cause: None, Source: EventSource.ENVIRONMENT, Content:
+
+        # Find the RecallAction event (should be automatically created)
+        recall_actions = [event for event in events if isinstance(event, RecallAction)]
+        assert len(recall_actions) > 0, 'No RecallAction was created'
+        recall_action = recall_actions[0]
+
+        # Find any NullObservation events
+        null_obs_events = [
+            event for event in events if isinstance(event, NullObservation)
+        ]
+        assert len(null_obs_events) > 0, 'No NullObservation was created'
+        null_observation = null_obs_events[0]
+
+        # Verify the NullObservation has a cause that points to the RecallAction
+        assert null_observation.cause is not None, 'NullObservation cause is None'
+        assert (
+            null_observation.cause == recall_action.id
+        ), f'Expected cause={recall_action.id}, got cause={null_observation.cause}'
+
+        # Verify the controller's should_step method returns True for this observation
+        assert controller.should_step(
+            null_observation
+        ), 'should_step should return True for this NullObservation'
+
+        # Verify the controller's step method was called
+        # This means the controller processed the NullObservation
+        assert mock_step.called, "Controller's step method was not called"
+
+        # Now test with a NullObservation that has cause=0
+        # Create a NullObservation with cause = 0 (pointing to the first user message)
+        null_observation_zero = NullObservation(content='Test observation with cause=0')
+        null_observation_zero._cause = 0  # type: ignore[attr-defined]
+
+        # Verify the controller's should_step method would return False for this observation
+        assert not controller.should_step(
+            null_observation_zero
+        ), 'should_step should return False for NullObservation with cause=0'
+
+
+def test_agent_controller_should_step_with_null_observation_cause_zero():
+    """Test that AgentController's should_step method returns False for NullObservation with cause = 0."""
+    # Create a mock event stream
+    file_store = InMemoryFileStore()
+    event_stream = EventStream(sid='test-session', file_store=file_store)
+
+    # Create a mock agent
+    mock_agent = MagicMock(spec=Agent)
+
+    # Create an agent controller
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=event_stream,
+        max_iterations=10,
+        sid='test-session',
+    )
+
+    # Create a NullObservation with cause = 0
+    # This should not happen, but if it does, the controller shouldn't step.
+    null_observation = NullObservation(content='Test observation')
+    null_observation._cause = 0  # type: ignore[attr-defined]
+
+    # Check if should_step returns False for this observation
+    result = controller.should_step(null_observation)
+
+    # It should return False since we only want to step on NullObservation with cause > 0
+    assert (
+        result is False
+    ), 'should_step should return False for NullObservation with cause = 0'
--- a/tests/unit/test_memory.py
+++ b/tests/unit/test_memory.py
@ -61,7 +61,6 @@ def prompt_dir(tmp_path):
@pytest.mark.asyncio
 async def test_memory_on_event_exception_handling(memory, event_stream):
    """Test that exceptions in Memory.on_event are properly handled via status callback."""
-
    # Create a dummy agent for the controller
    agent = MagicMock(spec=Agent)
    agent.llm = MagicMock(spec=LLM)