mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Add tests for NullObservation with cause > 0 and clarify event flow (#7315)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
3150af1ad7
commit
259140ffc9
@ -271,9 +271,9 @@ class AgentController:
|
||||
await self._react_to_exception(reported)
|
||||
|
||||
def should_step(self, event: Event) -> bool:
|
||||
"""
|
||||
Whether the agent should take a step based on an event. In general,
|
||||
the agent should take a step if it receives a message from the user,
|
||||
"""Whether the agent should take a step based on an event.
|
||||
|
||||
In general, the agent should take a step if it receives a message from the user,
|
||||
or observes something in the environment (after acting).
|
||||
"""
|
||||
# it might be the delegate's day in the sun
|
||||
@ -296,7 +296,8 @@ class AgentController:
|
||||
if (
|
||||
isinstance(event, NullObservation)
|
||||
and event.cause is not None
|
||||
and event.cause > 0
|
||||
and event.cause
|
||||
> 0 # NullObservation has cause > 0 (RecallAction), not 0 (user message)
|
||||
):
|
||||
return True
|
||||
if isinstance(event, AgentStateChangedObservation) or isinstance(
|
||||
@ -312,7 +313,6 @@ class AgentController:
|
||||
Args:
|
||||
event (Event): The incoming event to process.
|
||||
"""
|
||||
|
||||
# If we have a delegate that is not finished or errored, forward events to it
|
||||
if self.delegate is not None:
|
||||
delegate_state = self.delegate.get_agent_state()
|
||||
@ -469,7 +469,7 @@ class AgentController:
|
||||
await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
|
||||
|
||||
def _reset(self) -> None:
|
||||
"""Resets the agent controller"""
|
||||
"""Resets the agent controller."""
|
||||
# Runnable actions need an Observation
|
||||
# make sure there is an Observation with the tool call metadata to be recognized by the agent
|
||||
# otherwise the pending action is found in history, but it's incomplete without an obs with tool result
|
||||
@ -621,7 +621,8 @@ class AgentController:
|
||||
)
|
||||
|
||||
def end_delegate(self) -> None:
|
||||
"""Ends the currently active delegate (e.g., if it is finished or errored)
|
||||
"""Ends the currently active delegate (e.g., if it is finished or errored).
|
||||
|
||||
so that this controller can resume normal operation.
|
||||
"""
|
||||
if self.delegate is None:
|
||||
@ -1029,8 +1030,9 @@ class AgentController:
|
||||
)
|
||||
|
||||
def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
|
||||
"""Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
|
||||
and ensuring the first user message is always included.
|
||||
"""Cuts history roughly in half when context window is exceeded.
|
||||
|
||||
It preserves action-observation pairs and ensures that the first user message is always included.
|
||||
|
||||
The algorithm:
|
||||
1. Cut history in half
|
||||
@ -1183,8 +1185,7 @@ class AgentController:
|
||||
return False
|
||||
|
||||
def _first_user_message(self) -> MessageAction | None:
|
||||
"""
|
||||
Get the first user message for this agent.
|
||||
"""Get the first user message for this agent.
|
||||
|
||||
For regular agents, this is the first user message from the beginning (start_id=0).
|
||||
For delegate agents, this is the first user message after the delegate's start_id.
|
||||
|
||||
@ -20,6 +20,7 @@ from openhands.events.observation import (
|
||||
ErrorObservation,
|
||||
)
|
||||
from openhands.events.observation.agent import RecallObservation
|
||||
from openhands.events.observation.empty import NullObservation
|
||||
from openhands.events.serialization import event_to_dict
|
||||
from openhands.llm import LLM
|
||||
from openhands.llm.metrics import Metrics, TokenUsage
|
||||
@ -1038,3 +1039,117 @@ async def test_first_user_message_with_identical_content():
|
||||
) # This should be False, but may be True if there's a bug
|
||||
|
||||
await controller.close()
|
||||
|
||||
|
||||
async def test_agent_controller_processes_null_observation_with_cause():
    """Test that AgentController processes NullObservation events with a cause value.

    A user message is added to a real event stream. The test then expects a
    RecallAction and a NullObservation caused by it to appear in the stream,
    ``should_step`` to return True for that observation, and the controller's
    ``step`` method to have been called. Finally it checks that ``should_step``
    returns False for a NullObservation whose cause is 0.
    """
    # NOTE(review): other async tests carry @pytest.mark.asyncio; confirm this
    # one is actually collected as a coroutine test (e.g. pytest-asyncio auto mode).

    # Create an in-memory file store and real event stream
    file_store = InMemoryFileStore()
    event_stream = EventStream(sid='test-session', file_store=file_store)

    # Create a Memory instance - not used directly in this test but needed for setup
    # (presumably it attaches itself to the stream and answers the RecallAction; verify)
    Memory(event_stream=event_stream, sid='test-session')

    # Create a mock agent with necessary attributes
    mock_agent = MagicMock(spec=Agent)
    mock_agent.llm = MagicMock(spec=LLM)
    mock_agent.llm.metrics = Metrics()
    mock_agent.llm.config = AppConfig().get_llm_config()

    # Create a controller with the mock agent
    controller = AgentController(
        agent=mock_agent,
        event_stream=event_stream,
        max_iterations=10,
        sid='test-session',
    )

    # Patch the controller's step method to track calls
    with patch.object(controller, 'step') as mock_step:
        try:
            # Create and add the first user message (will have ID 0)
            user_message = MessageAction(content='First user message')
            user_message._source = EventSource.USER  # type: ignore[attr-defined]
            event_stream.add_event(user_message, EventSource.USER)

            # Give it a little time to process
            await asyncio.sleep(0.3)

            # Get all events from the stream
            events = list(event_stream.get_events())

            # Events in the stream:
            # Event 0: MessageAction, ID: 0, Cause: None, Source: EventSource.USER, Content: First user message
            # Event 1: RecallAction, ID: 1, Cause: None, Source: EventSource.USER, Content: N/A
            # Event 2: NullObservation, ID: 2, Cause: 1, Source: EventSource.ENVIRONMENT, Content:
            # Event 3: AgentStateChangedObservation, ID: 3, Cause: None, Source: EventSource.ENVIRONMENT, Content:

            # Find the RecallAction event (should be automatically created)
            recall_actions = [
                event for event in events if isinstance(event, RecallAction)
            ]
            assert len(recall_actions) > 0, 'No RecallAction was created'
            recall_action = recall_actions[0]

            # Find any NullObservation events
            null_obs_events = [
                event for event in events if isinstance(event, NullObservation)
            ]
            assert len(null_obs_events) > 0, 'No NullObservation was created'
            null_observation = null_obs_events[0]

            # Verify the NullObservation has a cause that points to the RecallAction
            assert null_observation.cause is not None, 'NullObservation cause is None'
            assert (
                null_observation.cause == recall_action.id
            ), f'Expected cause={recall_action.id}, got cause={null_observation.cause}'

            # Verify the controller's should_step method returns True for this observation
            assert controller.should_step(
                null_observation
            ), 'should_step should return True for this NullObservation'

            # Verify the controller's step method was called
            # This means the controller processed the NullObservation
            assert mock_step.called, "Controller's step method was not called"

            # Now test with a NullObservation that has cause=0
            # Create a NullObservation with cause = 0 (pointing to the first user message)
            null_observation_zero = NullObservation(
                content='Test observation with cause=0'
            )
            null_observation_zero._cause = 0  # type: ignore[attr-defined]

            # Verify the controller's should_step method would return False for this observation
            assert not controller.should_step(
                null_observation_zero
            ), 'should_step should return False for NullObservation with cause=0'
        finally:
            # Release the controller even when an assertion fails, as the
            # preceding controller test does; done while `step` is still patched
            # so late events cannot reach the real step implementation.
            await controller.close()
|
||||
|
||||
|
||||
def test_agent_controller_should_step_with_null_observation_cause_zero():
    """Check that ``should_step`` returns False for a NullObservation whose cause is 0."""
    # Event stream backed by an in-memory file store
    stream = EventStream(sid='test-session', file_store=InMemoryFileStore())

    # The agent is only a spec'd stand-in
    agent_stub = MagicMock(spec=Agent)

    # Controller under test
    controller = AgentController(
        agent=agent_stub,
        event_stream=stream,
        max_iterations=10,
        sid='test-session',
    )

    # A cause of 0 should not happen, but if it does, the controller shouldn't step.
    zero_cause_obs = NullObservation(content='Test observation')
    zero_cause_obs._cause = 0  # type: ignore[attr-defined]

    # Stepping is only wanted for NullObservation with cause > 0
    outcome = controller.should_step(zero_cause_obs)
    assert (
        outcome is False
    ), 'should_step should return False for NullObservation with cause = 0'
|
||||
|
||||
@ -61,7 +61,6 @@ def prompt_dir(tmp_path):
|
||||
@pytest.mark.asyncio
|
||||
async def test_memory_on_event_exception_handling(memory, event_stream):
|
||||
"""Test that exceptions in Memory.on_event are properly handled via status callback."""
|
||||
|
||||
# Create a dummy agent for the controller
|
||||
agent = MagicMock(spec=Agent)
|
||||
agent.llm = MagicMock(spec=LLM)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user