mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-25 21:36:52 +08:00
fix: Prevent old instructions from being re-executed after conversation condensation (#11982)
This commit is contained in:
parent
dc14624480
commit
435e537693
@ -194,9 +194,12 @@ class CodeActAgent(Agent):
|
||||
# event we'll just return that instead of an action. The controller will
|
||||
# immediately ask the agent to step again with the new view.
|
||||
condensed_history: list[Event] = []
|
||||
# Track which event IDs have been forgotten/condensed
|
||||
forgotten_event_ids: set[int] = set()
|
||||
match self.condenser.condensed_history(state):
|
||||
case View(events=events):
|
||||
case View(events=events, forgotten_event_ids=forgotten_ids):
|
||||
condensed_history = events
|
||||
forgotten_event_ids = forgotten_ids
|
||||
|
||||
case Condensation(action=condensation_action):
|
||||
return condensation_action
|
||||
@ -206,7 +209,9 @@ class CodeActAgent(Agent):
|
||||
)
|
||||
|
||||
initial_user_message = self._get_initial_user_message(state.history)
|
||||
messages = self._get_messages(condensed_history, initial_user_message)
|
||||
messages = self._get_messages(
|
||||
condensed_history, initial_user_message, forgotten_event_ids
|
||||
)
|
||||
params: dict = {
|
||||
'messages': messages,
|
||||
}
|
||||
@ -245,7 +250,10 @@ class CodeActAgent(Agent):
|
||||
return initial_user_message
|
||||
|
||||
def _get_messages(
|
||||
self, events: list[Event], initial_user_message: MessageAction
|
||||
self,
|
||||
events: list[Event],
|
||||
initial_user_message: MessageAction,
|
||||
forgotten_event_ids: set[int],
|
||||
) -> list[Message]:
|
||||
"""Constructs the message history for the LLM conversation.
|
||||
|
||||
@ -284,6 +292,7 @@ class CodeActAgent(Agent):
|
||||
messages = self.conversation_memory.process_events(
|
||||
condensed_history=events,
|
||||
initial_user_action=initial_user_message,
|
||||
forgotten_event_ids=forgotten_event_ids,
|
||||
max_message_chars=self.llm.config.max_message_chars,
|
||||
vision_is_active=self.llm.vision_is_active(),
|
||||
)
|
||||
|
||||
@ -76,6 +76,7 @@ class ConversationMemory:
|
||||
self,
|
||||
condensed_history: list[Event],
|
||||
initial_user_action: MessageAction,
|
||||
forgotten_event_ids: set[int] | None = None,
|
||||
max_message_chars: int | None = None,
|
||||
vision_is_active: bool = False,
|
||||
) -> list[Message]:
|
||||
@ -85,16 +86,23 @@ class ConversationMemory:
|
||||
|
||||
Args:
|
||||
condensed_history: The condensed history of events to convert
|
||||
initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
|
||||
forgotten_event_ids: Set of event IDs that have been forgotten/condensed. If the initial user action's ID
|
||||
is in this set, it will not be re-inserted to prevent re-execution of old instructions.
|
||||
max_message_chars: The maximum number of characters in the content of an event included
|
||||
in the prompt to the LLM. Larger observations are truncated.
|
||||
vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included.
|
||||
initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
|
||||
"""
|
||||
events = condensed_history
|
||||
# Default to empty set if not provided
|
||||
if forgotten_event_ids is None:
|
||||
forgotten_event_ids = set()
|
||||
|
||||
# Ensure the event list starts with SystemMessageAction, then MessageAction(source='user')
|
||||
self._ensure_system_message(events)
|
||||
self._ensure_initial_user_message(events, initial_user_action)
|
||||
self._ensure_initial_user_message(
|
||||
events, initial_user_action, forgotten_event_ids
|
||||
)
|
||||
|
||||
# log visual browsing status
|
||||
logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}')
|
||||
@ -827,9 +835,23 @@ class ConversationMemory:
|
||||
)
|
||||
|
||||
def _ensure_initial_user_message(
|
||||
self, events: list[Event], initial_user_action: MessageAction
|
||||
self,
|
||||
events: list[Event],
|
||||
initial_user_action: MessageAction,
|
||||
forgotten_event_ids: set[int],
|
||||
) -> None:
|
||||
"""Checks if the second event is a user MessageAction and inserts the provided one if needed."""
|
||||
"""Checks if the second event is a user MessageAction and inserts the provided one if needed.
|
||||
|
||||
IMPORTANT: If the initial user action has been condensed (its ID is in forgotten_event_ids),
|
||||
we do NOT re-insert it. This prevents old instructions from being re-executed after
|
||||
conversation condensation. The condensation summary already contains the context of
|
||||
what was requested and completed.
|
||||
|
||||
Args:
|
||||
events: The list of events to modify in-place
|
||||
initial_user_action: The initial user message action from the full history
|
||||
forgotten_event_ids: Set of event IDs that have been forgotten/condensed
|
||||
"""
|
||||
if (
|
||||
not events
|
||||
): # Should have system message from previous step, but safety check
|
||||
@ -837,6 +859,17 @@ class ConversationMemory:
|
||||
# Or raise? Let's log for now, _ensure_system_message should handle this.
|
||||
return
|
||||
|
||||
# Check if the initial user action has been condensed/forgotten.
|
||||
# If so, we should NOT re-insert it to prevent re-execution of old instructions.
|
||||
# The condensation summary already contains the context of what was requested.
|
||||
initial_user_action_id = initial_user_action.id
|
||||
if initial_user_action_id in forgotten_event_ids:
|
||||
logger.info(
|
||||
f'Initial user action (id={initial_user_action_id}) has been condensed. '
|
||||
'Not re-inserting to prevent re-execution of old instructions.'
|
||||
)
|
||||
return
|
||||
|
||||
# We expect events[0] to be SystemMessageAction after _ensure_system_message
|
||||
if len(events) == 1:
|
||||
# Only system message exists
|
||||
|
||||
@ -18,6 +18,8 @@ class View(BaseModel):
|
||||
|
||||
events: list[Event]
|
||||
unhandled_condensation_request: bool = False
|
||||
# Set of event IDs that have been forgotten/condensed
|
||||
forgotten_event_ids: set[int] = set()
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.events)
|
||||
@ -90,4 +92,5 @@ class View(BaseModel):
|
||||
return View(
|
||||
events=kept_events,
|
||||
unhandled_condensation_request=unhandled_condensation_request,
|
||||
forgotten_event_ids=forgotten_event_ids,
|
||||
)
|
||||
|
||||
@ -393,7 +393,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
# 2. The action message
|
||||
# 3. The observation message
|
||||
mock_state.history = [initial_user_message, action, observation]
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message)
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message, set())
|
||||
assert len(messages) == 4 # System + initial user + action + observation
|
||||
assert messages[0].role == 'system' # First message should be the system message
|
||||
assert (
|
||||
@ -404,7 +404,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
|
||||
# The same should hold if the events are presented out-of-order
|
||||
mock_state.history = [initial_user_message, observation, action]
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message)
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message, set())
|
||||
assert len(messages) == 4
|
||||
assert messages[0].role == 'system' # First message should be the system message
|
||||
assert (
|
||||
@ -414,7 +414,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
# If only one of the two events is present, then we should just get the system message
|
||||
# plus any valid message from the event
|
||||
mock_state.history = [initial_user_message, action]
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message)
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message, set())
|
||||
assert (
|
||||
len(messages) == 2
|
||||
) # System + initial user message, action is waiting for its observation
|
||||
@ -422,7 +422,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
assert messages[1].role == 'user'
|
||||
|
||||
mock_state.history = [initial_user_message, observation]
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message)
|
||||
messages = agent._get_messages(mock_state.history, initial_user_message, set())
|
||||
assert (
|
||||
len(messages) == 2
|
||||
) # System + initial user message, observation has no matching action
|
||||
|
||||
@ -80,7 +80,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
|
||||
history.append(message_action_5)
|
||||
|
||||
codeact_agent.reset()
|
||||
messages = codeact_agent._get_messages(history, message_action_1)
|
||||
messages = codeact_agent._get_messages(history, message_action_1, set())
|
||||
|
||||
assert (
|
||||
len(messages) == 6
|
||||
@ -122,7 +122,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
|
||||
history.append(message_action_agent)
|
||||
|
||||
codeact_agent.reset()
|
||||
messages = codeact_agent._get_messages(history, initial_user_message)
|
||||
messages = codeact_agent._get_messages(history, initial_user_message, set())
|
||||
|
||||
# Check that only the last two user messages have cache_prompt=True
|
||||
cached_user_messages = [
|
||||
|
||||
@ -158,7 +158,8 @@ def test_ensure_initial_user_message_adds_if_only_system(
|
||||
system_message = SystemMessageAction(content='System')
|
||||
system_message._source = EventSource.AGENT
|
||||
events = [system_message]
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action)
|
||||
# Pass empty set for forgotten_event_ids (no events have been condensed)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
|
||||
assert len(events) == 2
|
||||
assert events[0] == system_message
|
||||
assert events[1] == initial_user_action
|
||||
@ -177,7 +178,8 @@ def test_ensure_initial_user_message_correct_already_present(
|
||||
agent_message,
|
||||
]
|
||||
original_events = list(events)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action)
|
||||
# Pass empty set for forgotten_event_ids (no events have been condensed)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
|
||||
assert events == original_events
|
||||
|
||||
|
||||
@ -189,7 +191,8 @@ def test_ensure_initial_user_message_incorrect_at_index_1(
|
||||
incorrect_second_message = MessageAction(content='Assistant')
|
||||
incorrect_second_message._source = EventSource.AGENT
|
||||
events = [system_message, incorrect_second_message]
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action)
|
||||
# Pass empty set for forgotten_event_ids (no events have been condensed)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
|
||||
assert len(events) == 3
|
||||
assert events[0] == system_message
|
||||
assert events[1] == initial_user_action # Correct one inserted
|
||||
@ -206,7 +209,8 @@ def test_ensure_initial_user_message_correct_present_later(
|
||||
# Correct initial message is present, but later in the list
|
||||
events = [system_message, incorrect_second_message]
|
||||
conversation_memory._ensure_system_message(events)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action)
|
||||
# Pass empty set for forgotten_event_ids (no events have been condensed)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
|
||||
assert len(events) == 3 # Should still insert at index 1, not remove the later one
|
||||
assert events[0] == system_message
|
||||
assert events[1] == initial_user_action # Correct one inserted at index 1
|
||||
@ -222,7 +226,8 @@ def test_ensure_initial_user_message_different_user_msg_at_index_1(
|
||||
different_user_message = MessageAction(content='Different User Message')
|
||||
different_user_message._source = EventSource.USER
|
||||
events = [system_message, different_user_message]
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action)
|
||||
# Pass empty set for forgotten_event_ids (no events have been condensed)
|
||||
conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
|
||||
assert len(events) == 2
|
||||
assert events[0] == system_message
|
||||
assert events[1] == different_user_message # Original second message remains
|
||||
@ -1583,3 +1588,132 @@ def test_process_ipython_observation_with_vision_disabled(
|
||||
assert isinstance(message.content[1], ImageContent)
|
||||
# Check that NO explanatory text about filtered images was added when vision is disabled
|
||||
assert 'invalid or empty image(s) were filtered' not in message.content[0].text
|
||||
|
||||
|
||||
def test_ensure_initial_user_message_not_reinserted_when_condensed(
|
||||
conversation_memory, initial_user_action
|
||||
):
|
||||
"""Test that initial user message is NOT re-inserted when it has been condensed.
|
||||
|
||||
This is a critical test for bug #11910: Old instructions should not be re-executed
|
||||
after conversation condensation. If the initial user message has been condensed
|
||||
(its ID is in the forgotten_event_ids set), we should NOT re-insert it to prevent
|
||||
the LLM from seeing old instructions as fresh commands.
|
||||
"""
|
||||
system_message = SystemMessageAction(content='System')
|
||||
system_message._source = EventSource.AGENT
|
||||
|
||||
# Simulate that the initial_user_action has been condensed by adding its ID
|
||||
# to the forgotten_event_ids set
|
||||
initial_user_action._id = 1 # Assign an ID to the initial user action
|
||||
forgotten_event_ids = {1} # The initial user action's ID is in the forgotten set
|
||||
|
||||
events = [system_message] # Only system message, no user message
|
||||
|
||||
# Call _ensure_initial_user_message with the condensed event ID
|
||||
conversation_memory._ensure_initial_user_message(
|
||||
events, initial_user_action, forgotten_event_ids
|
||||
)
|
||||
|
||||
# The initial user action should NOT be inserted because it was condensed
|
||||
assert len(events) == 1
|
||||
assert events[0] == system_message
|
||||
# Verify the initial user action was NOT added
|
||||
assert initial_user_action not in events
|
||||
|
||||
|
||||
def test_ensure_initial_user_message_reinserted_when_not_condensed(
|
||||
conversation_memory, initial_user_action
|
||||
):
|
||||
"""Test that initial user message IS re-inserted when it has NOT been condensed.
|
||||
|
||||
This ensures backward compatibility: when no condensation has happened,
|
||||
the initial user message should still be inserted as before.
|
||||
"""
|
||||
system_message = SystemMessageAction(content='System')
|
||||
system_message._source = EventSource.AGENT
|
||||
|
||||
# The initial user action has NOT been condensed
|
||||
initial_user_action._id = 1
|
||||
forgotten_event_ids = {5, 10, 15} # Different IDs, not including the initial action
|
||||
|
||||
events = [system_message]
|
||||
|
||||
# Call _ensure_initial_user_message with non-matching forgotten IDs
|
||||
conversation_memory._ensure_initial_user_message(
|
||||
events, initial_user_action, forgotten_event_ids
|
||||
)
|
||||
|
||||
# The initial user action SHOULD be inserted because it was NOT condensed
|
||||
assert len(events) == 2
|
||||
assert events[0] == system_message
|
||||
assert events[1] == initial_user_action
|
||||
|
||||
|
||||
def test_process_events_does_not_reinsert_condensed_initial_message(
|
||||
conversation_memory,
|
||||
):
|
||||
"""Test that process_events does not re-insert initial user message when condensed.
|
||||
|
||||
This is an integration test for the full process_events flow, verifying that
|
||||
when the initial user message has been condensed, it is not re-inserted into
|
||||
the conversation sent to the LLM.
|
||||
"""
|
||||
# Create a system message
|
||||
system_message = SystemMessageAction(content='System message')
|
||||
system_message._source = EventSource.AGENT
|
||||
system_message._id = 0
|
||||
|
||||
# Create the initial user message (will be marked as condensed)
|
||||
initial_user_message = MessageAction(content='Do task A, B, and C')
|
||||
initial_user_message._source = EventSource.USER
|
||||
initial_user_message._id = 1
|
||||
|
||||
# Create a condensation summary observation
|
||||
from openhands.events.observation.agent import AgentCondensationObservation
|
||||
|
||||
condensation_summary = AgentCondensationObservation(
|
||||
content='Summary: User requested tasks A, B, C. Task A was completed successfully.'
|
||||
)
|
||||
condensation_summary._id = 2
|
||||
|
||||
# Create a recent user message (not condensed)
|
||||
recent_user_message = MessageAction(content='Now continue with task D')
|
||||
recent_user_message._source = EventSource.USER
|
||||
recent_user_message._id = 3
|
||||
|
||||
# Simulate condensed history: system + summary + recent message
|
||||
# The initial user message (id=1) has been condensed/forgotten
|
||||
condensed_history = [system_message, condensation_summary, recent_user_message]
|
||||
|
||||
# The initial user message's ID is in the forgotten set
|
||||
forgotten_event_ids = {1}
|
||||
|
||||
messages = conversation_memory.process_events(
|
||||
condensed_history=condensed_history,
|
||||
initial_user_action=initial_user_message,
|
||||
forgotten_event_ids=forgotten_event_ids,
|
||||
max_message_chars=None,
|
||||
vision_is_active=False,
|
||||
)
|
||||
|
||||
# Verify the structure of messages
|
||||
# Should have: system, condensation summary, recent user message
|
||||
# Should NOT have the initial user message "Do task A, B, and C"
|
||||
assert len(messages) == 3
|
||||
assert messages[0].role == 'system'
|
||||
assert messages[0].content[0].text == 'System message'
|
||||
|
||||
# The second message should be the condensation summary, NOT the initial user message
|
||||
assert messages[1].role == 'user'
|
||||
assert 'Summary: User requested tasks A, B, C' in messages[1].content[0].text
|
||||
|
||||
# The third message should be the recent user message
|
||||
assert messages[2].role == 'user'
|
||||
assert 'Now continue with task D' in messages[2].content[0].text
|
||||
|
||||
# Critically, the old instruction should NOT appear
|
||||
for msg in messages:
|
||||
for content in msg.content:
|
||||
if hasattr(content, 'text'):
|
||||
assert 'Do task A, B, and C' not in content.text
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user