From 435e53769329578e3e14d6fcbfd6fc660e63340b Mon Sep 17 00:00:00 2001
From: Nhan Nguyen <nhan13574@gmail.com>
Date: Wed, 17 Dec 2025 07:05:10 -0500
Subject: [PATCH] fix: Prevent old instructions from being re-executed after
 conversation condensation (#11982)

---
 .../agenthub/codeact_agent/codeact_agent.py   |  15 +-
 openhands/memory/conversation_memory.py       |  41 ++++-
 openhands/memory/view.py                      |   3 +
 tests/unit/agenthub/test_agents.py            |   8 +-
 tests/unit/agenthub/test_prompt_caching.py    |   4 +-
 tests/unit/memory/test_conversation_memory.py | 144 +++++++++++++++++-
 6 files changed, 197 insertions(+), 18 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 85e5f88cbc..9dd814e9cf 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -194,9 +194,12 @@ class CodeActAgent(Agent):
         # event we'll just return that instead of an action. The controller will
         # immediately ask the agent to step again with the new view.
         condensed_history: list[Event] = []
+        # Track which event IDs have been forgotten/condensed
+        forgotten_event_ids: set[int] = set()
         match self.condenser.condensed_history(state):
-            case View(events=events):
+            case View(events=events, forgotten_event_ids=forgotten_ids):
                 condensed_history = events
+                forgotten_event_ids = forgotten_ids
 
             case Condensation(action=condensation_action):
                 return condensation_action
@@ -206,7 +209,9 @@ class CodeActAgent(Agent):
         )
 
         initial_user_message = self._get_initial_user_message(state.history)
-        messages = self._get_messages(condensed_history, initial_user_message)
+        messages = self._get_messages(
+            condensed_history, initial_user_message, forgotten_event_ids
+        )
         params: dict = {
             'messages': messages,
         }
@@ -245,7 +250,10 @@ class CodeActAgent(Agent):
         return initial_user_message
 
     def _get_messages(
-        self, events: list[Event], initial_user_message: MessageAction
+        self,
+        events: list[Event],
+        initial_user_message: MessageAction,
+        forgotten_event_ids: set[int],
     ) -> list[Message]:
         """Constructs the message history for the LLM conversation.
 
@@ -284,6 +292,7 @@ class CodeActAgent(Agent):
         messages = self.conversation_memory.process_events(
             condensed_history=events,
             initial_user_action=initial_user_message,
+            forgotten_event_ids=forgotten_event_ids,
             max_message_chars=self.llm.config.max_message_chars,
             vision_is_active=self.llm.vision_is_active(),
         )
diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py
index 5ff6ec7e58..5ae1a2cd71 100644
--- a/openhands/memory/conversation_memory.py
+++ b/openhands/memory/conversation_memory.py
@@ -76,6 +76,7 @@ class ConversationMemory:
         self,
         condensed_history: list[Event],
         initial_user_action: MessageAction,
+        forgotten_event_ids: set[int] | None = None,
         max_message_chars: int | None = None,
         vision_is_active: bool = False,
     ) -> list[Message]:
@@ -85,16 +86,23 @@ class ConversationMemory:
 
         Args:
             condensed_history: The condensed history of events to convert
+            initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
+            forgotten_event_ids: Set of event IDs that have been forgotten/condensed. If the initial user action's ID
+                is in this set, it will not be re-inserted to prevent re-execution of old instructions.
             max_message_chars: The maximum number of characters in the content of an event included
                 in the prompt to the LLM. Larger observations are truncated.
             vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included.
-            initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
         """
         events = condensed_history
+        # Default to empty set if not provided
+        if forgotten_event_ids is None:
+            forgotten_event_ids = set()
 
         # Ensure the event list starts with SystemMessageAction, then MessageAction(source='user')
         self._ensure_system_message(events)
-        self._ensure_initial_user_message(events, initial_user_action)
+        self._ensure_initial_user_message(
+            events, initial_user_action, forgotten_event_ids
+        )
 
         # log visual browsing status
         logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}')
@@ -827,9 +835,23 @@ class ConversationMemory:
                 )
 
     def _ensure_initial_user_message(
-        self, events: list[Event], initial_user_action: MessageAction
+        self,
+        events: list[Event],
+        initial_user_action: MessageAction,
+        forgotten_event_ids: set[int],
     ) -> None:
-        """Checks if the second event is a user MessageAction and inserts the provided one if needed."""
+        """Checks if the second event is a user MessageAction and inserts the provided one if needed.
+
+        IMPORTANT: If the initial user action has been condensed (its ID is in forgotten_event_ids),
+        we do NOT re-insert it. This prevents old instructions from being re-executed after
+        conversation condensation. The condensation summary already contains the context of
+        what was requested and completed.
+
+        Args:
+            events: The list of events to modify in-place
+            initial_user_action: The initial user message action from the full history
+            forgotten_event_ids: Set of event IDs that have been forgotten/condensed
+        """
         if (
             not events
         ):  # Should have system message from previous step, but safety check
@@ -837,6 +859,17 @@ class ConversationMemory:
             # Or raise? Let's log for now, _ensure_system_message should handle this.
             return
 
+        # Check if the initial user action has been condensed/forgotten.
+        # If so, we should NOT re-insert it to prevent re-execution of old instructions.
+        # The condensation summary already contains the context of what was requested.
+        initial_user_action_id = initial_user_action.id
+        if initial_user_action_id in forgotten_event_ids:
+            logger.info(
+                f'Initial user action (id={initial_user_action_id}) has been condensed. '
+                'Not re-inserting to prevent re-execution of old instructions.'
+            )
+            return
+
         # We expect events[0] to be SystemMessageAction after _ensure_system_message
         if len(events) == 1:
             # Only system message exists
diff --git a/openhands/memory/view.py b/openhands/memory/view.py
index 87a20b6340..81dd8bab5d 100644
--- a/openhands/memory/view.py
+++ b/openhands/memory/view.py
@@ -18,6 +18,8 @@ class View(BaseModel):
 
     events: list[Event]
     unhandled_condensation_request: bool = False
+    # Set of event IDs that have been forgotten/condensed
+    forgotten_event_ids: set[int] = set()
 
     def __len__(self) -> int:
         return len(self.events)
@@ -90,4 +92,5 @@ class View(BaseModel):
         return View(
             events=kept_events,
             unhandled_condensation_request=unhandled_condensation_request,
+            forgotten_event_ids=forgotten_event_ids,
         )
diff --git a/tests/unit/agenthub/test_agents.py b/tests/unit/agenthub/test_agents.py
index 2a90dcb668..09f28e991c 100644
--- a/tests/unit/agenthub/test_agents.py
+++ b/tests/unit/agenthub/test_agents.py
@@ -393,7 +393,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
     # 2. The action message
     # 3. The observation message
     mock_state.history = [initial_user_message, action, observation]
-    messages = agent._get_messages(mock_state.history, initial_user_message)
+    messages = agent._get_messages(mock_state.history, initial_user_message, set())
     assert len(messages) == 4  # System + initial user + action + observation
     assert messages[0].role == 'system'  # First message should be the system message
     assert (
@@ -404,7 +404,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
 
     # The same should hold if the events are presented out-of-order
     mock_state.history = [initial_user_message, observation, action]
-    messages = agent._get_messages(mock_state.history, initial_user_message)
+    messages = agent._get_messages(mock_state.history, initial_user_message, set())
     assert len(messages) == 4
     assert messages[0].role == 'system'  # First message should be the system message
     assert (
@@ -414,7 +414,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
     # If only one of the two events is present, then we should just get the system message
     # plus any valid message from the event
     mock_state.history = [initial_user_message, action]
-    messages = agent._get_messages(mock_state.history, initial_user_message)
+    messages = agent._get_messages(mock_state.history, initial_user_message, set())
     assert (
         len(messages) == 2
     )  # System + initial user message, action is waiting for its observation
@@ -422,7 +422,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
     assert messages[1].role == 'user'
 
     mock_state.history = [initial_user_message, observation]
-    messages = agent._get_messages(mock_state.history, initial_user_message)
+    messages = agent._get_messages(mock_state.history, initial_user_message, set())
     assert (
         len(messages) == 2
     )  # System + initial user message, observation has no matching action
diff --git a/tests/unit/agenthub/test_prompt_caching.py b/tests/unit/agenthub/test_prompt_caching.py
index 60cc0bb16f..2435b1320a 100644
--- a/tests/unit/agenthub/test_prompt_caching.py
+++ b/tests/unit/agenthub/test_prompt_caching.py
@@ -80,7 +80,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
     history.append(message_action_5)
 
     codeact_agent.reset()
-    messages = codeact_agent._get_messages(history, message_action_1)
+    messages = codeact_agent._get_messages(history, message_action_1, set())
 
     assert (
         len(messages) == 6
@@ -122,7 +122,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
         history.append(message_action_agent)
 
     codeact_agent.reset()
-    messages = codeact_agent._get_messages(history, initial_user_message)
+    messages = codeact_agent._get_messages(history, initial_user_message, set())
 
     # Check that only the last two user messages have cache_prompt=True
     cached_user_messages = [
diff --git a/tests/unit/memory/test_conversation_memory.py b/tests/unit/memory/test_conversation_memory.py
index abaa8d9a3d..50fd48f49a 100644
--- a/tests/unit/memory/test_conversation_memory.py
+++ b/tests/unit/memory/test_conversation_memory.py
@@ -158,7 +158,8 @@ def test_ensure_initial_user_message_adds_if_only_system(
     system_message = SystemMessageAction(content='System')
     system_message._source = EventSource.AGENT
     events = [system_message]
-    conversation_memory._ensure_initial_user_message(events, initial_user_action)
+    # Pass empty set for forgotten_event_ids (no events have been condensed)
+    conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
     assert len(events) == 2
     assert events[0] == system_message
     assert events[1] == initial_user_action
@@ -177,7 +178,8 @@ def test_ensure_initial_user_message_correct_already_present(
         agent_message,
     ]
     original_events = list(events)
-    conversation_memory._ensure_initial_user_message(events, initial_user_action)
+    # Pass empty set for forgotten_event_ids (no events have been condensed)
+    conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
     assert events == original_events
 
 
@@ -189,7 +191,8 @@ def test_ensure_initial_user_message_incorrect_at_index_1(
     incorrect_second_message = MessageAction(content='Assistant')
     incorrect_second_message._source = EventSource.AGENT
     events = [system_message, incorrect_second_message]
-    conversation_memory._ensure_initial_user_message(events, initial_user_action)
+    # Pass empty set for forgotten_event_ids (no events have been condensed)
+    conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
     assert len(events) == 3
     assert events[0] == system_message
     assert events[1] == initial_user_action  # Correct one inserted
@@ -206,7 +209,8 @@ def test_ensure_initial_user_message_correct_present_later(
     # Correct initial message is present, but later in the list
     events = [system_message, incorrect_second_message]
     conversation_memory._ensure_system_message(events)
-    conversation_memory._ensure_initial_user_message(events, initial_user_action)
+    # Pass empty set for forgotten_event_ids (no events have been condensed)
+    conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
     assert len(events) == 3  # Should still insert at index 1, not remove the later one
     assert events[0] == system_message
     assert events[1] == initial_user_action  # Correct one inserted at index 1
@@ -222,7 +226,8 @@ def test_ensure_initial_user_message_different_user_msg_at_index_1(
     different_user_message = MessageAction(content='Different User Message')
     different_user_message._source = EventSource.USER
     events = [system_message, different_user_message]
-    conversation_memory._ensure_initial_user_message(events, initial_user_action)
+    # Pass empty set for forgotten_event_ids (no events have been condensed)
+    conversation_memory._ensure_initial_user_message(events, initial_user_action, set())
     assert len(events) == 2
     assert events[0] == system_message
     assert events[1] == different_user_message  # Original second message remains
@@ -1583,3 +1588,132 @@ def test_process_ipython_observation_with_vision_disabled(
     assert isinstance(message.content[1], ImageContent)
     # Check that NO explanatory text about filtered images was added when vision is disabled
     assert 'invalid or empty image(s) were filtered' not in message.content[0].text
+
+
+def test_ensure_initial_user_message_not_reinserted_when_condensed(
+    conversation_memory, initial_user_action
+):
+    """Test that initial user message is NOT re-inserted when it has been condensed.
+
+    This is a critical test for bug #11910: Old instructions should not be re-executed
+    after conversation condensation. If the initial user message has been condensed
+    (its ID is in the forgotten_event_ids set), we should NOT re-insert it to prevent
+    the LLM from seeing old instructions as fresh commands.
+    """
+    system_message = SystemMessageAction(content='System')
+    system_message._source = EventSource.AGENT
+
+    # Simulate that the initial_user_action has been condensed by adding its ID
+    # to the forgotten_event_ids set
+    initial_user_action._id = 1  # Assign an ID to the initial user action
+    forgotten_event_ids = {1}  # The initial user action's ID is in the forgotten set
+
+    events = [system_message]  # Only system message, no user message
+
+    # Call _ensure_initial_user_message with the condensed event ID
+    conversation_memory._ensure_initial_user_message(
+        events, initial_user_action, forgotten_event_ids
+    )
+
+    # The initial user action should NOT be inserted because it was condensed
+    assert len(events) == 1
+    assert events[0] == system_message
+    # Verify the initial user action was NOT added
+    assert initial_user_action not in events
+
+
+def test_ensure_initial_user_message_reinserted_when_not_condensed(
+    conversation_memory, initial_user_action
+):
+    """Test that initial user message IS re-inserted when it has NOT been condensed.
+
+    This ensures backward compatibility: when no condensation has happened,
+    the initial user message should still be inserted as before.
+    """
+    system_message = SystemMessageAction(content='System')
+    system_message._source = EventSource.AGENT
+
+    # The initial user action has NOT been condensed
+    initial_user_action._id = 1
+    forgotten_event_ids = {5, 10, 15}  # Different IDs, not including the initial action
+
+    events = [system_message]
+
+    # Call _ensure_initial_user_message with non-matching forgotten IDs
+    conversation_memory._ensure_initial_user_message(
+        events, initial_user_action, forgotten_event_ids
+    )
+
+    # The initial user action SHOULD be inserted because it was NOT condensed
+    assert len(events) == 2
+    assert events[0] == system_message
+    assert events[1] == initial_user_action
+
+
+def test_process_events_does_not_reinsert_condensed_initial_message(
+    conversation_memory,
+):
+    """Test that process_events does not re-insert initial user message when condensed.
+
+    This is an integration test for the full process_events flow, verifying that
+    when the initial user message has been condensed, it is not re-inserted into
+    the conversation sent to the LLM.
+    """
+    # Create a system message
+    system_message = SystemMessageAction(content='System message')
+    system_message._source = EventSource.AGENT
+    system_message._id = 0
+
+    # Create the initial user message (will be marked as condensed)
+    initial_user_message = MessageAction(content='Do task A, B, and C')
+    initial_user_message._source = EventSource.USER
+    initial_user_message._id = 1
+
+    # Create a condensation summary observation
+    from openhands.events.observation.agent import AgentCondensationObservation
+
+    condensation_summary = AgentCondensationObservation(
+        content='Summary: User requested tasks A, B, C. Task A was completed successfully.'
+    )
+    condensation_summary._id = 2
+
+    # Create a recent user message (not condensed)
+    recent_user_message = MessageAction(content='Now continue with task D')
+    recent_user_message._source = EventSource.USER
+    recent_user_message._id = 3
+
+    # Simulate condensed history: system + summary + recent message
+    # The initial user message (id=1) has been condensed/forgotten
+    condensed_history = [system_message, condensation_summary, recent_user_message]
+
+    # The initial user message's ID is in the forgotten set
+    forgotten_event_ids = {1}
+
+    messages = conversation_memory.process_events(
+        condensed_history=condensed_history,
+        initial_user_action=initial_user_message,
+        forgotten_event_ids=forgotten_event_ids,
+        max_message_chars=None,
+        vision_is_active=False,
+    )
+
+    # Verify the structure of messages
+    # Should have: system, condensation summary, recent user message
+    # Should NOT have the initial user message "Do task A, B, and C"
+    assert len(messages) == 3
+    assert messages[0].role == 'system'
+    assert messages[0].content[0].text == 'System message'
+
+    # The second message should be the condensation summary, NOT the initial user message
+    assert messages[1].role == 'user'
+    assert 'Summary: User requested tasks A, B, C' in messages[1].content[0].text
+
+    # The third message should be the recent user message
+    assert messages[2].role == 'user'
+    assert 'Now continue with task D' in messages[2].content[0].text
+
+    # Critically, the old instruction should NOT appear
+    for msg in messages:
+        for content in msg.content:
+            if hasattr(content, 'text'):
+                assert 'Do task A, B, and C' not in content.text