Refactor to clean up and move utility/legacy out of the agent (#7917)

2025-12-26 05:48:36 +08:00 · 2025-04-18 19:53:33 +02:00 · 2025-04-18 19:53:33 +02:00 · a2c55cfdef
commit a2c55cfdef
parent 76cad626ed
16 changed files with 118 additions and 158 deletions
--- a/docs/modules/usage/how-to/custom-sandbox-guide.md
+++ b/docs/modules/usage/how-to/custom-sandbox-guide.md
@ -1,7 +1,7 @@
 # Custom Sandbox

 :::note
-This guide is for users that would like to use their own custom Docker image for the runtime. For example 
+This guide is for users that would like to use their own custom Docker image for the runtime. For example
 with certain tools or programming languages pre-installed.
 :::

--- a/evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
+++ b/evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
@ -837,4 +837,6 @@ MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
        k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
    },
 }
-MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = './tests/runtests.py --verbosity 2'
+MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
+    './tests/runtests.py --verbosity 2'
+)
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@ -6208,4 +6208,4 @@
        "tr": "Çalışma zamanının başlamasını bekliyor...",
        "de": "Warten auf den Start der Laufzeit..."
    }
-}
+}
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@ -6,12 +6,11 @@ from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
-from openhands.core.message import Message, TextContent
+from openhands.core.message import Message
 from openhands.events.action import (
    Action,
    AgentFinishAction,
 )
-from openhands.events.action.message import SystemMessageAction
 from openhands.events.event import Event
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
@ -193,75 +192,14 @@ class CodeActAgent(Agent):
        if not self.prompt_manager:
            raise Exception('Prompt Manager not instantiated.')

-        # Check if there's a SystemMessageAction in the events
-        has_system_message = any(
-            isinstance(event, SystemMessageAction) for event in events
-        )
-
-        # Legacy behavior: If no SystemMessageAction is found, add one
-        if not has_system_message:
-            logger.warning(
-                f'[{self.name}] No SystemMessageAction found in events. '
-                'Adding one for backward compatibility. '
-                'This is deprecated behavior and will be removed in a future version.'
-            )
-            system_message = self.get_system_message()
-            if system_message:
-                # Create a copy and insert at the beginning of the list
-                processed_events = list(events)
-                processed_events.insert(0, system_message)
-                logger.debug(
-                    f'[{self.name}] Added SystemMessageAction for backward compatibility'
-                )
-        else:
-            processed_events = events
-
        # Use ConversationMemory to process events (including SystemMessageAction)
        messages = self.conversation_memory.process_events(
-            condensed_history=processed_events,
+            condensed_history=events,
            max_message_chars=self.llm.config.max_message_chars,
            vision_is_active=self.llm.vision_is_active(),
        )

-        messages = self._enhance_messages(messages)
-
        if self.llm.is_caching_prompt_active():
            self.conversation_memory.apply_prompt_caching(messages)

        return messages
-
-    def _enhance_messages(self, messages: list[Message]) -> list[Message]:
-        """Enhances the user message with additional context based on keywords matched.
-
-        Args:
-            messages (list[Message]): The list of messages to enhance
-
-        Returns:
-            list[Message]: The enhanced list of messages
-        """
-        assert self.prompt_manager, 'Prompt Manager not instantiated.'
-
-        results: list[Message] = []
-        is_first_message_handled = False
-        prev_role = None
-
-        for msg in messages:
-            if msg.role == 'user' and not is_first_message_handled:
-                is_first_message_handled = True
-                # compose the first user message with examples
-                self.prompt_manager.add_examples_to_initial_message(msg)
-
-            elif msg.role == 'user':
-                # Add double newline between consecutive user messages
-                if prev_role == 'user' and len(msg.content) > 0:
-                    # Find the first TextContent in the message to add newlines
-                    for content_item in msg.content:
-                        if isinstance(content_item, TextContent):
-                            # If the previous message was also from a user, prepend two newlines to ensure separation
-                            content_item.text = '\n\n' + content_item.text
-                            break
-
-            results.append(msg)
-            prev_role = msg.role
-
-        return results
--- a/openhands/integrations/github/templates/issue_comment_prompt.j2
+++ b/openhands/integrations/github/templates/issue_comment_prompt.j2
@ -17,4 +17,4 @@ When you're done, make sure to

 1. Use the `GITHUB_TOKEN` environment variable and GitHub API to open a new PR
 2. The PR description should mention that it "fixes" or "closes" the issue number
-3. Make sure to leave the following sentence at the end of the PR description: `@{{ username }} can click here to [continue refining the PR]({{ conversation_url }})`
+3. Make sure to leave the following sentence at the end of the PR description: `@{{ username }} can click here to [continue refining the PR]({{ conversation_url }})`
--- a/openhands/integrations/github/templates/issue_labeled_prompt.j2
+++ b/openhands/integrations/github/templates/issue_labeled_prompt.j2
@ -9,4 +9,4 @@ When you're done, make sure to

 1. Use the `GITHUB_TOKEN` environment variable and GitHub API to open a new PR
 2. The PR description should mention that it "fixes" or "closes" the issue number
-3. Make sure to leave the following sentence at the end of the PR description: `@{{ username }} can click here to [continue refining the PR]({{ conversation_url }})`
+3. Make sure to leave the following sentence at the end of the PR description: `@{{ username }} can click here to [continue refining the PR]({{ conversation_url }})`
--- a/openhands/integrations/github/templates/pr_update_prompt.j2
+++ b/openhands/integrations/github/templates/pr_update_prompt.j2
@ -1,5 +1,5 @@
 You are checked out to branch {{ branch_name }}, which has an open PR #{{ pr_number }}.
-A comment on the PR (below) has been addressed to you. Do NOT respond to this comment via the GitHub API. 
+A comment on the PR (below) has been addressed to you. Do NOT respond to this comment via the GitHub API.

 {% if file_location %} The comment is on the file `{{ file_location }}`{% endif %}.

@ -22,4 +22,4 @@ If it's a question:
 If it requests a code update:
    1. Modify the code accordingly in the current branch
    2. Push the changes to update the PR
-    3. DO NOT leave any comments on the PR
+    3. DO NOT leave any comments on the PR
--- a/openhands/memory/conversation_memory.py
+++ b/openhands/memory/conversation_memory.py
@ -70,6 +70,9 @@ class ConversationMemory:

        events = condensed_history

+        # Ensure the system message exists (handles legacy cases)
+        self._ensure_system_message(events)
+
        # log visual browsing status
        logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}')

@ -128,9 +131,33 @@ class ConversationMemory:
                pending_tool_call_action_messages.pop(response_id)

            messages += messages_to_add
+
+        # Apply final filtering so that the messages in context don't have unmatched tool calls
+        # and tool responses, for example
        messages = list(ConversationMemory._filter_unmatched_tool_calls(messages))
+
+        # Apply final formatting
+        messages = self._apply_user_message_formatting(messages)
+
        return messages

+    def _apply_user_message_formatting(self, messages: list[Message]) -> list[Message]:
+        """Applies formatting rules, such as adding newlines between consecutive user messages."""
+        formatted_messages = []
+        prev_role = None
+        for msg in messages:
+            # Add double newline between consecutive user messages
+            if msg.role == 'user' and prev_role == 'user' and len(msg.content) > 0:
+                # Find the first TextContent in the message to add newlines
+                for content_item in msg.content:
+                    if isinstance(content_item, TextContent):
+                        # Prepend two newlines to ensure visual separation
+                        content_item.text = '\n\n' + content_item.text
+                        break
+            formatted_messages.append(msg)
+            prev_role = msg.role  # Update prev_role after processing each message
+        return formatted_messages
+
    def _process_action(
        self,
        action: Action,
@ -653,3 +680,25 @@ class ConversationMemory:
            else:
                # Any other case is kept
                yield message
+
+    def _ensure_system_message(self, events: list[Event]) -> None:
+        """Checks if a SystemMessageAction exists and adds one if not (for legacy compatibility)."""
+        # Check if there's a SystemMessageAction in the events
+        has_system_message = any(
+            isinstance(event, SystemMessageAction) for event in events
+        )
+
+        # Legacy behavior: If no SystemMessageAction is found, add one
+        if not has_system_message:
+            logger.debug(
+                '[ConversationMemory] No SystemMessageAction found in events. '
+                'Adding one for backward compatibility. '
+            )
+            system_prompt = self.prompt_manager.get_system_message()
+            if system_prompt:
+                system_message = SystemMessageAction(content=system_prompt)
+                # Insert the system message directly at the beginning of the events list
+                events.insert(0, system_message)
+                logger.debug(
+                    '[ConversationMemory] Added SystemMessageAction for backward compatibility'
+                )
--- a/openhands/runtime/impl/action_execution/action_execution_client.py
+++ b/openhands/runtime/impl/action_execution/action_execution_client.py
@ -1,7 +1,6 @@
 import os
 import tempfile
 import threading
-from abc import abstractmethod
 from pathlib import Path
 from typing import Any
 from zipfile import ZipFile
@ -96,7 +95,7 @@ class ActionExecutionClient(Runtime):

    @property
    def action_execution_server_url(self) -> str:
-        raise NotImplementedError("Action execution server URL is not implemented")
+        raise NotImplementedError('Action execution server URL is not implemented')

    @property
    def runtime_initialized(self) -> bool:
--- a/openhands/server/conversation_manager/standalone_conversation_manager.py
+++ b/openhands/server/conversation_manager/standalone_conversation_manager.py
@ -258,7 +258,9 @@ class StandaloneConversationManager(ConversationManager):
    ) -> EventStore:
        logger.info(f'maybe_start_agent_loop:{sid}', extra={'session_id': sid})
        if not await self.is_agent_loop_running(sid):
-            await self._start_agent_loop(sid, settings, user_id, initial_user_msg, replay_json, github_user_id)
+            await self._start_agent_loop(
+                sid, settings, user_id, initial_user_msg, replay_json, github_user_id
+            )

        event_store = await self._get_event_store(sid, user_id)
        if not event_store:
@ -312,9 +314,7 @@ class StandaloneConversationManager(ConversationManager):
        try:
            session.agent_session.event_stream.subscribe(
                EventStreamSubscriber.SERVER,
-                self._create_conversation_update_callback(
-                    user_id, github_user_id, sid
-                ),
+                self._create_conversation_update_callback(user_id, github_user_id, sid),
                UPDATED_AT_CALLBACK_ID,
            )
        except ValueError:
--- a/openhands/server/routes/manage_conversations.py
+++ b/openhands/server/routes/manage_conversations.py
@ -33,7 +33,10 @@ from openhands.server.shared import (
    file_store,
 )
 from openhands.server.types import LLMAuthenticationError, MissingSettingsError
-from openhands.storage.data_models.conversation_metadata import ConversationMetadata, ConversationTrigger
+from openhands.storage.data_models.conversation_metadata import (
+    ConversationMetadata,
+    ConversationTrigger,
+)
 from openhands.storage.data_models.conversation_status import ConversationStatus
 from openhands.utils.async_utils import wait_all
 from openhands.utils.conversation_summary import generate_conversation_title
@ -58,7 +61,7 @@ async def _create_new_conversation(
    image_urls: list[str] | None,
    replay_json: str | None,
    conversation_trigger: ConversationTrigger = ConversationTrigger.GUI,
-    attach_convo_id: bool = False
+    attach_convo_id: bool = False,
 ):
    logger.info(
        'Creating conversation',
--- a/openhands/storage/data_models/conversation_metadata.py
+++ b/openhands/storage/data_models/conversation_metadata.py
@ -1,7 +1,6 @@
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from enum import Enum
-from openhands.integrations.service_types import ProviderType


 class ConversationTrigger(Enum):
@ -10,7 +9,7 @@ class ConversationTrigger(Enum):


@dataclass
-class ConversationMetadata:    
+class ConversationMetadata:
    conversation_id: str
    github_user_id: str | None
    selected_repository: str | None
--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@ -57,10 +57,10 @@ class PromptManager:
        return self.system_template.render().strip()

    def get_example_user_message(self) -> str:
-        """This is the initial user message provided to the agent
+        """This is an initial user message that can be provided to the agent
        before *actual* user instructions are provided.

-        It is used to provide a demonstration of how the agent
+        It can be used to provide a demonstration of how the agent
        should behave in order to solve the user's task. And it may
        optionally contain some additional context about the user's task.
        These additional context will convert the current generic agent
@ -69,14 +69,6 @@ class PromptManager:

        return self.user_template.render().strip()

-    def add_examples_to_initial_message(self, message: Message) -> None:
-        """Add example_message to the first user message."""
-        example_message = self.get_example_user_message() or None
-
-        # Insert it at the start of the TextContent list
-        if example_message:
-            message.content.insert(0, TextContent(text=example_message))
-
    def build_workspace_context(
        self,
        repository_info: RepositoryInfo | None,
--- a/tests/unit/test_codeact_agent.py
+++ b/tests/unit/test_codeact_agent.py
@ -361,9 +361,6 @@ def test_enhance_messages_adds_newlines_between_consecutive_user_messages(
    """Test that _enhance_messages adds newlines between consecutive user messages."""
    # Set up the prompt manager
    agent.prompt_manager = Mock()
-    agent.prompt_manager.add_examples_to_initial_message = Mock()
-    agent.prompt_manager.add_info_to_initial_message = Mock()
-    agent.prompt_manager.enhance_message = Mock()

    # Create consecutive user messages with various content types
    messages = [
@ -393,7 +390,9 @@ def test_enhance_messages_adds_newlines_between_consecutive_user_messages(
    ]

    # Call _enhance_messages
-    enhanced_messages = agent._enhance_messages(messages)
+    enhanced_messages = agent.conversation_memory._apply_user_message_formatting(
+        messages
+    )

    # Verify newlines were added correctly
    assert enhanced_messages[1].content[0].text.startswith('\n\n')
--- a/tests/unit/test_conversation_memory.py
+++ b/tests/unit/test_conversation_memory.py
@ -131,8 +131,8 @@ def test_process_events_with_cmd_output_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -154,8 +154,8 @@ def test_process_events_with_ipython_run_cell_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -178,8 +178,8 @@ def test_process_events_with_agent_delegate_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -195,8 +195,8 @@ def test_process_events_with_error_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -232,8 +232,8 @@ def test_process_events_with_file_edit_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -253,8 +253,8 @@ def test_process_events_with_file_read_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -276,8 +276,8 @@ def test_process_events_with_browser_output_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -293,8 +293,8 @@ def test_process_events_with_user_reject_observation(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -323,8 +323,8 @@ def test_process_events_with_empty_environment_info(conversation_memory):
        vision_is_active=False,
    )

-    # Should only contain no messages
-    assert len(messages) == 0
+    # Should only contain no messages except system message
+    assert len(messages) == 1

    # Verify that build_workspace_context was NOT called since all input values were empty
    conversation_memory.prompt_manager.build_workspace_context.assert_not_called()
@ -355,7 +355,7 @@ def test_process_events_with_function_calling_observation(conversation_memory):
    )

    # No direct message when using function calling
-    assert len(messages) == 0  # should be no messages
+    assert len(messages) == 1  # should be no messages except system message


 def test_process_events_with_message_action_with_image(conversation_memory):
@ -371,8 +371,8 @@ def test_process_events_with_message_action_with_image(conversation_memory):
        vision_is_active=True,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'assistant'
    assert len(result.content) == 2
    assert isinstance(result.content[0], TextContent)
@ -391,8 +391,8 @@ def test_process_events_with_user_cmd_action(conversation_memory):
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -424,8 +424,8 @@ def test_process_events_with_agent_finish_action_with_tool_metadata(
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'assistant'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -467,8 +467,8 @@ def test_process_events_with_environment_microagent_observation(conversation_mem
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -522,8 +522,8 @@ def test_process_events_with_knowledge_microagent_microagent_observation(
        vision_is_active=False,
    )

-    assert len(messages) == 1
-    result = messages[0]
+    assert len(messages) == 2
+    result = messages[1]
    assert result.role == 'user'
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
@ -566,7 +566,7 @@ def test_process_events_with_microagent_observation_extensions_disabled(
    )

    # When prompt extensions are disabled, the RecallObservation should be ignored
-    assert len(messages) == 0  # should be no messages
+    assert len(messages) == 1  # should be no messages except system message

    # Verify the prompt_manager was not called
    conversation_memory.prompt_manager.build_workspace_context.assert_not_called()
@ -588,7 +588,7 @@ def test_process_events_with_empty_microagent_knowledge(conversation_memory):
    )

    # The implementation returns an empty string and it doesn't creates a message
-    assert len(messages) == 0  # should be no messages
+    assert len(messages) == 1  # should be no messages except system message

    # When there are no triggered agents, build_microagent_info is not called
    conversation_memory.prompt_manager.build_microagent_info.assert_not_called()
@ -800,12 +800,12 @@ def test_process_events_with_microagent_observation_deduplication(conversation_m
    )

    # Verify that only the first occurrence of content for each agent is included
-    assert len(messages) == 1
+    assert len(messages) == 2  # with system message

    # First microagent should include all agents since they appear here first
-    assert 'Image best practices v1' in messages[0].content[0].text
-    assert 'Git best practices v1' in messages[0].content[0].text
-    assert 'Python best practices v1' in messages[0].content[0].text
+    assert 'Image best practices v1' in messages[1].content[0].text
+    assert 'Git best practices v1' in messages[1].content[0].text
+    assert 'Python best practices v1' in messages[1].content[0].text


 def test_process_events_with_microagent_observation_deduplication_disabled_agents(
@ -849,11 +849,11 @@ def test_process_events_with_microagent_observation_deduplication_disabled_agent
    )

    # Verify that disabled agents are filtered out and only the first occurrence of enabled agents is included
-    assert len(messages) == 1
+    assert len(messages) == 2

    # First microagent should include enabled_agent but not disabled_agent
-    assert 'Disabled agent content' not in messages[0].content[0].text
-    assert 'Enabled agent content v1' in messages[0].content[0].text
+    assert 'Disabled agent content' not in messages[1].content[0].text
+    assert 'Enabled agent content v1' in messages[1].content[0].text


 def test_process_events_with_microagent_observation_deduplication_empty(
@ -873,7 +873,10 @@ def test_process_events_with_microagent_observation_deduplication_empty(
    )

    # Verify that empty RecallObservations are handled gracefully
-    assert len(messages) == 0  # an empty microagent is not added to Messages
+    assert (
+        len(messages) == 1
+    )  # an empty microagent is not added to Messages, only system message is found
+    assert messages[0].role == 'system'


 def test_has_agent_in_earlier_events(conversation_memory):
--- a/tests/unit/test_prompt_manager.py
+++ b/tests/unit/test_prompt_manager.py
@ -147,30 +147,6 @@ This is information from agent 2
    assert result.strip() == ''


-def test_add_examples_to_initial_message(prompt_dir):
-    """Test adding example messages to an initial message."""
-    # Create a user_prompt.j2 template file
-    with open(os.path.join(prompt_dir, 'user_prompt.j2'), 'w') as f:
-        f.write('This is an example user message')
-
-    # Initialize the PromptManager
-    manager = PromptManager(prompt_dir=prompt_dir)
-
-    # Create a message
-    message = Message(role='user', content=[TextContent(text='Original content')])
-
-    # Add examples to the message
-    manager.add_examples_to_initial_message(message)
-
-    # Check that the example was added at the beginning
-    assert len(message.content) == 2
-    assert message.content[0].text == 'This is an example user message'
-    assert message.content[1].text == 'Original content'
-
-    # Clean up
-    os.remove(os.path.join(prompt_dir, 'user_prompt.j2'))
-
-
 def test_add_turns_left_reminder(prompt_dir):
    """Test adding turns left reminder to messages."""
    # Initialize the PromptManager