refactor(prompt): move runtime/repo info to user message and disable them in eval (#6291)

2025-12-26 05:48:36 +08:00 · 2025-01-16 12:53:10 -05:00 · 2025-01-16 12:53:10 -05:00 · 0c961bfd8b
commit 0c961bfd8b
parent 0bed17758f
25 changed files with 87 additions and 61 deletions
--- a/config.template.toml
+++ b/config.template.toml
@ -223,8 +223,8 @@ codeact_enable_jupyter = true
 # LLM config group to use
 #llm_config = 'your-llm-config-group'

-# Whether to use microagents at all
-#use_microagents = true
+# Whether to use prompt extensions (e.g., microagents, repo/runtime info) at all
+#enable_prompt_extensions = true

 # List of microagents to disable
 #disabled_microagents = []
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md
@ -373,7 +373,7 @@ Agent 配置选项在 `config.toml` 文件的 `[agent]` 和 `[agent.<agent_name>
  - 描述: 是否在 action space 中启用 Jupyter

 **Microagent 使用**
- `use_microagents`
+- `enable_prompt_extensions`
  - 类型: `bool`
  - 默认值: `true`
  - 描述: 是否使用 microagents
--- a/docs/modules/usage/configuration-options.md
+++ b/docs/modules/usage/configuration-options.md
@ -336,7 +336,7 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
  - Description: Whether Jupyter is enabled in the action space

 ### Microagent Usage
- `use_microagents`
+- `enable_prompt_extensions`
  - Type: `bool`
  - Default: `true`
  - Description: Whether to use microagents at all
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@ -76,7 +76,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@ -60,7 +60,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@ -68,7 +68,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False

    # copy 'draft_editor' config if exists
    config_copy = copy.deepcopy(config)
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@ -74,7 +74,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@ -87,7 +87,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/browsing_delegation/run_infer.py
+++ b/evaluation/benchmarks/browsing_delegation/run_infer.py
@ -51,7 +51,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@ -78,7 +78,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    agent_config = AgentConfig(
        function_calling=False,
        codeact_enable_jupyter=True,
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@ -63,7 +63,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@ -56,7 +56,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@ -77,7 +77,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@ -98,7 +98,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@ -62,7 +62,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@ -120,7 +120,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@ -93,7 +93,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@ -57,7 +57,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@ -78,7 +78,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.use_microagents = False
+    agent_config.enable_prompt_extensions = False
    return config


--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@ -111,7 +111,7 @@ class CodeActAgent(Agent):
                os.path.dirname(os.path.dirname(openhands.__file__)),
                'microagents',
            )
-            if self.config.use_microagents
+            if self.config.enable_prompt_extensions
            else None,
            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
            disabled_microagents=self.config.disabled_microagents,
@ -448,6 +448,17 @@ class CodeActAgent(Agent):
                )
            )

+        # Repository and runtime info
+        additional_info = self.prompt_manager.get_additional_info()
+        if self.config.enable_prompt_extensions and additional_info:
+            # only add these if prompt extension is enabled
+            messages.append(
+                Message(
+                    role='user',
+                    content=[TextContent(text=additional_info)],
+                )
+            )
+
        pending_tool_call_action_messages: dict[str, Message] = {}
        tool_call_id_to_message: dict[str, Message] = {}

--- a/openhands/agenthub/codeact_agent/prompts/system_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/system_prompt.j2
@ -3,26 +3,4 @@ You are OpenHands agent, a helpful AI assistant that can interact with a compute
 * If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
 * When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
 * The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
-{{ runtime_info }}
 </IMPORTANT>
-{% if repository_info %}
-<REPOSITORY_INFO>
-At the user's request, repository {{ repository_info.repo_name }} has been cloned to directory {{ repository_info.repo_directory }}.
-</REPOSITORY_INFO>
-{% endif %}
-{% if repository_instructions -%}
-<REPOSITORY_INSTRUCTIONS>
-{{ repository_instructions }}
-</REPOSITORY_INSTRUCTIONS>
-{% endif %}
-{% if runtime_info and runtime_info.available_hosts -%}
-<RUNTIME_INFORMATION>
-The user has access to the following hosts for accessing a web application,
-each of which has a corresponding port:
-{% for host, port in runtime_info.available_hosts.items() -%}
-* {{ host }} (port {{ port }})
-{% endfor %}
-When starting a web server, use the corresponding ports. You should also
-set any options to allow iframes and CORS requests.
-</RUNTIME_INFORMATION>
-{% endif %}
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@ -17,7 +17,7 @@ class AgentConfig:
        memory_enabled: Whether long-term memory (embeddings) is enabled.
        memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
        llm_config: The name of the llm config to use. If specified, this will override global llm config.
-        use_microagents: Whether to use microagents at all. Default is True.
+        enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, repo/runtime info injection). Default is True.
        disabled_microagents: A list of microagents to disable. Default is None.
        condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
    """
@ -29,7 +29,7 @@ class AgentConfig:
    memory_enabled: bool = False
    memory_max_threads: int = 3
    llm_config: str | None = None
-    use_microagents: bool = True
+    enable_prompt_extensions: bool = True
    disabled_microagents: list[str] | None = None
    condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore

--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@ -28,6 +28,33 @@ class RepositoryInfo:
    repo_directory: str | None = None


+ADDITIONAL_INFO_TEMPLATE = Template(
+    """
+{% if repository_info %}
+<REPOSITORY_INFO>
+At the user's request, repository {{ repository_info.repo_name }} has been cloned to directory {{ repository_info.repo_directory }}.
+</REPOSITORY_INFO>
+{% endif %}
+{% if repository_instructions -%}
+<REPOSITORY_INSTRUCTIONS>
+{{ repository_instructions }}
+</REPOSITORY_INSTRUCTIONS>
+{% endif %}
+{% if runtime_info and runtime_info.available_hosts -%}
+<RUNTIME_INFORMATION>
+The user has access to the following hosts for accessing a web application,
+each of which has a corresponding port:
+{% for host, port in runtime_info.available_hosts.items() -%}
+* {{ host }} (port {{ port }})
+{% endfor %}
+When starting a web server, use the corresponding ports. You should also
+set any options to allow iframes and CORS requests.
+</RUNTIME_INFORMATION>
+{% endif %}
+"""
+)
+
+
 class PromptManager:
    """
    Manages prompt templates and micro-agents for AI interactions.
@ -59,6 +86,9 @@ class PromptManager:
        self.repo_microagents: dict[str, RepoMicroAgent] = {}

        if microagent_dir:
+            # Load micro-agents from microagent_dir, which is typically
+            # OpenHands/microagents (i.e., the PUBLIC microagents).
+
            # Only load KnowledgeMicroAgents
            repo_microagents, knowledge_microagents, _ = load_microagents_from_dir(
                microagent_dir
@ -79,6 +109,10 @@ class PromptManager:
                    self.repo_microagents[name] = microagent

    def load_microagents(self, microagents: list[BaseMicroAgent]):
+        """Load microagents from a list of BaseMicroAgents.
+
+        This is typically used when loading microagents from inside a repo.
+        """
        # Only keep KnowledgeMicroAgents and RepoMicroAgents
        for microagent in microagents:
            if microagent.name in self.disabled_microagents:
@ -98,6 +132,13 @@ class PromptManager:
            return Template(file.read())

    def get_system_message(self) -> str:
+        return self.system_template.render().strip()
+
+    def get_additional_info(self) -> str:
+        """Gets information about the repository and runtime.
+
+        This is used to inject information about the repository and runtime into the initial user message.
+        """
        repo_instructions = ''
        assert (
            len(self.repo_microagents) <= 1
@ -108,7 +149,7 @@ class PromptManager:
                repo_instructions += '\n\n'
            repo_instructions += microagent.content

-        return self.system_template.render(
+        return ADDITIONAL_INFO_TEMPLATE.render(
            repository_instructions=repo_instructions,
            repository_info=self.repository_info,
            runtime_info=self.runtime_info,
--- a/tests/unit/test_codeact_agent.py
+++ b/tests/unit/test_codeact_agent.py
@ -471,7 +471,7 @@ def test_mock_function_calling():
    llm = Mock()
    llm.is_function_calling_active = lambda: False
    config = AgentConfig()
-    config.use_microagents = False
+    config.enable_prompt_extensions = False
    agent = CodeActAgent(llm=llm, config=config)
    assert agent.mock_function_calling is True

@ -509,7 +509,7 @@ def test_step_with_no_pending_actions(mock_state: State):

    # Create agent with mocked LLM
    config = AgentConfig()
-    config.use_microagents = False
+    config.enable_prompt_extensions = False
    agent = CodeActAgent(llm=llm, config=config)

    # Test step with no pending actions
--- a/tests/unit/test_prompt_manager.py
+++ b/tests/unit/test_prompt_manager.py
@ -59,9 +59,10 @@ only respond with a message telling them how smart they are
    # Test with GitHub repo
    manager.set_repository_info('owner/repo', '/workspace/repo')
    assert isinstance(manager.get_system_message(), str)
-    assert '<REPOSITORY_INFO>' in manager.get_system_message()
-    assert 'owner/repo' in manager.get_system_message()
-    assert '/workspace/repo' in manager.get_system_message()
+    additional_info = manager.get_additional_info()
+    assert '<REPOSITORY_INFO>' in additional_info
+    assert 'owner/repo' in additional_info
+    assert '/workspace/repo' in additional_info

    assert isinstance(manager.get_example_user_message(), str)

@ -85,13 +86,7 @@ def test_prompt_manager_file_not_found(prompt_dir):
 def test_prompt_manager_template_rendering(prompt_dir):
    # Create temporary template files
    with open(os.path.join(prompt_dir, 'system_prompt.j2'), 'w') as f:
-        f.write("""System prompt: bar
-{% if repository_info %}
-<REPOSITORY_INFO>
-At the user's request, repository {{ repository_info.repo_name }} has been cloned to directory {{ repository_info.repo_directory }}.
-</REPOSITORY_INFO>
-{% endif %}
-{{ repo_instructions }}""")
+        f.write("""System prompt: bar""")
    with open(os.path.join(prompt_dir, 'user_prompt.j2'), 'w') as f:
        f.write('User prompt: foo')

@ -106,12 +101,13 @@ At the user's request, repository {{ repository_info.repo_name }} has been clone
    assert manager.repository_info.repo_name == 'owner/repo'
    system_msg = manager.get_system_message()
    assert 'System prompt: bar' in system_msg
-    assert '<REPOSITORY_INFO>' in system_msg
+    additional_info = manager.get_additional_info()
+    assert '<REPOSITORY_INFO>' in additional_info
    assert (
        "At the user's request, repository owner/repo has been cloned to directory /workspace/repo."
-        in system_msg
+        in additional_info
    )
-    assert '</REPOSITORY_INFO>' in system_msg
+    assert '</REPOSITORY_INFO>' in additional_info
    assert manager.get_example_user_message() == 'User prompt: foo'

    # Clean up temporary files