Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Boxuan Li 2025-08-08 20:28:36 -07:00 committed by GitHub
parent 3eecac2003
commit 803bdced9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 194 additions and 3 deletions

View File

@ -1,3 +1,4 @@
import re
import sys import sys
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
@ -37,7 +38,16 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
# Case-insensitive, whole-word matchers used by ``refine_prompt``.
# ``_`` counts as a word character, so ``\bbash\b`` can never match the
# ``bash`` inside ``execute_bash``; the tool name therefore needs its own
# pattern, and no lookbehind is required on the standalone one.
_EXECUTE_BASH_PATTERN = re.compile(r'\bexecute_bash\b', flags=re.IGNORECASE)
_STANDALONE_BASH_PATTERN = re.compile(r'\bbash\b', flags=re.IGNORECASE)


def refine_prompt(prompt: str) -> str:
    """Rewrite references to bash as powershell when running on Windows.

    On ``win32`` the whole word ``execute_bash`` (any letter case) becomes
    ``execute_powershell`` and the whole word ``bash`` (any letter case)
    becomes ``powershell``. Words that merely contain ``bash`` (for example
    ``bashful``) and identifiers where ``bash`` follows or precedes another
    word character or underscore are left untouched.

    Args:
        prompt: The prompt text to adjust.

    Returns:
        The adjusted prompt on Windows; the unchanged ``prompt`` on every
        other platform.
    """
    if sys.platform != 'win32':
        return prompt

    # Tool names first, then any remaining standalone occurrences.
    result = _EXECUTE_BASH_PATTERN.sub('execute_powershell', prompt)
    return _STANDALONE_BASH_PATTERN.sub('powershell', result)

View File

@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
""" """
example = example.lstrip() example = example.lstrip()
return example return refine_prompt(example)
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools

View File

@ -4,6 +4,7 @@ from itertools import islice
from jinja2 import Template from jinja2 import Template
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
from openhands.controller.state.state import State from openhands.controller.state.state import State
from openhands.core.message import Message, TextContent from openhands.core.message import Message, TextContent
from openhands.events.observation.agent import MicroagentKnowledge from openhands.events.observation.agent import MicroagentKnowledge
@ -91,7 +92,8 @@ class PromptManager:
return Template(file.read()) return Template(file.read())
def get_system_message(self) -> str:
    """Render the system template, strip surrounding whitespace, and pass the text through ``refine_prompt``."""
    return refine_prompt(self.system_template.render().strip())
def get_example_user_message(self) -> str: def get_example_user_message(self) -> str:
"""This is an initial user message that can be provided to the agent """This is an initial user message that can be provided to the agent

View File

@ -0,0 +1,179 @@
import sys
from unittest.mock import patch
import pytest
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig
from openhands.llm.llm import LLM
# Module-wide marker: every test here is skipped unless sys.platform is 'win32'.
pytestmark = pytest.mark.skipif(
    sys.platform != 'win32',
    reason='Windows prompt refinement tests require Windows',
)
@pytest.fixture
def mock_llm():
    """Provide an ``LLM`` built from a minimal placeholder configuration."""
    return LLM(config={'model': 'gpt-4', 'api_key': 'test'})
@pytest.fixture
def agent_config():
    """Provide an ``AgentConfig`` constructed without any arguments."""
    default_config = AgentConfig()
    return default_config
def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config):
    """Check that the agent's system message mentions 'powershell' and never 'bash'."""
    codeact = CodeActAgent(llm=mock_llm, config=agent_config)
    system_prompt = codeact.prompt_manager.get_system_message()

    # Both checks are case-insensitive, so lower-case the prompt once.
    lowered_prompt = system_prompt.lower()

    assert 'bash' not in lowered_prompt, (
        f"System prompt contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'System prompt: {system_prompt}'
    )
    assert 'powershell' in lowered_prompt, (
        f"System prompt should contain 'powershell' on Windows platform. "
        f'System prompt: {system_prompt}'
    )
def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config):
    """Check that no function tool's description or parameter description mentions 'bash'."""
    codeact = CodeActAgent(llm=mock_llm, config=agent_config)

    for tool in codeact.tools:
        # Only entries of type 'function' are inspected.
        if tool['type'] != 'function':
            continue

        function_info = tool['function']

        # Top-level description of the tool.
        description = function_info.get('description', '')
        assert 'bash' not in description.lower(), (
            f"Tool '{function_info['name']}' description contains 'bash' on Windows. "
            f'Description: {description}'
        )

        # Description of each declared parameter; missing sections count as empty.
        properties = function_info.get('parameters', {}).get('properties', {})
        for param_name, param_info in properties.items():
            param_description = param_info.get('description', '')
            assert 'bash' not in param_description.lower(), (
                f"Tool '{function_info['name']}' parameter '{param_name}' "
                f"description contains 'bash' on Windows. "
                f'Parameter description: {param_description}'
            )
def test_in_context_learning_example_no_bash_on_windows():
    """Check that the example built for a sample tool set says 'powershell', not 'bash'."""
    from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
    from openhands.agenthub.codeact_agent.tools.finish import FinishTool
    from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
        create_str_replace_editor_tool,
    )
    from openhands.llm.fn_call_converter import get_example_for_tools

    # Sample tool set: command runner, string-replace editor, and finish tool.
    example = get_example_for_tools(
        [
            create_cmd_run_tool(),
            create_str_replace_editor_tool(),
            FinishTool,
        ]
    )

    assert 'bash' not in example.lower(), (
        f"In-context learning example contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'Example: {example}'
    )

    # An empty example has nothing to replace, so the positive check is skipped.
    if not example:
        return
    assert 'powershell' in example.lower(), (
        f"In-context learning example should contain 'powershell' on Windows platform. "
        f'Example: {example}'
    )
def test_refine_prompt_function_works():
    """Check ``refine_prompt`` replacements: single, repeated, mixed-case, tool-name, and partial-word inputs."""
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    # A single occurrence.
    rewritten = refine_prompt('Execute a bash command to list files')
    assert 'bash' not in rewritten.lower()
    assert 'powershell' in rewritten.lower()
    assert rewritten == 'Execute a powershell command to list files'

    # Several occurrences in one prompt.
    rewritten = refine_prompt('Use bash to run bash commands in the bash shell')
    assert 'bash' not in rewritten.lower()
    assert (
        rewritten
        == 'Use powershell to run powershell commands in the powershell shell'
    )

    # Every letter case is replaced by the lower-case replacement.
    rewritten = refine_prompt('BASH and Bash and bash should all be replaced')
    assert 'bash' not in rewritten.lower()
    assert (
        rewritten
        == 'powershell and powershell and powershell should all be replaced'
    )

    # The execute_bash tool name becomes execute_powershell.
    rewritten = refine_prompt('Use the execute_bash tool to run commands')
    assert 'execute_bash' not in rewritten.lower()
    assert 'execute_powershell' in rewritten.lower()
    assert rewritten == 'Use the execute_powershell tool to run commands'

    # 'bashful' only contains 'bash' and is kept; 'bash-like' starts with the
    # whole word and becomes 'powershell-like'.
    rewritten = refine_prompt('The bashful person likes bash-like syntax')
    assert 'bashful' in rewritten
    assert 'powershell-like' in rewritten
    assert rewritten == 'The bashful person likes powershell-like syntax'
def test_refine_prompt_function_on_non_windows():
    """Check that ``refine_prompt`` returns its input unchanged when the platform is 'linux'."""
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    original = 'Execute a bash command to list files'

    # Override the platform string seen through the tool module with 'linux'.
    with patch('openhands.agenthub.codeact_agent.tools.bash.sys.platform', 'linux'):
        rewritten = refine_prompt(original)

    # Nothing should have been replaced.
    assert rewritten == original
    assert 'bash' in rewritten.lower()