mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
3eecac2003
commit
803bdced9c
@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||||
@ -37,7 +38,16 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
|||||||
|
|
||||||
def refine_prompt(prompt: str):
|
def refine_prompt(prompt: str):
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
return prompt.replace('bash', 'powershell')
|
# Replace 'bash' with 'powershell' including tool names like 'execute_bash'
|
||||||
|
# First replace 'execute_bash' with 'execute_powershell' to handle tool names
|
||||||
|
result = re.sub(
|
||||||
|
r'\bexecute_bash\b', 'execute_powershell', prompt, flags=re.IGNORECASE
|
||||||
|
)
|
||||||
|
# Then replace standalone 'bash' with 'powershell'
|
||||||
|
result = re.sub(
|
||||||
|
r'(?<!execute_)(?<!_)\bbash\b', 'powershell', result, flags=re.IGNORECASE
|
||||||
|
)
|
||||||
|
return result
|
||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
|
|||||||
"""
|
"""
|
||||||
example = example.lstrip()
|
example = example.lstrip()
|
||||||
|
|
||||||
return example
|
return refine_prompt(example)
|
||||||
|
|
||||||
|
|
||||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
|
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
|
||||||
|
|||||||
@ -4,6 +4,7 @@ from itertools import islice
|
|||||||
|
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
|
|
||||||
|
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
|
||||||
from openhands.controller.state.state import State
|
from openhands.controller.state.state import State
|
||||||
from openhands.core.message import Message, TextContent
|
from openhands.core.message import Message, TextContent
|
||||||
from openhands.events.observation.agent import MicroagentKnowledge
|
from openhands.events.observation.agent import MicroagentKnowledge
|
||||||
@ -91,7 +92,8 @@ class PromptManager:
|
|||||||
return Template(file.read())
|
return Template(file.read())
|
||||||
|
|
||||||
def get_system_message(self) -> str:
|
def get_system_message(self) -> str:
|
||||||
return self.system_template.render().strip()
|
system_message = self.system_template.render().strip()
|
||||||
|
return refine_prompt(system_message)
|
||||||
|
|
||||||
def get_example_user_message(self) -> str:
|
def get_example_user_message(self) -> str:
|
||||||
"""This is an initial user message that can be provided to the agent
|
"""This is an initial user message that can be provided to the agent
|
||||||
|
|||||||
179
tests/unit/test_windows_prompt_refinement.py
Normal file
179
tests/unit/test_windows_prompt_refinement.py
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
import sys
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
|
||||||
|
from openhands.core.config import AgentConfig
|
||||||
|
from openhands.llm.llm import LLM
|
||||||
|
|
||||||
|
# Skip all tests in this module if not running on Windows
|
||||||
|
pytestmark = pytest.mark.skipif(
|
||||||
|
sys.platform != 'win32', reason='Windows prompt refinement tests require Windows'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_llm():
|
||||||
|
"""Create a mock LLM for testing."""
|
||||||
|
llm = LLM(config={'model': 'gpt-4', 'api_key': 'test'})
|
||||||
|
return llm
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def agent_config():
|
||||||
|
"""Create a basic agent config for testing."""
|
||||||
|
return AgentConfig()
|
||||||
|
|
||||||
|
|
||||||
|
def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config):
|
||||||
|
"""Test that CodeActAgent's system prompt doesn't contain 'bash' on Windows."""
|
||||||
|
# Create a CodeActAgent instance
|
||||||
|
agent = CodeActAgent(llm=mock_llm, config=agent_config)
|
||||||
|
|
||||||
|
# Get the system prompt
|
||||||
|
system_prompt = agent.prompt_manager.get_system_message()
|
||||||
|
|
||||||
|
# Assert that 'bash' doesn't exist in the system prompt (case-insensitive)
|
||||||
|
assert 'bash' not in system_prompt.lower(), (
|
||||||
|
f"System prompt contains 'bash' on Windows platform. "
|
||||||
|
f"It should be replaced with 'powershell'. "
|
||||||
|
f'System prompt: {system_prompt}'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify that 'powershell' exists instead (case-insensitive)
|
||||||
|
assert 'powershell' in system_prompt.lower(), (
|
||||||
|
f"System prompt should contain 'powershell' on Windows platform. "
|
||||||
|
f'System prompt: {system_prompt}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config):
|
||||||
|
"""Test that CodeActAgent's tool descriptions don't contain 'bash' on Windows."""
|
||||||
|
# Create a CodeActAgent instance
|
||||||
|
agent = CodeActAgent(llm=mock_llm, config=agent_config)
|
||||||
|
|
||||||
|
# Get the tools
|
||||||
|
tools = agent.tools
|
||||||
|
|
||||||
|
# Check each tool's description and parameters
|
||||||
|
for tool in tools:
|
||||||
|
if tool['type'] == 'function':
|
||||||
|
function_info = tool['function']
|
||||||
|
|
||||||
|
# Check function description
|
||||||
|
description = function_info.get('description', '')
|
||||||
|
assert 'bash' not in description.lower(), (
|
||||||
|
f"Tool '{function_info['name']}' description contains 'bash' on Windows. "
|
||||||
|
f'Description: {description}'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check parameter descriptions
|
||||||
|
parameters = function_info.get('parameters', {})
|
||||||
|
properties = parameters.get('properties', {})
|
||||||
|
|
||||||
|
for param_name, param_info in properties.items():
|
||||||
|
param_description = param_info.get('description', '')
|
||||||
|
assert 'bash' not in param_description.lower(), (
|
||||||
|
f"Tool '{function_info['name']}' parameter '{param_name}' "
|
||||||
|
f"description contains 'bash' on Windows. "
|
||||||
|
f'Parameter description: {param_description}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_in_context_learning_example_no_bash_on_windows():
|
||||||
|
"""Test that in-context learning examples don't contain 'bash' on Windows."""
|
||||||
|
from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
|
||||||
|
from openhands.agenthub.codeact_agent.tools.finish import FinishTool
|
||||||
|
from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
|
||||||
|
create_str_replace_editor_tool,
|
||||||
|
)
|
||||||
|
from openhands.llm.fn_call_converter import get_example_for_tools
|
||||||
|
|
||||||
|
# Create a sample set of tools
|
||||||
|
tools = [
|
||||||
|
create_cmd_run_tool(),
|
||||||
|
create_str_replace_editor_tool(),
|
||||||
|
FinishTool,
|
||||||
|
]
|
||||||
|
|
||||||
|
# Get the in-context learning example
|
||||||
|
example = get_example_for_tools(tools)
|
||||||
|
|
||||||
|
# Assert that 'bash' doesn't exist in the example (case-insensitive)
|
||||||
|
assert 'bash' not in example.lower(), (
|
||||||
|
f"In-context learning example contains 'bash' on Windows platform. "
|
||||||
|
f"It should be replaced with 'powershell'. "
|
||||||
|
f'Example: {example}'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify that 'powershell' exists instead (case-insensitive)
|
||||||
|
if example: # Only check if example is not empty
|
||||||
|
assert 'powershell' in example.lower(), (
|
||||||
|
f"In-context learning example should contain 'powershell' on Windows platform. "
|
||||||
|
f'Example: {example}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_refine_prompt_function_works():
|
||||||
|
"""Test that the refine_prompt function correctly replaces 'bash' with 'powershell'."""
|
||||||
|
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
|
||||||
|
|
||||||
|
# Test basic replacement
|
||||||
|
test_prompt = 'Execute a bash command to list files'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
assert 'bash' not in refined_prompt.lower()
|
||||||
|
assert 'powershell' in refined_prompt.lower()
|
||||||
|
assert refined_prompt == 'Execute a powershell command to list files'
|
||||||
|
|
||||||
|
# Test multiple occurrences
|
||||||
|
test_prompt = 'Use bash to run bash commands in the bash shell'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
assert 'bash' not in refined_prompt.lower()
|
||||||
|
assert (
|
||||||
|
refined_prompt
|
||||||
|
== 'Use powershell to run powershell commands in the powershell shell'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test case sensitivity
|
||||||
|
test_prompt = 'BASH and Bash and bash should all be replaced'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
assert 'bash' not in refined_prompt.lower()
|
||||||
|
assert (
|
||||||
|
refined_prompt
|
||||||
|
== 'powershell and powershell and powershell should all be replaced'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test execute_bash tool name replacement
|
||||||
|
test_prompt = 'Use the execute_bash tool to run commands'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
assert 'execute_bash' not in refined_prompt.lower()
|
||||||
|
assert 'execute_powershell' in refined_prompt.lower()
|
||||||
|
assert refined_prompt == 'Use the execute_powershell tool to run commands'
|
||||||
|
|
||||||
|
# Test that words containing 'bash' but not equal to 'bash' are preserved
|
||||||
|
test_prompt = 'The bashful person likes bash-like syntax'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
# 'bashful' should be preserved, 'bash-like' should become 'powershell-like'
|
||||||
|
assert 'bashful' in refined_prompt
|
||||||
|
assert 'powershell-like' in refined_prompt
|
||||||
|
assert refined_prompt == 'The bashful person likes powershell-like syntax'
|
||||||
|
|
||||||
|
|
||||||
|
def test_refine_prompt_function_on_non_windows():
|
||||||
|
"""Test that the refine_prompt function doesn't change anything on non-Windows platforms."""
|
||||||
|
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
|
||||||
|
|
||||||
|
# Mock sys.platform to simulate non-Windows
|
||||||
|
with patch('openhands.agenthub.codeact_agent.tools.bash.sys.platform', 'linux'):
|
||||||
|
test_prompt = 'Execute a bash command to list files'
|
||||||
|
refined_prompt = refine_prompt(test_prompt)
|
||||||
|
|
||||||
|
# On non-Windows, the prompt should remain unchanged
|
||||||
|
assert refined_prompt == test_prompt
|
||||||
|
assert 'bash' in refined_prompt.lower()
|
||||||
Loading…
x
Reference in New Issue
Block a user