From f4dcc136d010dddbb3c91e314e37416233f7bc0d Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 10 Nov 2025 21:34:55 +0100 Subject: [PATCH] tests: remove Windows-only tests and clean up Windows conditionals (#11697) --- tests/runtime/test_windows_bash.py | 594 ------------------ .../test_windows_prompt_refinement.py | 179 ------ 2 files changed, 773 deletions(-) delete mode 100644 tests/runtime/test_windows_bash.py delete mode 100644 tests/unit/agenthub/test_windows_prompt_refinement.py diff --git a/tests/runtime/test_windows_bash.py b/tests/runtime/test_windows_bash.py deleted file mode 100644 index 4570a34135..0000000000 --- a/tests/runtime/test_windows_bash.py +++ /dev/null @@ -1,594 +0,0 @@ -import os -import sys -import time -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from openhands.events.action import CmdRunAction -from openhands.events.observation import ErrorObservation -from openhands.events.observation.commands import ( - CmdOutputObservation, -) -from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE - - -def get_timeout_suffix(timeout_seconds): - """Helper function to generate the expected timeout suffix.""" - return ( - f'[The command timed out after {timeout_seconds} seconds. ' - f'{TIMEOUT_MESSAGE_TEMPLATE}]' - ) - - -# Skip all tests in this module if not running on Windows -pytestmark = pytest.mark.skipif( - sys.platform != 'win32', reason='WindowsPowershellSession tests require Windows' -) - - -@pytest.fixture -def windows_bash_session(temp_dir): - """Create a WindowsPowershellSession instance for testing.""" - # Instantiate the class. Initialization happens in __init__. - session = WindowsPowershellSession( - work_dir=temp_dir, - username=None, - ) - assert session._initialized # Should be true after __init__ - yield session - # Ensure cleanup happens even if test fails - session.close() - - -if sys.platform == 'win32': - from openhands.runtime.utils.windows_bash import WindowsPowershellSession - - -def test_command_execution(windows_bash_session): - """Test basic command execution.""" - # Test a simple command - action = CmdRunAction(command="Write-Output 'Hello World'") - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Check content, stripping potential trailing newlines - content = result.content.strip() - assert content == 'Hello World' - assert result.exit_code == 0 - - # Test a simple command with multiline input but single line output - action = CmdRunAction( - command="""Write-Output ` - ('hello ' + ` - 'world')""" - ) - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Check content, stripping potential trailing newlines - content = result.content.strip() - assert content == 'hello world' - assert result.exit_code == 0 - - # Test a simple command with a newline - action = CmdRunAction(command='Write-Output "Hello\\n World"') - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Check content, stripping potential trailing newlines - content = result.content.strip() - assert content == 'Hello\\n World' - assert result.exit_code == 0 - - -def test_command_with_error(windows_bash_session): - """Test command execution with an error reported via Write-Error.""" - # Test a command that will write an error - action = CmdRunAction(command="Write-Error 'Test Error'") - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Error stream is captured and appended - assert 'ERROR' in result.content - # Our implementation should set exit code to 1 when errors occur in stream - assert result.exit_code == 1 - - -def test_command_failure_exit_code(windows_bash_session): - """Test command execution that results in a non-zero exit code.""" - # Test a command that causes a script failure (e.g., invalid cmdlet) - action = CmdRunAction(command='Get-NonExistentCmdlet') - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Error should be captured in the output - assert 'ERROR' in result.content - assert ( - 'is not recognized' in result.content - or 'CommandNotFoundException' in result.content - ) - assert result.exit_code == 1 - - -def test_control_commands(windows_bash_session): - """Test handling of control commands (not supported).""" - # Test Ctrl+C - should return ErrorObservation if no command is running - action_c = CmdRunAction(command='C-c', is_input=True) - result_c = windows_bash_session.execute(action_c) - assert isinstance(result_c, ErrorObservation) - assert 'No previous running command to interact with' in result_c.content - - # Run a long-running command - action_long_running = CmdRunAction(command='Start-Sleep -Seconds 100') - result_long_running = windows_bash_session.execute(action_long_running) - assert isinstance(result_long_running, CmdOutputObservation) - assert result_long_running.exit_code == -1 - - # Test unsupported control command - action_d = CmdRunAction(command='C-d', is_input=True) - result_d = windows_bash_session.execute(action_d) - assert "Your input command 'C-d' was NOT processed" in result_d.metadata.suffix - assert ( - 'Direct input to running processes (is_input=True) is not supported by this PowerShell session implementation.' - in result_d.metadata.suffix - ) - assert 'You can use C-c to stop the process' in result_d.metadata.suffix - - # Ctrl+C now can cancel the long-running command - action_c = CmdRunAction(command='C-c', is_input=True) - result_c = windows_bash_session.execute(action_c) - assert isinstance(result_c, CmdOutputObservation) - assert result_c.exit_code == 0 - - -def test_command_timeout(windows_bash_session): - """Test command timeout handling.""" - # Test a command that will timeout - test_timeout_sec = 1 - action = CmdRunAction(command='Start-Sleep -Seconds 5') - action.set_hard_timeout(test_timeout_sec) - start_time = time.monotonic() - result = windows_bash_session.execute(action) - duration = time.monotonic() - start_time - - assert isinstance(result, CmdOutputObservation) - # Check for timeout specific metadata - assert 'timed out' in result.metadata.suffix.lower() # Check suffix, not content - assert result.exit_code == -1 # Timeout should result in exit code -1 - # Check that it actually timed out near the specified time - assert abs(duration - test_timeout_sec) < 0.5 # Allow some buffer - - -def test_long_running_command(windows_bash_session, dynamic_port): - action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}') - action.set_hard_timeout(1) - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Verify the initial output was captured - assert 'Serving HTTP on' in result.content - # Check for timeout specific metadata - assert get_timeout_suffix(1.0) in result.metadata.suffix - assert result.exit_code == -1 - - # The action timed out, but the command should be still running - # We should now be able to interrupt it - action = CmdRunAction(command='C-c', is_input=True) - action.set_hard_timeout(30) # Give it enough time to stop - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # On Windows, Stop-Job termination doesn't inherently return output. - # The CmdOutputObservation will have content="" and exit_code=0 if successful. - # The KeyboardInterrupt message assertion is removed as it's added manually - # by the wrapper and might not be guaranteed depending on timing/implementation details. - assert result.exit_code == 0 - - # Verify the server is actually stopped by starting another one on the same port - action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}') - action.set_hard_timeout(1) # Set a short timeout to check if it starts - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Verify the initial output was captured, indicating the port was free - assert 'Serving HTTP on' in result.content - # The command will time out again, so the exit code should be -1 - assert result.exit_code == -1 - - # Clean up the second server process - action = CmdRunAction(command='C-c', is_input=True) - action.set_hard_timeout(30) - result = windows_bash_session.execute(action) - assert result.exit_code == 0 - - -def test_multiple_commands_rejected_and_individual_execution(windows_bash_session): - """Test that executing multiple commands separated by newline is rejected, - but individual commands (including multiline) execute correctly. - """ - # Define a list of commands, including multiline and special characters - cmds = [ - 'Get-ChildItem', - 'Write-Output "hello`nworld"', - """Write-Output "hello it's me\"""", - """Write-Output ` - 'hello' ` - -NoNewline""", - """Write-Output 'hello`nworld`nare`nyou`nthere?'""", - """Write-Output 'hello`nworld`nare`nyou`n`nthere?'""", - """Write-Output 'hello`nworld `"'""", # Escape the trailing double quote - ] - joined_cmds = '\n'.join(cmds) - - # 1. Test that executing multiple commands at once fails - action_multi = CmdRunAction(command=joined_cmds) - result_multi = windows_bash_session.execute(action_multi) - - assert isinstance(result_multi, ErrorObservation) - assert 'ERROR: Cannot execute multiple commands at once' in result_multi.content - - # 2. Now run each command individually and verify they work - results = [] - for cmd in cmds: - action_single = CmdRunAction(command=cmd) - obs = windows_bash_session.execute(action_single) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - results.append(obs.content.strip()) # Strip trailing newlines for comparison - - -def test_working_directory(windows_bash_session, temp_dir): - """Test working directory handling.""" - initial_cwd = windows_bash_session._cwd - abs_temp_work_dir = os.path.abspath(temp_dir) - assert initial_cwd == abs_temp_work_dir - - # Create a subdirectory - sub_dir_path = Path(abs_temp_work_dir) / 'subdir' - sub_dir_path.mkdir() - assert sub_dir_path.is_dir() - - # Test changing directory - action_cd = CmdRunAction(command='Set-Location subdir') - result_cd = windows_bash_session.execute(action_cd) - assert isinstance(result_cd, CmdOutputObservation) - assert result_cd.exit_code == 0 - - # Check that the session's internal CWD state was updated - only check the last component of path - assert windows_bash_session._cwd.lower().endswith('\\subdir') - # Check that the metadata reflects the directory *after* the command - assert result_cd.metadata.working_dir.lower().endswith('\\subdir') - - # Execute a command in the new directory to confirm - action_pwd = CmdRunAction(command='(Get-Location).Path') - result_pwd = windows_bash_session.execute(action_pwd) - assert isinstance(result_pwd, CmdOutputObservation) - assert result_pwd.exit_code == 0 - # Check the command output reflects the new directory - assert result_pwd.content.strip().lower().endswith('\\subdir') - # Metadata should also reflect the current directory - assert result_pwd.metadata.working_dir.lower().endswith('\\subdir') - - # Test changing back to original directory - action_cd_back = CmdRunAction(command=f"Set-Location '{abs_temp_work_dir}'") - result_cd_back = windows_bash_session.execute(action_cd_back) - assert isinstance(result_cd_back, CmdOutputObservation) - assert result_cd_back.exit_code == 0 - # Check only the base name of the temp directory - temp_dir_basename = os.path.basename(abs_temp_work_dir) - assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower()) - assert result_cd_back.metadata.working_dir.lower().endswith( - temp_dir_basename.lower() - ) - - -def test_cleanup(windows_bash_session): - """Test proper cleanup of resources (runspace).""" - # Session should be initialized before close - assert windows_bash_session._initialized - assert windows_bash_session.runspace is not None - - # Close the session - windows_bash_session.close() - - # Verify cleanup - assert not windows_bash_session._initialized - assert windows_bash_session.runspace is None - assert windows_bash_session._closed - - -def test_syntax_error_handling(windows_bash_session): - """Test handling of syntax errors in PowerShell commands.""" - # Test invalid command syntax - action = CmdRunAction(command="Write-Output 'Missing Quote") - result = windows_bash_session.execute(action) - assert isinstance(result, ErrorObservation) - # Error message appears in the output via PowerShell error stream - assert 'missing' in result.content.lower() or 'terminator' in result.content.lower() - - -def test_special_characters_handling(windows_bash_session): - """Test handling of commands containing special characters.""" - # Test command with special characters - special_chars_cmd = '''Write-Output "Special Chars: \\`& \\`| \\`< \\`> \\`\\` \\`' \\`\" \\`! \\`$ \\`% \\`^ \\`( \\`) \\`- \\`= \\`+ \\`[ \\`] \\`{ \\`} \\`; \\`: \\`, \\`. \\`? \\`/ \\`~"''' - action = CmdRunAction(command=special_chars_cmd) - result = windows_bash_session.execute(action) - assert isinstance(result, CmdOutputObservation) - # Check output contains the special characters - assert 'Special Chars:' in result.content - assert '&' in result.content and '|' in result.content - assert result.exit_code == 0 - - -def test_empty_command(windows_bash_session): - """Test handling of empty command string when no command is running.""" - action = CmdRunAction(command='') - result = windows_bash_session.execute(action) - assert isinstance(result, CmdOutputObservation) - # Should indicate error as per test_bash.py behavior - assert 'ERROR: No previous running command to retrieve logs from.' in result.content - # Exit code is typically 0 even for this specific "error" message in the bash implementation - assert result.exit_code == 0 - - -def test_exception_during_execution(windows_bash_session): - """Test handling of exceptions during command execution.""" - # Patch the PowerShell class itself within the module where it's used - patch_target = 'openhands.runtime.utils.windows_bash.PowerShell' - - # Create a mock PowerShell class - mock_powershell_class = MagicMock() - # Configure its Create method (which is called in execute) to raise an exception - # This simulates an error during the creation of the PowerShell object itself. - mock_powershell_class.Create.side_effect = Exception( - 'Test exception from mocked Create' - ) - - with patch(patch_target, mock_powershell_class): - action = CmdRunAction(command="Write-Output 'Test'") - # Now, when execute calls PowerShell.Create(), it will hit our mock and raise the exception - result = windows_bash_session.execute(action) - - # The exception should be caught by the try...except block in execute() - assert isinstance(result, ErrorObservation) - # Check the error message generated by the execute method's exception handler - assert 'Failed to start PowerShell job' in result.content - assert 'Test exception from mocked Create' in result.content - - -def test_streaming_output(windows_bash_session): - """Test handling of streaming output from commands.""" - # Command that produces output incrementally - command = """ - 1..3 | ForEach-Object { - Write-Output "Line $_" - Start-Sleep -Milliseconds 100 - } - """ - action = CmdRunAction(command=command) - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - assert 'Line 1' in result.content - assert 'Line 2' in result.content - assert 'Line 3' in result.content - assert result.exit_code == 0 - - -def test_shutdown_signal_handling(windows_bash_session): - """Test handling of shutdown signal during command execution.""" - # This would require mocking the shutdown_listener, which might be complex. - # For now, we'll just verify that a long-running command can be executed - # and that execute() returns properly. - command = 'Start-Sleep -Seconds 1' - action = CmdRunAction(command=command) - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - assert result.exit_code == 0 - - -def test_runspace_state_after_error(windows_bash_session): - """Test that the runspace remains usable after a command error.""" - # First, execute a command with an error - error_action = CmdRunAction(command='NonExistentCommand') - error_result = windows_bash_session.execute(error_action) - assert isinstance(error_result, CmdOutputObservation) - assert error_result.exit_code == 1 - - # Then, execute a valid command - valid_action = CmdRunAction(command="Write-Output 'Still working'") - valid_result = windows_bash_session.execute(valid_action) - assert isinstance(valid_result, CmdOutputObservation) - assert 'Still working' in valid_result.content - assert valid_result.exit_code == 0 - - -def test_stateful_file_operations(windows_bash_session, temp_dir): - """Test file operations to verify runspace state persistence. - - This test verifies that: - 1. The working directory state persists between commands - 2. File operations work correctly relative to the current directory - 3. The runspace maintains state for path-dependent operations - """ - abs_temp_work_dir = os.path.abspath(temp_dir) - - # 1. Create a subdirectory - sub_dir_name = 'file_test_dir' - sub_dir_path = Path(abs_temp_work_dir) / sub_dir_name - - # Use PowerShell to create directory - create_dir_action = CmdRunAction( - command=f'New-Item -Path "{sub_dir_name}" -ItemType Directory' - ) - result = windows_bash_session.execute(create_dir_action) - assert result.exit_code == 0 - - # Verify directory exists on disk - assert sub_dir_path.exists() and sub_dir_path.is_dir() - - # 2. Change to the new directory - cd_action = CmdRunAction(command=f"Set-Location '{sub_dir_name}'") - result = windows_bash_session.execute(cd_action) - assert result.exit_code == 0 - # Check only the last directory component - assert windows_bash_session._cwd.lower().endswith(f'\\{sub_dir_name.lower()}') - - # 3. Create a file in the current directory (which should be the subdirectory) - test_content = 'This is a test file created by PowerShell' - create_file_action = CmdRunAction( - command=f'Set-Content -Path "test_file.txt" -Value "{test_content}"' - ) - result = windows_bash_session.execute(create_file_action) - assert result.exit_code == 0 - - # 4. Verify file exists at the expected path (in the subdirectory) - expected_file_path = sub_dir_path / 'test_file.txt' - assert expected_file_path.exists() and expected_file_path.is_file() - - # 5. Read file contents using PowerShell and verify - read_file_action = CmdRunAction(command='Get-Content -Path "test_file.txt"') - result = windows_bash_session.execute(read_file_action) - assert result.exit_code == 0 - assert test_content in result.content - - # 6. Go back to parent and try to access file using relative path - cd_parent_action = CmdRunAction(command='Set-Location ..') - result = windows_bash_session.execute(cd_parent_action) - assert result.exit_code == 0 - # Check only the base name of the temp directory - temp_dir_basename = os.path.basename(abs_temp_work_dir) - assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower()) - - # 7. Read the file using relative path - read_from_parent_action = CmdRunAction( - command=f'Get-Content -Path "{sub_dir_name}/test_file.txt"' - ) - result = windows_bash_session.execute(read_from_parent_action) - assert result.exit_code == 0 - assert test_content in result.content - - # 8. Clean up - remove_file_action = CmdRunAction( - command=f'Remove-Item -Path "{sub_dir_name}/test_file.txt" -Force' - ) - result = windows_bash_session.execute(remove_file_action) - assert result.exit_code == 0 - - -def test_command_output_continuation(windows_bash_session): - """Test retrieving continued output using empty command after timeout.""" - # Windows PowerShell version - action = CmdRunAction('1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert obs.content.strip() == '1' - assert obs.metadata.prefix == '' - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - - # Continue watching output - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert '[Below is the output of the previous command.]' in obs.metadata.prefix - assert obs.content.strip() == '2' - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - - # Continue until completion - for expected in ['3', '4', '5']: - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert '[Below is the output of the previous command.]' in obs.metadata.prefix - assert obs.content.strip() == expected - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - - # Final empty command to complete - action = CmdRunAction('') - obs = windows_bash_session.execute(action) - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - - -def test_long_running_command_followed_by_execute(windows_bash_session): - """Tests behavior when a new command is sent while another is running after timeout.""" - # Start a slow command - action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; Start-Sleep 3 }') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert '1' in obs.content # First number should appear before timeout - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - assert obs.metadata.prefix == '' - - # Continue watching output - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert '2' in obs.content - assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - - # Test command that produces no output - action = CmdRunAction('sleep 15') - action.set_hard_timeout(2.5) - obs = windows_bash_session.execute(action) - assert '3' not in obs.content - assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' - assert 'The previous command is still running' in obs.metadata.suffix - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - - # Finally continue again - action = CmdRunAction('') - obs = windows_bash_session.execute(action) - assert '3' in obs.content - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - - -def test_command_non_existent_file(windows_bash_session): - """Test command execution for a non-existent file returns non-zero exit code.""" - # Use Get-Content which should fail if the file doesn't exist - action = CmdRunAction(command='Get-Content non_existent_file.txt') - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - # Check that the exit code is non-zero (should be 1 due to the '$?' check) - assert result.exit_code == 1 - # Check that the error message is captured in the output (error stream part) - assert 'Cannot find path' in result.content or 'does not exist' in result.content - - -def test_interactive_input(windows_bash_session): - """Test interactive input attempt reflects implementation limitations.""" - action = CmdRunAction('$name = Read-Host "Enter name"') - result = windows_bash_session.execute(action) - - assert isinstance(result, CmdOutputObservation) - assert ( - 'A command that prompts the user failed because the host program or the command type does not support user interaction. The host was attempting to request confirmation with the following message' - in result.content - ) - assert result.exit_code == 1 - - -def test_windows_path_handling(windows_bash_session, temp_dir): - """Test that os.chdir works with both forward slashes and escaped backslashes on Windows.""" - # Create a test directory - test_dir = Path(temp_dir) / 'test_dir' - test_dir.mkdir() - - # Test both path formats - path_formats = [ - str(test_dir).replace('\\', '/'), # Forward slashes - str(test_dir).replace('\\', '\\\\'), # Escaped backslashes - ] - - for path in path_formats: - # Test changing directory using os.chdir through PowerShell - action = CmdRunAction(command=f'python -c "import os; os.chdir(\'{path}\')"') - result = windows_bash_session.execute(action) - assert isinstance(result, CmdOutputObservation) - assert result.exit_code == 0, f'Failed with path format: {path}' diff --git a/tests/unit/agenthub/test_windows_prompt_refinement.py b/tests/unit/agenthub/test_windows_prompt_refinement.py deleted file mode 100644 index 38e8e8b3f5..0000000000 --- a/tests/unit/agenthub/test_windows_prompt_refinement.py +++ /dev/null @@ -1,179 +0,0 @@ -import sys -from unittest.mock import patch - -import pytest - -from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent -from openhands.core.config import AgentConfig -from openhands.llm.llm import LLM - -# Skip all tests in this module if not running on Windows -pytestmark = pytest.mark.skipif( - sys.platform != 'win32', reason='Windows prompt refinement tests require Windows' -) - - -@pytest.fixture -def mock_llm(): - """Create a mock LLM for testing.""" - llm = LLM(config={'model': 'gpt-4', 'api_key': 'test'}) - return llm - - -@pytest.fixture -def agent_config(): - """Create a basic agent config for testing.""" - return AgentConfig() - - -def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config): - """Test that CodeActAgent's system prompt doesn't contain 'bash' on Windows.""" - # Create a CodeActAgent instance - agent = CodeActAgent(llm=mock_llm, config=agent_config) - - # Get the system prompt - system_prompt = agent.prompt_manager.get_system_message() - - # Assert that 'bash' doesn't exist in the system prompt (case-insensitive) - assert 'bash' not in system_prompt.lower(), ( - f"System prompt contains 'bash' on Windows platform. " - f"It should be replaced with 'powershell'. " - f'System prompt: {system_prompt}' - ) - - # Verify that 'powershell' exists instead (case-insensitive) - assert 'powershell' in system_prompt.lower(), ( - f"System prompt should contain 'powershell' on Windows platform. " - f'System prompt: {system_prompt}' - ) - - -def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config): - """Test that CodeActAgent's tool descriptions don't contain 'bash' on Windows.""" - # Create a CodeActAgent instance - agent = CodeActAgent(llm=mock_llm, config=agent_config) - - # Get the tools - tools = agent.tools - - # Check each tool's description and parameters - for tool in tools: - if tool['type'] == 'function': - function_info = tool['function'] - - # Check function description - description = function_info.get('description', '') - assert 'bash' not in description.lower(), ( - f"Tool '{function_info['name']}' description contains 'bash' on Windows. " - f'Description: {description}' - ) - - # Check parameter descriptions - parameters = function_info.get('parameters', {}) - properties = parameters.get('properties', {}) - - for param_name, param_info in properties.items(): - param_description = param_info.get('description', '') - assert 'bash' not in param_description.lower(), ( - f"Tool '{function_info['name']}' parameter '{param_name}' " - f"description contains 'bash' on Windows. " - f'Parameter description: {param_description}' - ) - - -def test_in_context_learning_example_no_bash_on_windows(): - """Test that in-context learning examples don't contain 'bash' on Windows.""" - from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool - from openhands.agenthub.codeact_agent.tools.finish import FinishTool - from openhands.agenthub.codeact_agent.tools.str_replace_editor import ( - create_str_replace_editor_tool, - ) - from openhands.llm.fn_call_converter import get_example_for_tools - - # Create a sample set of tools - tools = [ - create_cmd_run_tool(), - create_str_replace_editor_tool(), - FinishTool, - ] - - # Get the in-context learning example - example = get_example_for_tools(tools) - - # Assert that 'bash' doesn't exist in the example (case-insensitive) - assert 'bash' not in example.lower(), ( - f"In-context learning example contains 'bash' on Windows platform. " - f"It should be replaced with 'powershell'. " - f'Example: {example}' - ) - - # Verify that 'powershell' exists instead (case-insensitive) - if example: # Only check if example is not empty - assert 'powershell' in example.lower(), ( - f"In-context learning example should contain 'powershell' on Windows platform. " - f'Example: {example}' - ) - - -def test_refine_prompt_function_works(): - """Test that the refine_prompt function correctly replaces 'bash' with 'powershell'.""" - from openhands.agenthub.codeact_agent.tools.bash import refine_prompt - - # Test basic replacement - test_prompt = 'Execute a bash command to list files' - refined_prompt = refine_prompt(test_prompt) - - assert 'bash' not in refined_prompt.lower() - assert 'powershell' in refined_prompt.lower() - assert refined_prompt == 'Execute a powershell command to list files' - - # Test multiple occurrences - test_prompt = 'Use bash to run bash commands in the bash shell' - refined_prompt = refine_prompt(test_prompt) - - assert 'bash' not in refined_prompt.lower() - assert ( - refined_prompt - == 'Use powershell to run powershell commands in the powershell shell' - ) - - # Test case sensitivity - test_prompt = 'BASH and Bash and bash should all be replaced' - refined_prompt = refine_prompt(test_prompt) - - assert 'bash' not in refined_prompt.lower() - assert ( - refined_prompt - == 'powershell and powershell and powershell should all be replaced' - ) - - # Test execute_bash tool name replacement - test_prompt = 'Use the execute_bash tool to run commands' - refined_prompt = refine_prompt(test_prompt) - - assert 'execute_bash' not in refined_prompt.lower() - assert 'execute_powershell' in refined_prompt.lower() - assert refined_prompt == 'Use the execute_powershell tool to run commands' - - # Test that words containing 'bash' but not equal to 'bash' are preserved - test_prompt = 'The bashful person likes bash-like syntax' - refined_prompt = refine_prompt(test_prompt) - - # 'bashful' should be preserved, 'bash-like' should become 'powershell-like' - assert 'bashful' in refined_prompt - assert 'powershell-like' in refined_prompt - assert refined_prompt == 'The bashful person likes powershell-like syntax' - - -def test_refine_prompt_function_on_non_windows(): - """Test that the refine_prompt function doesn't change anything on non-Windows platforms.""" - from openhands.agenthub.codeact_agent.tools.bash import refine_prompt - - # Mock sys.platform to simulate non-Windows - with patch('openhands.agenthub.codeact_agent.tools.bash.sys.platform', 'linux'): - test_prompt = 'Execute a bash command to list files' - refined_prompt = refine_prompt(test_prompt) - - # On non-Windows, the prompt should remain unchanged - assert refined_prompt == test_prompt - assert 'bash' in refined_prompt.lower()