diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index c7a332166b..62ac926921 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -8,6 +8,7 @@ import shlex import shutil import string import tempfile +import time from abc import abstractmethod from pathlib import Path from types import MappingProxyType @@ -78,6 +79,11 @@ from openhands.utils.async_utils import ( DISABLE_VSCODE_PLUGIN = os.getenv('DISABLE_VSCODE_PLUGIN', 'false').lower() == 'true' +# Command retry config for bash session busy race condition (issue #12265) +CMD_RETRY_MAX_ATTEMPTS = 3 +CMD_RETRY_BASE_DELAY_SECONDS = 1.0 +CMD_RETRY_TIMEOUT_EXIT_CODE = -1 + def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]: ret = {} @@ -242,11 +248,68 @@ class Runtime(FileEditRuntimeMixin): self, runtime_status: RuntimeStatus, msg: str = '', level: str = 'info' ): """Sends a status message if the callback function was provided.""" + self.runtime_status = runtime_status if self.status_callback: self.status_callback(level, runtime_status, msg) - # ==================================================================== + def _is_bash_session_timeout(self, obs: Observation) -> bool: + return ( + isinstance(obs, CmdOutputObservation) + and obs.exit_code == CMD_RETRY_TIMEOUT_EXIT_CODE + ) + + def _calculate_retry_delay(self, attempt: int) -> float: + return CMD_RETRY_BASE_DELAY_SECONDS * (2**attempt) + + def _run_cmd_with_retry( + self, + cmd: str, + error_context: str, + max_retries: int = CMD_RETRY_MAX_ATTEMPTS, + ) -> CmdOutputObservation: + """Run command with exponential backoff retry on bash session timeout.""" + + if not cmd or not cmd.strip(): + raise ValueError('Command cannot be empty') + if max_retries < 1: + raise ValueError('max_retries must be at least 1') + + last_obs: Observation | None = None + + for attempt in range(max_retries): + obs = self.run(CmdRunAction(cmd)) + + if isinstance(obs, CmdOutputObservation) and obs.exit_code == 0: + if attempt > 0: + logger.info(f'Command succeeded after {attempt + 1} attempts') + return obs + + last_obs = obs + is_retryable = self._is_bash_session_timeout(obs) + + if is_retryable and attempt < max_retries - 1: + delay = self._calculate_retry_delay(attempt) + logger.warning( + f'Bash session busy, retrying in {delay:.1f}s ' + f'(attempt {attempt + 1}/{max_retries})' + ) + time.sleep(delay) + continue + + break + + error_content = self._extract_error_content(last_obs) + raise RuntimeError(f'{error_context}: {error_content}') + + def _extract_error_content(self, obs: Observation | None) -> str: + if obs is None: + return 'No observation received' + if isinstance(obs, CmdOutputObservation): + return f'{obs.content} (exit_code={obs.exit_code})' + if isinstance(obs, ErrorObservation): + return obs.content + return str(obs) def add_env_vars(self, env_vars: dict[str, str]) -> None: env_vars = {key.upper(): value for key, value in env_vars.items()} @@ -282,11 +345,9 @@ class Runtime(FileEditRuntimeMixin): cmd = cmd.strip() logger.debug('Adding env vars to PowerShell') # don't log the values - obs = self.run(CmdRunAction(cmd)) - if not isinstance(obs, CmdOutputObservation) or obs.exit_code != 0: - raise RuntimeError( - f'Failed to add env vars [{env_vars.keys()}] to environment: {obs.content}' - ) + self._run_cmd_with_retry( + cmd, f'Failed to add env vars [{env_vars.keys()}] to environment' + ) # We don't add to profile persistence on Windows as it's more complex # and varies between PowerShell versions @@ -308,20 +369,16 @@ class Runtime(FileEditRuntimeMixin): cmd = cmd.strip() logger.debug('Adding env vars to bash') # don't log the values - obs = self.run(CmdRunAction(cmd)) - if not isinstance(obs, CmdOutputObservation) or obs.exit_code != 0: - raise RuntimeError( - f'Failed to add env vars [{env_vars.keys()}] to environment: {obs.content}' - ) + self._run_cmd_with_retry( + cmd, f'Failed to add env vars [{env_vars.keys()}] to environment' + ) # Add to .bashrc for persistence bashrc_cmd = bashrc_cmd.strip() logger.debug(f'Adding env var to .bashrc: {env_vars.keys()}') - obs = self.run(CmdRunAction(bashrc_cmd)) - if not isinstance(obs, CmdOutputObservation) or obs.exit_code != 0: - raise RuntimeError( - f'Failed to add env vars [{env_vars.keys()}] to .bashrc: {obs.content}' - ) + self._run_cmd_with_retry( + bashrc_cmd, f'Failed to add env vars [{env_vars.keys()}] to .bashrc' + ) def on_event(self, event: Event) -> None: if isinstance(event, Action): diff --git a/tests/unit/runtime/test_cmd_retry.py b/tests/unit/runtime/test_cmd_retry.py new file mode 100644 index 0000000000..be39754886 --- /dev/null +++ b/tests/unit/runtime/test_cmd_retry.py @@ -0,0 +1,218 @@ +"""Tests for command retry logic with exponential backoff. + +Tests the fix for GitHub issue #12265 where GitHub token export fails +when bash session is busy (race condition). +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from openhands.events.observation import CmdOutputObservation, ErrorObservation +from openhands.runtime.base import ( + CMD_RETRY_BASE_DELAY_SECONDS, + CMD_RETRY_MAX_ATTEMPTS, + CMD_RETRY_TIMEOUT_EXIT_CODE, +) + + +class TestCmdRetryHelpers: + """Tests for the helper methods used in command retry logic.""" + + @pytest.fixture + def mock_runtime(self): + """Create a mock runtime with the retry methods.""" + from openhands.runtime.base import Runtime + + # Create a minimal mock that has the methods we need to test + runtime = MagicMock(spec=Runtime) + + # Bind the actual methods to our mock + runtime._is_bash_session_timeout = Runtime._is_bash_session_timeout.__get__( + runtime, Runtime + ) + runtime._calculate_retry_delay = Runtime._calculate_retry_delay.__get__( + runtime, Runtime + ) + runtime._extract_error_content = Runtime._extract_error_content.__get__( + runtime, Runtime + ) + runtime._run_cmd_with_retry = Runtime._run_cmd_with_retry.__get__( + runtime, Runtime + ) + + return runtime + + def test_is_bash_session_timeout_with_timeout_exit_code(self, mock_runtime): + """Test that timeout exit code (-1) is correctly identified.""" + obs = CmdOutputObservation(content='', command='test', exit_code=-1) + assert mock_runtime._is_bash_session_timeout(obs) is True + + def test_is_bash_session_timeout_with_success(self, mock_runtime): + """Test that successful commands are not identified as timeout.""" + obs = CmdOutputObservation(content='output', command='test', exit_code=0) + assert mock_runtime._is_bash_session_timeout(obs) is False + + def test_is_bash_session_timeout_with_other_error(self, mock_runtime): + """Test that other error codes are not identified as timeout.""" + obs = CmdOutputObservation(content='error', command='test', exit_code=1) + assert mock_runtime._is_bash_session_timeout(obs) is False + + def test_is_bash_session_timeout_with_error_observation(self, mock_runtime): + """Test that ErrorObservation is not identified as timeout.""" + obs = ErrorObservation(content='some error') + assert mock_runtime._is_bash_session_timeout(obs) is False + + def test_calculate_retry_delay_exponential(self, mock_runtime): + """Test exponential backoff delay calculation.""" + assert mock_runtime._calculate_retry_delay(0) == CMD_RETRY_BASE_DELAY_SECONDS + assert ( + mock_runtime._calculate_retry_delay(1) == CMD_RETRY_BASE_DELAY_SECONDS * 2 + ) + assert ( + mock_runtime._calculate_retry_delay(2) == CMD_RETRY_BASE_DELAY_SECONDS * 4 + ) + + def test_extract_error_content_from_none(self, mock_runtime): + """Test error extraction from None observation.""" + assert mock_runtime._extract_error_content(None) == 'No observation received' + + def test_extract_error_content_from_cmd_output(self, mock_runtime): + """Test error extraction from CmdOutputObservation.""" + obs = CmdOutputObservation( + content='command failed', command='test', exit_code=1 + ) + result = mock_runtime._extract_error_content(obs) + assert 'command failed' in result + assert 'exit_code=1' in result + + def test_extract_error_content_from_error_observation(self, mock_runtime): + """Test error extraction from ErrorObservation.""" + obs = ErrorObservation(content='something went wrong') + assert mock_runtime._extract_error_content(obs) == 'something went wrong' + + +class TestRunCmdWithRetry: + """Tests for the main _run_cmd_with_retry method.""" + + @pytest.fixture + def mock_runtime(self): + """Create a mock runtime for retry testing.""" + from openhands.runtime.base import Runtime + + runtime = MagicMock(spec=Runtime) + + # Bind actual methods + runtime._is_bash_session_timeout = Runtime._is_bash_session_timeout.__get__( + runtime, Runtime + ) + runtime._calculate_retry_delay = Runtime._calculate_retry_delay.__get__( + runtime, Runtime + ) + runtime._extract_error_content = Runtime._extract_error_content.__get__( + runtime, Runtime + ) + runtime._run_cmd_with_retry = Runtime._run_cmd_with_retry.__get__( + runtime, Runtime + ) + + return runtime + + def test_success_on_first_attempt(self, mock_runtime): + """Test successful command execution on first try.""" + success_obs = CmdOutputObservation( + content='success', command='echo test', exit_code=0 + ) + mock_runtime.run = MagicMock(return_value=success_obs) + + result = mock_runtime._run_cmd_with_retry('echo test', 'Test error') + + assert result == success_obs + assert mock_runtime.run.call_count == 1 + + @patch('openhands.runtime.base.time.sleep') + def test_retry_on_timeout_then_success(self, mock_sleep, mock_runtime): + """Test retry behavior when first attempt times out.""" + timeout_obs = CmdOutputObservation( + content='', command='export VAR=value', exit_code=-1 + ) + success_obs = CmdOutputObservation( + content='', command='export VAR=value', exit_code=0 + ) + + mock_runtime.run = MagicMock(side_effect=[timeout_obs, success_obs]) + + result = mock_runtime._run_cmd_with_retry( + 'export VAR=value', 'Failed to export' + ) + + assert result == success_obs + assert mock_runtime.run.call_count == 2 + mock_sleep.assert_called_once() + + @patch('openhands.runtime.base.time.sleep') + def test_retry_exhaustion_raises_error(self, mock_sleep, mock_runtime): + """Test that RuntimeError is raised after all retries fail.""" + timeout_obs = CmdOutputObservation( + content='timeout', command='cmd', exit_code=-1 + ) + mock_runtime.run = MagicMock(return_value=timeout_obs) + + with pytest.raises(RuntimeError) as exc_info: + mock_runtime._run_cmd_with_retry('cmd', 'Command failed', max_retries=3) + + assert 'Command failed' in str(exc_info.value) + assert mock_runtime.run.call_count == 3 + assert mock_sleep.call_count == 2 # Called between retries, not after last + + def test_non_timeout_error_fails_immediately(self, mock_runtime): + """Test that non-timeout errors don't trigger retry.""" + error_obs = CmdOutputObservation( + content='permission denied', command='cmd', exit_code=1 + ) + mock_runtime.run = MagicMock(return_value=error_obs) + + with pytest.raises(RuntimeError) as exc_info: + mock_runtime._run_cmd_with_retry('cmd', 'Command failed') + + assert 'Command failed' in str(exc_info.value) + assert mock_runtime.run.call_count == 1 # No retries for non-timeout + + def test_empty_command_raises_value_error(self, mock_runtime): + """Test that empty command raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + mock_runtime._run_cmd_with_retry('', 'Error') + assert 'empty' in str(exc_info.value).lower() + + def test_invalid_max_retries_raises_value_error(self, mock_runtime): + """Test that max_retries < 1 raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + mock_runtime._run_cmd_with_retry('cmd', 'Error', max_retries=0) + assert 'max_retries' in str(exc_info.value).lower() + + @patch('openhands.runtime.base.time.sleep') + def test_exponential_backoff_delays(self, mock_sleep, mock_runtime): + """Test that delays follow exponential backoff pattern.""" + timeout_obs = CmdOutputObservation(content='', command='cmd', exit_code=-1) + mock_runtime.run = MagicMock(return_value=timeout_obs) + + with pytest.raises(RuntimeError): + mock_runtime._run_cmd_with_retry('cmd', 'Error', max_retries=3) + + # Verify exponential delays: 1s, 2s (not called after 3rd attempt) + calls = mock_sleep.call_args_list + assert len(calls) == 2 + assert calls[0][0][0] == CMD_RETRY_BASE_DELAY_SECONDS # 1s + assert calls[1][0][0] == CMD_RETRY_BASE_DELAY_SECONDS * 2 # 2s + + +class TestConstants: + """Tests for retry configuration constants.""" + + def test_constants_have_sensible_values(self): + """Test that constants are configured reasonably.""" + assert CMD_RETRY_MAX_ATTEMPTS >= 1 + assert CMD_RETRY_BASE_DELAY_SECONDS > 0 + assert ( + CMD_RETRY_TIMEOUT_EXIT_CODE == -1 + ) # Must match NO_CHANGE_TIMEOUT behavior