fix(bash): also show timeout reminder when no_change_timeout is triggered (#6318)

Co-authored-by: Robert Brennan <accounts@rbren.io>
2025-12-26 05:48:36 +08:00 · 2025-01-17 14:31:23 -05:00 · 2025-01-17 14:31:23 -05:00 · 899c1f8360
commit 899c1f8360
parent c5d7caf01f
13 changed files with 324 additions and 116 deletions
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@ -359,7 +359,7 @@ def complete_runtime(
        action = CmdRunAction(
            command=f'git diff --no-color --cached {instance["base_commit"]}'
        )
-        action.timeout = max(300 + 100 * n_retries, 600)
+        action.set_hard_timeout(max(300 + 100 * n_retries, 600))
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
--- a/evaluation/integration_tests/tests/t07_interactive_commands.py
+++ b/evaluation/integration_tests/tests/t07_interactive_commands.py
@ -0,0 +1,73 @@
+import hashlib
+
+from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
+from openhands.events.action import (
+    AgentFinishAction,
+    FileWriteAction,
+    MessageAction,
+)
+from openhands.events.event import Event
+from openhands.events.observation import AgentDelegateObservation
+from openhands.runtime.base import Runtime
+
+
+class Test(BaseIntegrationTest):
+    INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
+    SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000
+
+    @classmethod
+    def initialize_runtime(cls, runtime: Runtime) -> None:
+        from openhands.core.logger import openhands_logger as logger
+
+        action = FileWriteAction(
+            path='/workspace/python_script.py',
+            content=(
+                'name = input("Enter your name: "); age = input("Enter your age: "); '
+                'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
+                'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
+            ),
+        )
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        observation = runtime.run_action(action)
+        logger.info(observation, extra={'msg_type': 'OBSERVATION'})
+
+    @classmethod
+    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
+        from openhands.core.logger import openhands_logger as logger
+
+        # check if the secret number is in any message
+        message_actions = [
+            event
+            for event in histories
+            if isinstance(
+                event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
+            )
+        ]
+        logger.info(f'Total message-like events: {len(message_actions)}')
+
+        for event in message_actions:
+            try:
+                if isinstance(event, AgentDelegateObservation):
+                    content = event.content
+                elif isinstance(event, AgentFinishAction):
+                    content = event.outputs.get('content', '')
+                    if event.thought:
+                        content += f'\n\n{event.thought}'
+                elif isinstance(event, MessageAction):
+                    content = event.content
+                else:
+                    logger.warning(f'Unexpected event type: {type(event)}')
+                    continue
+
+                if str(cls.SECRET_NUMBER) in content:
+                    return TestResult(success=True)
+            except Exception as e:
+                logger.error(f'Error processing event: {e}')
+
+        logger.debug(
+            f'Total messages: {len(message_actions)}. Messages: {message_actions}'
+        )
+        return TestResult(
+            success=False,
+            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
+        )
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@ -371,7 +371,6 @@ def _process_instance_wrapper(
            error = str(e)
            stacktrace = traceback.format_exc()
            if attempt == max_retries:
-                logger.exception(e)
                msg = (
                    '-' * 10
                    + '\n'
@ -395,19 +394,13 @@ def _process_instance_wrapper(
                + '-' * 10
                + '\n'
            )
-            if isinstance(
-                e,
-                (
-                    AgentRuntimeDisconnectedError,
-                    AgentRuntimeUnavailableError,
-                    AgentRuntimeNotFoundError,
-                ),
-            ):
+            # e is likely an EvalException, so we can't directly infer it from type
+            # but rather check if it's a fatal error
+            if is_fatal_runtime_error(str(e)):
                runtime_failure_count += 1
                msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
+                msg += '\n' + '-' * 10 + '\n'
            logger.error(msg)
-            if use_mp:
-                print(msg)  # use print to directly print to console
            time.sleep(5)


@ -564,6 +557,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
        AgentRuntimeNotReadyError,
        AgentRuntimeDisconnectedError,
        AgentRuntimeNotFoundError,
+        ConnectionError,
    ]

    if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
@ -573,6 +567,23 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
    return False


+def is_fatal_runtime_error(error: str | None) -> bool:
+    if not error:
+        return False
+
+    FATAL_RUNTIME_ERRORS = [
+        AgentRuntimeUnavailableError,
+        AgentRuntimeDisconnectedError,
+        AgentRuntimeNotFoundError,
+    ]
+
+    if any(exception.__name__ in error for exception in FATAL_RUNTIME_ERRORS):
+        logger.error(f'Fatal runtime error detected: {error}')
+        return True
+
+    return False
+
+
 def get_metrics(state: State) -> dict[str, Any]:
    """Extract metrics from the state."""
    metrics = state.metrics.get() if state.metrics else {}
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@ -31,7 +31,7 @@ from openhands.events.tool import ToolCallMetadata

 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
 * Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
-* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
+* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
 """

 CmdRunTool = ChatCompletionToolParam(
@ -46,6 +46,11 @@ CmdRunTool = ChatCompletionToolParam(
                    'type': 'string',
                    'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
                },
+                'is_input': {
+                    'type': 'string',
+                    'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
+                    'enum': ['true', 'false'],
+                },
            },
            'required': ['command'],
        },
@ -488,6 +493,12 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                ) from e
            if tool_call.function.name == 'execute_bash':
+                # this is an LLM error: add empty command to avoid breaking the tool call
+                if 'command' not in arguments:
+                    arguments['command'] = ''
+                # convert is_input to boolean
+                if 'is_input' in arguments:
+                    arguments['is_input'] = arguments['is_input'] == 'true'
                action = CmdRunAction(**arguments)
            elif tool_call.function.name == 'execute_ipython_cell':
                action = IPythonRunCellAction(**arguments)
--- a/openhands/events/action/commands.py
+++ b/openhands/events/action/commands.py
@ -11,8 +11,10 @@ from openhands.events.action.action import (

@dataclass
 class CmdRunAction(Action):
-    command: str
-    # When `command` is empty, it will be used to print the current tmux window
+    command: (
+        str  # When `command` is empty, it will be used to print the current tmux window
+    )
+    is_input: bool = False  # if True, the command is an input to the running process
    thought: str = ''
    blocking: bool = False
    # If blocking is True, the command will be run in a blocking manner.
@ -28,7 +30,7 @@ class CmdRunAction(Action):
        return f'Running command: {self.command}'

    def __str__(self) -> str:
-        ret = f'**CmdRunAction (source={self.source})**\n'
+        ret = f'**CmdRunAction (source={self.source}, is_input={self.is_input})**\n'
        if self.thought:
            ret += f'THOUGHT: {self.thought}\n'
        ret += f'COMMAND:\n{self.command}'
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@ -197,9 +197,10 @@ class Runtime(FileEditRuntimeMixin):
                e, AgentRuntimeDisconnectedError
            ):
                err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
-            self.log('error', f'Unexpected error while running action: {str(e)}')
+            error_message = f'{type(e).__name__}: {str(e)}'
+            self.log('error', f'Unexpected error while running action: {error_message}')
            self.log('error', f'Problematic action: {str(event)}')
-            self.send_error_message(err_id, str(e))
+            self.send_error_message(err_id, error_message)
            self.close()
            return

--- a/openhands/runtime/impl/action_execution/action_execution_client.py
+++ b/openhands/runtime/impl/action_execution/action_execution_client.py
@ -59,6 +59,7 @@ class ActionExecutionClient(Runtime):
        self.session = HttpSession()
        self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time
        self._runtime_initialized: bool = False
+        self._runtime_closed: bool = False
        self._vscode_token: str | None = None  # initial dummy value
        super().__init__(
            config,
@ -283,4 +284,9 @@ class ActionExecutionClient(Runtime):
        return self.send_action_for_execution(action)

    def close(self) -> None:
+        # Make sure we don't close the session multiple times
+        # Can happen in evaluation
+        if self._runtime_closed:
+            return
+        self._runtime_closed = True
        self.session.close()
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@ -13,6 +13,7 @@ from openhands.core.exceptions import (
    AgentRuntimeNotReadyError,
    AgentRuntimeUnavailableError,
 )
+from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.runtime.builder.remote import RemoteRuntimeBuilder
 from openhands.runtime.impl.action_execution.action_execution_client import (
@ -75,6 +76,10 @@ class RemoteRuntime(ActionExecutionClient):
        self.available_hosts: dict[str, int] = {}
        self._runtime_initialized: bool = False

+    def log(self, level: str, message: str) -> None:
+        message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
+        getattr(logger, level)(message, stacklevel=2)
+
    def _get_action_execution_server_host(self):
        return self.runtime_url

@ -350,20 +355,33 @@ class RemoteRuntime(ActionExecutionClient):
            super().close()
            return
        try:
-            with self._send_runtime_api_request(
-                'POST',
-                f'{self.config.sandbox.remote_runtime_api_url}/stop',
-                json={'runtime_id': self.runtime_id},
-            ):
-                self.log('debug', 'Runtime stopped.')
+            if not self._runtime_closed:
+                with self._send_runtime_api_request(
+                    'POST',
+                    f'{self.config.sandbox.remote_runtime_api_url}/stop',
+                    json={'runtime_id': self.runtime_id},
+                ):
+                    self.log('debug', 'Runtime stopped.')
        except Exception as e:
            raise e
        finally:
            super().close()

    def _send_runtime_api_request(self, method, url, **kwargs):
-        return send_request(self.session, method, url, **kwargs)
+        try:
+            return send_request(self.session, method, url, **kwargs)
+        except requests.Timeout:
+            self.log(
+                'error',
+                f'No response received within the timeout period for url: {url}',
+            )
+            raise

+    @tenacity.retry(
+        retry=tenacity.retry_if_exception_type(ConnectionError),
+        stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
+        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
+    )
    def _send_action_server_request(self, method, url, **kwargs):
        try:
            return super()._send_action_server_request(method, url, **kwargs)
@ -375,14 +393,14 @@ class RemoteRuntime(ActionExecutionClient):
            raise

        except requests.HTTPError as e:
-            if e.response.status_code in (404, 502):
+            if e.response.status_code in (404, 502, 504):
                if e.response.status_code == 404:
                    raise AgentRuntimeDisconnectedError(
-                        'Runtime is not responding. This may be temporary, please try again.'
+                        f'Runtime is not responding. This may be temporary, please try again. Original error: {e}'
                    ) from e
-                else:  # 502
+                else:  # 502, 504
                    raise AgentRuntimeDisconnectedError(
-                        'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again.'
+                        f'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again. Original error: {e}'
                    ) from e
            elif e.response.status_code == 503:
                self.log('warning', 'Runtime appears to be paused. Resuming...')
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@ -461,22 +461,28 @@ class BashSession:
        # Strip the command of any leading/trailing whitespace
        logger.debug(f'RECEIVED ACTION: {action}')
        command = action.command.strip()
-        is_special_key = self._is_special_key(command)
+        is_input: bool = action.is_input

-        # Handle when prev command is hard timeout
-
-        if command == '' and self.prev_status not in {
+        # If no previous command is still running, an empty command or an input has nothing to act on
+        if self.prev_status not in {
            BashCommandStatus.CONTINUE,
            BashCommandStatus.NO_CHANGE_TIMEOUT,
            BashCommandStatus.HARD_TIMEOUT,
        }:
-            return CmdOutputObservation(
-                content='ERROR: No previous command to continue from. '
-                + 'Previous command has to be timeout to be continued.',
-                command='',
-                metadata=CmdOutputMetadata(),
-            )
+            if command == '':
+                return CmdOutputObservation(
+                    content='ERROR: No previous running command to retrieve logs from.',
+                    command='',
+                    metadata=CmdOutputMetadata(),
+                )
+            if is_input:
+                return CmdOutputObservation(
+                    content='ERROR: No previous running command to interact with.',
+                    command='',
+                    metadata=CmdOutputMetadata(),
+                )

+        # Check if the command is a single command or multiple commands
        splited_commands = split_bash_commands(command)
        if len(splited_commands) > 1:
            return ErrorObservation(
@ -491,46 +497,62 @@ class BashSession:
        last_change_time = start_time
        last_pane_output = self._get_pane_content()

-        # Do not check hard timeout if the command is a special key
-        if command != '' and is_special_key:
-            logger.debug(f'SENDING SPECIAL KEY: {command!r}')
-            self.pane.send_keys(command, enter=False)
-        # When prev command is hard timeout, and we are trying to execute new command
-        elif self.prev_status == BashCommandStatus.HARD_TIMEOUT and command != '':
-            if not last_pane_output.endswith(CMD_OUTPUT_PS1_END):
-                _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
-                raw_command_output = self._combine_outputs_between_matches(
-                    last_pane_output, _ps1_matches
-                )
-                metadata = CmdOutputMetadata()  # No metadata available
-                metadata.suffix = (
-                    f'\n[Your command "{command}" is NOT executed. '
-                    f'The previous command was timed out but still running. Above is the output of the previous command. '
-                    "You may wait longer to see additional output of the previous command by sending empty command '', "
-                    'send other commands to interact with the current process, '
-                    'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
-                )
-                command_output = self._get_command_output(
-                    command,
-                    raw_command_output,
-                    metadata,
-                    continue_prefix='[Below is the output of the previous command.]\n',
-                )
-                return CmdOutputObservation(
-                    command=command,
-                    content=command_output,
-                    metadata=metadata,
-                )
-        # Only send the command to the pane if it's not a special key and it's not empty
-        # AND previous hard timeout command is resolved
-        elif command != '' and not is_special_key:
-            # convert command to raw string
-            command = escape_bash_special_chars(command)
-            logger.debug(f'SENDING COMMAND: {command!r}')
-            self.pane.send_keys(
-                command,
-                enter=True,
+        # When prev command is still running, and we are trying to send a new command
+        if (
+            self.prev_status
+            in {
+                BashCommandStatus.HARD_TIMEOUT,
+                BashCommandStatus.NO_CHANGE_TIMEOUT,
+            }
+            and not last_pane_output.endswith(
+                CMD_OUTPUT_PS1_END
+            )  # prev command is not completed
+            and not is_input
+            and command != ''  # not input and not empty command
+        ):
+            _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
+            raw_command_output = self._combine_outputs_between_matches(
+                last_pane_output, _ps1_matches
            )
+            metadata = CmdOutputMetadata()  # No metadata available
+            metadata.suffix = (
+                f'\n[Your command "{command}" is NOT executed. '
+                f'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
+                'By setting `is_input` to `true`, you can interact with the current process: '
+                "You may wait longer to see additional output of the previous command by sending empty command '', "
+                'send other commands to interact with the current process, '
+                'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
+            )
+            logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
+            command_output = self._get_command_output(
+                command,
+                raw_command_output,
+                metadata,
+                continue_prefix='[Below is the output of the previous command.]\n',
+            )
+            return CmdOutputObservation(
+                command=command,
+                content=command_output,
+                metadata=metadata,
+            )
+
+        # Send actual command/inputs to the pane
+        if command != '':
+            is_special_key = self._is_special_key(command)
+            if is_input:
+                logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
+                self.pane.send_keys(
+                    command,
+                    enter=not is_special_key,
+                )
+            else:
+                # convert command to raw string
+                command = escape_bash_special_chars(command)
+                logger.debug(f'SENDING COMMAND: {command!r}')
+                self.pane.send_keys(
+                    command,
+                    enter=not is_special_key,
+                )

        # Loop until the command completes or times out
        while should_continue():
--- a/tests/runtime/test_bash.py
+++ b/tests/runtime/test_bash.py
@ -57,7 +57,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
            in obs.metadata.suffix
        )

-        action = CmdRunAction(command='C-c')
+        action = CmdRunAction(command='C-c', is_input=True)
        action.set_hard_timeout(30)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@ -571,7 +571,7 @@ def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
        assert 'Enter name:' in obs.content
        assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix

-        action = CmdRunAction('John')
+        action = CmdRunAction('John', is_input=True)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello John' in obs.content
@ -741,10 +741,7 @@ def test_long_running_command_follow_by_execute(
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '3' not in obs.content
        assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
-        assert (
-            'The previous command was timed out but still running.'
-            in obs.metadata.suffix
-        )
+        assert 'The previous command is still running' in obs.metadata.suffix
        assert obs.metadata.exit_code == -1  # -1 indicates command is still running

        # Finally continue again
@ -763,7 +760,9 @@ def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
        # Test empty command without previous command
        obs = runtime.run_action(CmdRunAction(''))
        assert isinstance(obs, CmdOutputObservation)
-        assert 'ERROR: No previous command to continue from' in obs.content
+        assert (
+            'ERROR: No previous running command to retrieve logs from.' in obs.content
+        )
    finally:
        _close_test_runtime(runtime)

@ -781,13 +780,52 @@ def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
        assert obs.metadata.exit_code == -1  # -1 indicates command is still running

        # Send first input (name)
-        obs = runtime.run_action(CmdRunAction('Alice'))
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your age:' in obs.content
        assert obs.metadata.exit_code == -1

        # Send second input (age)
-        obs = runtime.run_action(CmdRunAction('25'))
+        obs = runtime.run_action(CmdRunAction('25', is_input=True))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Hello Alice, you are 25 years old' in obs.content
+        assert obs.metadata.exit_code == 0
+        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
+    finally:
+        _close_test_runtime(runtime)
+
+
+def test_python_interactive_input_without_set_input(
+    temp_dir, runtime_cls, run_as_openhands
+):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # Test Python program that asks for input - properly escaped for bash
+        python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
+
+        # Start Python with the interactive script
+        obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your name:' in obs.content
+        assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+
+        # Send first input (name)
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your age:' not in obs.content
+        assert (
+            'Your command "Alice" is NOT executed. The previous command is still running'
+            in obs.metadata.suffix
+        )
+        assert obs.metadata.exit_code == -1
+
+        # Try again now with input
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your age:' in obs.content
+        assert obs.metadata.exit_code == -1
+
+        obs = runtime.run_action(CmdRunAction('25', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello Alice, you are 25 years old' in obs.content
        assert obs.metadata.exit_code == 0
@ -844,7 +882,7 @@ def test_stress_long_output_with_soft_and_hard_timeout(
            assert obs.exit_code == -1  # Command is still running, waiting for input

            # Send the confirmation
-            action = CmdRunAction('Y')
+            action = CmdRunAction('Y', is_input=True)
            obs = runtime.run_action(action)
            assert 'Proceeding with operation...' in obs.content
            assert 'Operation completed successfully!' in obs.content
@ -869,13 +907,10 @@ def test_stress_long_output_with_soft_and_hard_timeout(
            # where it will not accept any new commands.
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == -1
-            assert (
-                'The previous command was timed out but still running.'
-                in obs.metadata.suffix
-            )
+            assert 'The previous command is still running' in obs.metadata.suffix

            # We need to send a Ctrl+C to reset the terminal.
-            obs = runtime.run_action(CmdRunAction('C-c'))
+            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
            assert obs.exit_code == 130

            # Now make sure the terminal is in a good state
@ -887,3 +922,25 @@ def test_stress_long_output_with_soft_and_hard_timeout(

    finally:
        _close_test_runtime(runtime)
+
+
+def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # create a git repo
+        action = CmdRunAction(
+            'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
+        )
+        obs = runtime.run_action(action)
+        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.metadata.exit_code == 0
+
+        # List the remotes; the output should contain the URL but not the echoed command
+        obs = runtime.run_action(CmdRunAction('git remote -v'))
+        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.metadata.exit_code == 0
+        assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
+        assert 'git remote -v' not in obs.content
+
+    finally:
+        _close_test_runtime(runtime)
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@ -97,6 +97,7 @@ def test_cmd_run_action_serialization_deserialization():
        'args': {
            'blocking': False,
            'command': 'echo "Hello world"',
+            'is_input': False,
            'thought': '',
            'hidden': False,
            'confirmation_state': ActionConfirmationStatus.CONFIRMED,
@ -181,3 +182,4 @@ def test_legacy_serialization():
    assert event_dict['args']['blocking'] is False
    assert event_dict['args']['command'] == 'echo "Hello world"'
    assert event_dict['args']['thought'] == ''
+    assert event_dict['args']['is_input'] is False
--- a/tests/unit/test_bash_session.py
+++ b/tests/unit/test_bash_session.py
@ -1,5 +1,6 @@
 import os
 import tempfile
+import time

 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import CmdRunAction
@ -91,7 +92,7 @@ def test_long_running_command_follow_by_execute():
    assert obs.metadata.prefix == ''

    # Continue watching output
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '2' in obs.content
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
@ -107,14 +108,20 @@ def test_long_running_command_follow_by_execute():
    # Test command that produces no output
    obs = session.execute(CmdRunAction('sleep 15'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert '3' not in obs.content
+    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
+    assert 'The previous command is still running' in obs.metadata.suffix
+    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
+
+    time.sleep(3)
+
+    # Run it again, this time it should produce output
+    obs = session.execute(CmdRunAction('sleep 15'))
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '3' in obs.content
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
-    assert obs.metadata.suffix == (
-        '\n[The command has no new output after 2 seconds. '
-        "You may wait longer to see additional output by sending empty command '', "
-        'send other commands to interact with the current process, '
-        'or send keys to interrupt/kill the command.]'
-    )
+    assert 'The previous command is still running' in obs.metadata.suffix
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

@ -144,7 +151,7 @@ def test_interactive_command():
    assert obs.metadata.prefix == ''

    # Send input
-    obs = session.execute(CmdRunAction('John'))
+    obs = session.execute(CmdRunAction('John', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Hello John' in obs.content
    assert obs.metadata.exit_code == 0
@ -165,7 +172,7 @@ def test_interactive_command():
    )
    assert obs.metadata.prefix == ''

-    obs = session.execute(CmdRunAction('line 1'))
+    obs = session.execute(CmdRunAction('line 1', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -177,7 +184,7 @@ def test_interactive_command():
    )
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

-    obs = session.execute(CmdRunAction('line 2'))
+    obs = session.execute(CmdRunAction('line 2', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -189,7 +196,7 @@ def test_interactive_command():
    )
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

-    obs = session.execute(CmdRunAction('EOF'))
+    obs = session.execute(CmdRunAction('EOF', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'line 1' in obs.content and 'line 2' in obs.content
    assert obs.metadata.exit_code == 0
@ -220,7 +227,7 @@ def test_ctrl_c():
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    # Send Ctrl+C
-    obs = session.execute(CmdRunAction('C-c'))
+    obs = session.execute(CmdRunAction('C-c', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == 130  # Standard exit code for Ctrl+C
    assert (
@ -240,10 +247,7 @@ def test_empty_command_errors():
    # Test empty command without previous command
    obs = session.execute(CmdRunAction(''))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        obs.content
-        == 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
-    )
+    assert obs.content == 'ERROR: No previous running command to retrieve logs from.'
    assert obs.metadata.exit_code == -1
    assert obs.metadata.prefix == ''
    assert obs.metadata.suffix == ''
@ -264,14 +268,14 @@ def test_command_output_continuation():
    assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '[Below is the output of the previous command.]' in obs.metadata.prefix
    assert obs.content.strip() == '2'
    assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '[Below is the output of the previous command.]' in obs.metadata.prefix
    assert obs.content.strip() == '3'
@ -279,21 +283,21 @@ def test_command_output_continuation():
    assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '[Below is the output of the previous command.]' in obs.metadata.prefix
    assert obs.content.strip() == '4'
    assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '[Below is the output of the previous command.]' in obs.metadata.prefix
    assert obs.content.strip() == '5'
    assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    assert session.prev_status == BashCommandStatus.COMPLETED
@ -367,14 +371,14 @@ def test_python_interactive_input():
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    # Send first input (name)
-    obs = session.execute(CmdRunAction('Alice'))
+    obs = session.execute(CmdRunAction('Alice', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Enter your age:' in obs.content
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    # Send second input (age)
-    obs = session.execute(CmdRunAction('25'))
+    obs = session.execute(CmdRunAction('25', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Hello Alice, you are 25 years old' in obs.content
    assert obs.metadata.exit_code == 0
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@ -367,6 +367,7 @@ async def test_unsafe_bash_command(temp_dir: str):
                        arguments={
                            'blocking': False,
                            'command': 'ls',
+                            'is_input': False,
                            'hidden': False,
                            'confirmation_state': ActionConfirmationStatus.CONFIRMED,
                        },