From 899c1f8360bafafe3d575664e714223f68a401b3 Mon Sep 17 00:00:00 2001
From: Xingyao Wang <xingyao@all-hands.dev>
Date: Fri, 17 Jan 2025 14:31:23 -0500
Subject: [PATCH] fix(bash): also show timeout reminder when no_change_timeout
 is triggered (#6318)

Co-authored-by: Robert Brennan <accounts@rbren.io>
---
 evaluation/benchmarks/swe_bench/run_infer.py  |   2 +-
 .../tests/t07_interactive_commands.py         |  73 +++++++++++
 evaluation/utils/shared.py                    |  33 +++--
 .../codeact_agent/function_calling.py         |  13 +-
 openhands/events/action/commands.py           |   8 +-
 openhands/runtime/base.py                     |   5 +-
 .../action_execution_client.py                |   6 +
 .../runtime/impl/remote/remote_runtime.py     |  40 ++++--
 openhands/runtime/utils/bash.py               | 120 +++++++++++-------
 tests/runtime/test_bash.py                    |  87 ++++++++++---
 tests/unit/test_action_serialization.py       |   2 +
 tests/unit/test_bash_session.py               |  50 ++++----
 tests/unit/test_security.py                   |   1 +
 13 files changed, 324 insertions(+), 116 deletions(-)
 create mode 100644 evaluation/integration_tests/tests/t07_interactive_commands.py

diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index 8375295b99..511a22c4ed 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -359,7 +359,7 @@ def complete_runtime(
         action = CmdRunAction(
             command=f'git diff --no-color --cached {instance["base_commit"]}'
         )
-        action.timeout = max(300 + 100 * n_retries, 600)
+        action.set_hard_timeout(max(300 + 100 * n_retries, 600))
         logger.info(action, extra={'msg_type': 'ACTION'})
         obs = runtime.run_action(action)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
diff --git a/evaluation/integration_tests/tests/t07_interactive_commands.py b/evaluation/integration_tests/tests/t07_interactive_commands.py
new file mode 100644
index 0000000000..24a66d3f38
--- /dev/null
+++ b/evaluation/integration_tests/tests/t07_interactive_commands.py
@@ -0,0 +1,73 @@
+import hashlib
+
+from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
+from openhands.events.action import (
+    AgentFinishAction,
+    FileWriteAction,
+    MessageAction,
+)
+from openhands.events.event import Event
+from openhands.events.observation import AgentDelegateObservation
+from openhands.runtime.base import Runtime
+
+
+class Test(BaseIntegrationTest):
+    INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
+    SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000
+
+    @classmethod
+    def initialize_runtime(cls, runtime: Runtime) -> None:
+        from openhands.core.logger import openhands_logger as logger
+
+        action = FileWriteAction(
+            path='/workspace/python_script.py',
+            content=(
+                'name = input("Enter your name: "); age = input("Enter your age: "); '
+                'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
+                'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
+            ),
+        )
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        observation = runtime.run_action(action)
+        logger.info(observation, extra={'msg_type': 'OBSERVATION'})
+
+    @classmethod
+    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
+        from openhands.core.logger import openhands_logger as logger
+
+        # check if the license information is in any message
+        message_actions = [
+            event
+            for event in histories
+            if isinstance(
+                event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
+            )
+        ]
+        logger.info(f'Total message-like events: {len(message_actions)}')
+
+        for event in message_actions:
+            try:
+                if isinstance(event, AgentDelegateObservation):
+                    content = event.content
+                elif isinstance(event, AgentFinishAction):
+                    content = event.outputs.get('content', '')
+                    if event.thought:
+                        content += f'\n\n{event.thought}'
+                elif isinstance(event, MessageAction):
+                    content = event.content
+                else:
+                    logger.warning(f'Unexpected event type: {type(event)}')
+                    continue
+
+                if str(cls.SECRET_NUMBER) in content:
+                    return TestResult(success=True)
+            except Exception as e:
+                logger.error(f'Error processing event: {e}')
+
+        logger.debug(
+            f'Total messages: {len(message_actions)}. Messages: {message_actions}'
+        )
+        return TestResult(
+            success=False,
+            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
+        )
diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
index 23c5b319d3..5b3ce8c8bd 100644
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -371,7 +371,6 @@ def _process_instance_wrapper(
             error = str(e)
             stacktrace = traceback.format_exc()
             if attempt == max_retries:
-                logger.exception(e)
                 msg = (
                     '-' * 10
                     + '\n'
@@ -395,19 +394,13 @@ def _process_instance_wrapper(
                 + '-' * 10
                 + '\n'
             )
-            if isinstance(
-                e,
-                (
-                    AgentRuntimeDisconnectedError,
-                    AgentRuntimeUnavailableError,
-                    AgentRuntimeNotFoundError,
-                ),
-            ):
+            # e is likely an EvalException, so we can't directly infer it from type
+            # but rather check if it's a fatal error
+            if is_fatal_runtime_error(str(e)):
                 runtime_failure_count += 1
                 msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
+                msg += '\n' + '-' * 10 + '\n'
             logger.error(msg)
-            if use_mp:
-                print(msg)  # use print to directly print to console
             time.sleep(5)
 
 
@@ -564,6 +557,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
         AgentRuntimeNotReadyError,
         AgentRuntimeDisconnectedError,
         AgentRuntimeNotFoundError,
+        ConnectionError,
     ]
 
     if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
@@ -573,6 +567,23 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
     return False
 
 
+def is_fatal_runtime_error(error: str | None) -> bool:
+    if not error:
+        return False
+
+    FATAL_RUNTIME_ERRORS = [
+        AgentRuntimeUnavailableError,
+        AgentRuntimeDisconnectedError,
+        AgentRuntimeNotFoundError,
+    ]
+
+    if any(exception.__name__ in error for exception in FATAL_RUNTIME_ERRORS):
+        logger.error(f'Fatal runtime error detected: {error}')
+        return True
+
+    return False
+
+
 def get_metrics(state: State) -> dict[str, Any]:
     """Extract metrics from the state."""
     metrics = state.metrics.get() if state.metrics else {}
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
index d06cf01cd3..af07393dd4 100644
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -31,7 +31,7 @@ from openhands.events.tool import ToolCallMetadata
 
 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
 * Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
-* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
+* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
 """
 
 CmdRunTool = ChatCompletionToolParam(
@@ -46,6 +46,11 @@ CmdRunTool = ChatCompletionToolParam(
                     'type': 'string',
                     'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
                 },
+                'is_input': {
+                    'type': 'string',
+                    'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
+                    'enum': ['true', 'false'],
+                },
             },
             'required': ['command'],
         },
@@ -488,6 +493,12 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                     f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                 ) from e
             if tool_call.function.name == 'execute_bash':
+                # this is an LLM error: add empty command to avoid breaking the tool call
+                if 'command' not in arguments:
+                    arguments['command'] = ''
+                # convert is_input to boolean
+                if 'is_input' in arguments:
+                    arguments['is_input'] = arguments['is_input'] == 'true'
                 action = CmdRunAction(**arguments)
             elif tool_call.function.name == 'execute_ipython_cell':
                 action = IPythonRunCellAction(**arguments)
diff --git a/openhands/events/action/commands.py b/openhands/events/action/commands.py
index 67b586f86a..ab5c77de29 100644
--- a/openhands/events/action/commands.py
+++ b/openhands/events/action/commands.py
@@ -11,8 +11,10 @@ from openhands.events.action.action import (
 
 @dataclass
 class CmdRunAction(Action):
-    command: str
-    # When `command` is empty, it will be used to print the current tmux window
+    command: (
+        str  # When `command` is empty, it will be used to print the current tmux window
+    )
+    is_input: bool = False  # if True, the command is an input to the running process
     thought: str = ''
     blocking: bool = False
     # If blocking is True, the command will be run in a blocking manner.
@@ -28,7 +30,7 @@ class CmdRunAction(Action):
         return f'Running command: {self.command}'
 
     def __str__(self) -> str:
-        ret = f'**CmdRunAction (source={self.source})**\n'
+        ret = f'**CmdRunAction (source={self.source}, is_input={self.is_input})**\n'
         if self.thought:
             ret += f'THOUGHT: {self.thought}\n'
         ret += f'COMMAND:\n{self.command}'
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 0505203cf6..4e5729ac77 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -197,9 +197,10 @@ class Runtime(FileEditRuntimeMixin):
                 e, AgentRuntimeDisconnectedError
             ):
                 err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
-            self.log('error', f'Unexpected error while running action: {str(e)}')
+            error_message = f'{type(e).__name__}: {str(e)}'
+            self.log('error', f'Unexpected error while running action: {error_message}')
             self.log('error', f'Problematic action: {str(event)}')
-            self.send_error_message(err_id, str(e))
+            self.send_error_message(err_id, error_message)
             self.close()
             return
 
diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py
index a2455dd2b6..38afc54487 100644
--- a/openhands/runtime/impl/action_execution/action_execution_client.py
+++ b/openhands/runtime/impl/action_execution/action_execution_client.py
@@ -59,6 +59,7 @@ class ActionExecutionClient(Runtime):
         self.session = HttpSession()
         self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time
         self._runtime_initialized: bool = False
+        self._runtime_closed: bool = False
         self._vscode_token: str | None = None  # initial dummy value
         super().__init__(
             config,
@@ -283,4 +284,9 @@ class ActionExecutionClient(Runtime):
         return self.send_action_for_execution(action)
 
     def close(self) -> None:
+        # Make sure we don't close the session multiple times
+        # Can happen in evaluation
+        if self._runtime_closed:
+            return
+        self._runtime_closed = True
         self.session.close()
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 2842a7b50e..41e1620e29 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -13,6 +13,7 @@ from openhands.core.exceptions import (
     AgentRuntimeNotReadyError,
     AgentRuntimeUnavailableError,
 )
+from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.runtime.builder.remote import RemoteRuntimeBuilder
 from openhands.runtime.impl.action_execution.action_execution_client import (
@@ -75,6 +76,10 @@ class RemoteRuntime(ActionExecutionClient):
         self.available_hosts: dict[str, int] = {}
         self._runtime_initialized: bool = False
 
+    def log(self, level: str, message: str) -> None:
+        message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
+        getattr(logger, level)(message, stacklevel=2)
+
     def _get_action_execution_server_host(self):
         return self.runtime_url
 
@@ -350,20 +355,33 @@ class RemoteRuntime(ActionExecutionClient):
             super().close()
             return
         try:
-            with self._send_runtime_api_request(
-                'POST',
-                f'{self.config.sandbox.remote_runtime_api_url}/stop',
-                json={'runtime_id': self.runtime_id},
-            ):
-                self.log('debug', 'Runtime stopped.')
+            if not self._runtime_closed:
+                with self._send_runtime_api_request(
+                    'POST',
+                    f'{self.config.sandbox.remote_runtime_api_url}/stop',
+                    json={'runtime_id': self.runtime_id},
+                ):
+                    self.log('debug', 'Runtime stopped.')
         except Exception as e:
             raise e
         finally:
             super().close()
 
     def _send_runtime_api_request(self, method, url, **kwargs):
-        return send_request(self.session, method, url, **kwargs)
+        try:
+            return send_request(self.session, method, url, **kwargs)
+        except requests.Timeout:
+            self.log(
+                'error',
+                f'No response received within the timeout period for url: {url}',
+            )
+            raise
 
+    @tenacity.retry(
+        retry=tenacity.retry_if_exception_type(ConnectionError),
+        stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
+        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
+    )
     def _send_action_server_request(self, method, url, **kwargs):
         try:
             return super()._send_action_server_request(method, url, **kwargs)
@@ -375,14 +393,14 @@ class RemoteRuntime(ActionExecutionClient):
             raise
 
         except requests.HTTPError as e:
-            if e.response.status_code in (404, 502):
+            if e.response.status_code in (404, 502, 504):
                 if e.response.status_code == 404:
                     raise AgentRuntimeDisconnectedError(
-                        'Runtime is not responding. This may be temporary, please try again.'
+                        f'Runtime is not responding. This may be temporary, please try again. Original error: {e}'
                     ) from e
-                else:  # 502
+                else:  # 502, 504
                     raise AgentRuntimeDisconnectedError(
-                        'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again.'
+                        f'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again. Original error: {e}'
                     ) from e
             elif e.response.status_code == 503:
                 self.log('warning', 'Runtime appears to be paused. Resuming...')
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index 87b2ae405f..9da848c44d 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -461,22 +461,28 @@ class BashSession:
         # Strip the command of any leading/trailing whitespace
         logger.debug(f'RECEIVED ACTION: {action}')
         command = action.command.strip()
-        is_special_key = self._is_special_key(command)
+        is_input: bool = action.is_input
 
-        # Handle when prev command is hard timeout
-
-        if command == '' and self.prev_status not in {
+        # If the previous command is not completed, we need to check if the command is empty
+        if self.prev_status not in {
             BashCommandStatus.CONTINUE,
             BashCommandStatus.NO_CHANGE_TIMEOUT,
             BashCommandStatus.HARD_TIMEOUT,
         }:
-            return CmdOutputObservation(
-                content='ERROR: No previous command to continue from. '
-                + 'Previous command has to be timeout to be continued.',
-                command='',
-                metadata=CmdOutputMetadata(),
-            )
+            if command == '':
+                return CmdOutputObservation(
+                    content='ERROR: No previous running command to retrieve logs from.',
+                    command='',
+                    metadata=CmdOutputMetadata(),
+                )
+            if is_input:
+                return CmdOutputObservation(
+                    content='ERROR: No previous running command to interact with.',
+                    command='',
+                    metadata=CmdOutputMetadata(),
+                )
 
+        # Check if the command is a single command or multiple commands
         splited_commands = split_bash_commands(command)
         if len(splited_commands) > 1:
             return ErrorObservation(
@@ -491,46 +497,62 @@ class BashSession:
         last_change_time = start_time
         last_pane_output = self._get_pane_content()
 
-        # Do not check hard timeout if the command is a special key
-        if command != '' and is_special_key:
-            logger.debug(f'SENDING SPECIAL KEY: {command!r}')
-            self.pane.send_keys(command, enter=False)
-        # When prev command is hard timeout, and we are trying to execute new command
-        elif self.prev_status == BashCommandStatus.HARD_TIMEOUT and command != '':
-            if not last_pane_output.endswith(CMD_OUTPUT_PS1_END):
-                _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
-                raw_command_output = self._combine_outputs_between_matches(
-                    last_pane_output, _ps1_matches
-                )
-                metadata = CmdOutputMetadata()  # No metadata available
-                metadata.suffix = (
-                    f'\n[Your command "{command}" is NOT executed. '
-                    f'The previous command was timed out but still running. Above is the output of the previous command. '
-                    "You may wait longer to see additional output of the previous command by sending empty command '', "
-                    'send other commands to interact with the current process, '
-                    'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
-                )
-                command_output = self._get_command_output(
-                    command,
-                    raw_command_output,
-                    metadata,
-                    continue_prefix='[Below is the output of the previous command.]\n',
-                )
-                return CmdOutputObservation(
-                    command=command,
-                    content=command_output,
-                    metadata=metadata,
-                )
-        # Only send the command to the pane if it's not a special key and it's not empty
-        # AND previous hard timeout command is resolved
-        elif command != '' and not is_special_key:
-            # convert command to raw string
-            command = escape_bash_special_chars(command)
-            logger.debug(f'SENDING COMMAND: {command!r}')
-            self.pane.send_keys(
-                command,
-                enter=True,
+        # When prev command is still running, and we are trying to send a new command
+        if (
+            self.prev_status
+            in {
+                BashCommandStatus.HARD_TIMEOUT,
+                BashCommandStatus.NO_CHANGE_TIMEOUT,
+            }
+            and not last_pane_output.endswith(
+                CMD_OUTPUT_PS1_END
+            )  # prev command is not completed
+            and not is_input
+            and command != ''  # not input and not empty command
+        ):
+            _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
+            raw_command_output = self._combine_outputs_between_matches(
+                last_pane_output, _ps1_matches
             )
+            metadata = CmdOutputMetadata()  # No metadata available
+            metadata.suffix = (
+                f'\n[Your command "{command}" is NOT executed. '
+                f'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
+                'By setting `is_input` to `true`, you can interact with the current process: '
+                "You may wait longer to see additional output of the previous command by sending empty command '', "
+                'send other commands to interact with the current process, '
+                'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
+            )
+            logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
+            command_output = self._get_command_output(
+                command,
+                raw_command_output,
+                metadata,
+                continue_prefix='[Below is the output of the previous command.]\n',
+            )
+            return CmdOutputObservation(
+                command=command,
+                content=command_output,
+                metadata=metadata,
+            )
+
+        # Send actual command/inputs to the pane
+        if command != '':
+            is_special_key = self._is_special_key(command)
+            if is_input:
+                logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
+                self.pane.send_keys(
+                    command,
+                    enter=not is_special_key,
+                )
+            else:
+                # convert command to raw string
+                command = escape_bash_special_chars(command)
+                logger.debug(f'SENDING COMMAND: {command!r}')
+                self.pane.send_keys(
+                    command,
+                    enter=not is_special_key,
+                )
 
         # Loop until the command completes or times out
         while should_continue():
diff --git a/tests/runtime/test_bash.py b/tests/runtime/test_bash.py
index 828c859f11..4af28d9065 100644
--- a/tests/runtime/test_bash.py
+++ b/tests/runtime/test_bash.py
@@ -57,7 +57,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
             in obs.metadata.suffix
         )
 
-        action = CmdRunAction(command='C-c')
+        action = CmdRunAction(command='C-c', is_input=True)
         action.set_hard_timeout(30)
         obs = runtime.run_action(action)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -571,7 +571,7 @@ def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
         assert 'Enter name:' in obs.content
         assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
 
-        action = CmdRunAction('John')
+        action = CmdRunAction('John', is_input=True)
         obs = runtime.run_action(action)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
         assert 'Hello John' in obs.content
@@ -741,10 +741,7 @@ def test_long_running_command_follow_by_execute(
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
         assert '3' not in obs.content
         assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
-        assert (
-            'The previous command was timed out but still running.'
-            in obs.metadata.suffix
-        )
+        assert 'The previous command is still running' in obs.metadata.suffix
         assert obs.metadata.exit_code == -1  # -1 indicates command is still running
 
         # Finally continue again
@@ -763,7 +760,9 @@ def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
         # Test empty command without previous command
         obs = runtime.run_action(CmdRunAction(''))
         assert isinstance(obs, CmdOutputObservation)
-        assert 'ERROR: No previous command to continue from' in obs.content
+        assert (
+            'ERROR: No previous running command to retrieve logs from.' in obs.content
+        )
     finally:
         _close_test_runtime(runtime)
 
@@ -781,13 +780,52 @@ def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
         assert obs.metadata.exit_code == -1  # -1 indicates command is still running
 
         # Send first input (name)
-        obs = runtime.run_action(CmdRunAction('Alice'))
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
         assert 'Enter your age:' in obs.content
         assert obs.metadata.exit_code == -1
 
         # Send second input (age)
-        obs = runtime.run_action(CmdRunAction('25'))
+        obs = runtime.run_action(CmdRunAction('25', is_input=True))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Hello Alice, you are 25 years old' in obs.content
+        assert obs.metadata.exit_code == 0
+        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
+    finally:
+        _close_test_runtime(runtime)
+
+
+def test_python_interactive_input_without_set_input(
+    temp_dir, runtime_cls, run_as_openhands
+):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # Test Python program that asks for input - properly escaped for bash
+        python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
+
+        # Start Python with the interactive script
+        obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your name:' in obs.content
+        assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+
+        # Send first input (name)
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your age:' not in obs.content
+        assert (
+            'Your command "Alice" is NOT executed. The previous command is still running'
+            in obs.metadata.suffix
+        )
+        assert obs.metadata.exit_code == -1
+
+        # Try again now with input
+        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert 'Enter your age:' in obs.content
+        assert obs.metadata.exit_code == -1
+
+        obs = runtime.run_action(CmdRunAction('25', is_input=True))
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
         assert 'Hello Alice, you are 25 years old' in obs.content
         assert obs.metadata.exit_code == 0
@@ -844,7 +882,7 @@ def test_stress_long_output_with_soft_and_hard_timeout(
             assert obs.exit_code == -1  # Command is still running, waiting for input
 
             # Send the confirmation
-            action = CmdRunAction('Y')
+            action = CmdRunAction('Y', is_input=True)
             obs = runtime.run_action(action)
             assert 'Proceeding with operation...' in obs.content
             assert 'Operation completed successfully!' in obs.content
@@ -869,13 +907,10 @@ def test_stress_long_output_with_soft_and_hard_timeout(
             # where it will not accept any new commands.
             obs = runtime.run_action(CmdRunAction('ls'))
             assert obs.exit_code == -1
-            assert (
-                'The previous command was timed out but still running.'
-                in obs.metadata.suffix
-            )
+            assert 'The previous command is still running' in obs.metadata.suffix
 
             # We need to send a Ctrl+C to reset the terminal.
-            obs = runtime.run_action(CmdRunAction('C-c'))
+            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
             assert obs.exit_code == 130
 
             # Now make sure the terminal is in a good state
@@ -887,3 +922,25 @@ def test_stress_long_output_with_soft_and_hard_timeout(
 
     finally:
         _close_test_runtime(runtime)
+
+
+def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    try:
+        # create a git repo
+        action = CmdRunAction(
+            'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
+        )
+        obs = runtime.run_action(action)
+        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.metadata.exit_code == 0
+
+        # Start Python with the interactive script
+        obs = runtime.run_action(CmdRunAction('git remote -v'))
+        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        assert obs.metadata.exit_code == 0
+        assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
+        assert 'git remote -v' not in obs.content
+
+    finally:
+        _close_test_runtime(runtime)
diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py
index 374d5eee01..32b29e44b2 100644
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@@ -97,6 +97,7 @@ def test_cmd_run_action_serialization_deserialization():
         'args': {
             'blocking': False,
             'command': 'echo "Hello world"',
+            'is_input': False,
             'thought': '',
             'hidden': False,
             'confirmation_state': ActionConfirmationStatus.CONFIRMED,
@@ -181,3 +182,4 @@ def test_legacy_serialization():
     assert event_dict['args']['blocking'] is False
     assert event_dict['args']['command'] == 'echo "Hello world"'
     assert event_dict['args']['thought'] == ''
+    assert event_dict['args']['is_input'] is False
diff --git a/tests/unit/test_bash_session.py b/tests/unit/test_bash_session.py
index fc29eaffb2..e5e0bf22b7 100644
--- a/tests/unit/test_bash_session.py
+++ b/tests/unit/test_bash_session.py
@@ -1,5 +1,6 @@
 import os
 import tempfile
+import time
 
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import CmdRunAction
@@ -91,7 +92,7 @@ def test_long_running_command_follow_by_execute():
     assert obs.metadata.prefix == ''
 
     # Continue watching output
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '2' in obs.content
     assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
@@ -107,14 +108,20 @@ def test_long_running_command_follow_by_execute():
     # Test command that produces no output
     obs = session.execute(CmdRunAction('sleep 15'))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert '3' not in obs.content
+    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
+    assert 'The previous command is still running' in obs.metadata.suffix
+    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
+
+    time.sleep(3)
+
+    # Run it again, this time it should produce output
+    obs = session.execute(CmdRunAction('sleep 15'))
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '3' in obs.content
     assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
-    assert obs.metadata.suffix == (
-        '\n[The command has no new output after 2 seconds. '
-        "You may wait longer to see additional output by sending empty command '', "
-        'send other commands to interact with the current process, '
-        'or send keys to interrupt/kill the command.]'
-    )
+    assert 'The previous command is still running' in obs.metadata.suffix
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
@@ -144,7 +151,7 @@ def test_interactive_command():
     assert obs.metadata.prefix == ''
 
     # Send input
-    obs = session.execute(CmdRunAction('John'))
+    obs = session.execute(CmdRunAction('John', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert 'Hello John' in obs.content
     assert obs.metadata.exit_code == 0
@@ -165,7 +172,7 @@ def test_interactive_command():
     )
     assert obs.metadata.prefix == ''
 
-    obs = session.execute(CmdRunAction('line 1'))
+    obs = session.execute(CmdRunAction('line 1', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.metadata.exit_code == -1
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@@ -177,7 +184,7 @@ def test_interactive_command():
     )
     assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
 
-    obs = session.execute(CmdRunAction('line 2'))
+    obs = session.execute(CmdRunAction('line 2', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.metadata.exit_code == -1
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@@ -189,7 +196,7 @@ def test_interactive_command():
     )
     assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
 
-    obs = session.execute(CmdRunAction('EOF'))
+    obs = session.execute(CmdRunAction('EOF', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert 'line 1' in obs.content and 'line 2' in obs.content
     assert obs.metadata.exit_code == 0
@@ -220,7 +227,7 @@ def test_ctrl_c():
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
     # Send Ctrl+C
-    obs = session.execute(CmdRunAction('C-c'))
+    obs = session.execute(CmdRunAction('C-c', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.metadata.exit_code == 130  # Standard exit code for Ctrl+C
     assert (
@@ -240,10 +247,7 @@ def test_empty_command_errors():
     # Test empty command without previous command
     obs = session.execute(CmdRunAction(''))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        obs.content
-        == 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
-    )
+    assert obs.content == 'ERROR: No previous running command to retrieve logs from.'
     assert obs.metadata.exit_code == -1
     assert obs.metadata.prefix == ''
     assert obs.metadata.suffix == ''
@@ -264,14 +268,14 @@ def test_command_output_continuation():
     assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '[Below is the output of the previous command.]' in obs.metadata.prefix
     assert obs.content.strip() == '2'
     assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '[Below is the output of the previous command.]' in obs.metadata.prefix
     assert obs.content.strip() == '3'
@@ -279,21 +283,21 @@ def test_command_output_continuation():
     assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '[Below is the output of the previous command.]' in obs.metadata.prefix
     assert obs.content.strip() == '4'
     assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '[Below is the output of the previous command.]' in obs.metadata.prefix
     assert obs.content.strip() == '5'
     assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
-    obs = session.execute(CmdRunAction(''))
+    obs = session.execute(CmdRunAction('', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert '[The command completed with exit code 0.]' in obs.metadata.suffix
     assert session.prev_status == BashCommandStatus.COMPLETED
@@ -367,14 +371,14 @@ def test_python_interactive_input():
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
     # Send first input (name)
-    obs = session.execute(CmdRunAction('Alice'))
+    obs = session.execute(CmdRunAction('Alice', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert 'Enter your age:' in obs.content
     assert obs.metadata.exit_code == -1
     assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
 
     # Send second input (age)
-    obs = session.execute(CmdRunAction('25'))
+    obs = session.execute(CmdRunAction('25', is_input=True))
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert 'Hello Alice, you are 25 years old' in obs.content
     assert obs.metadata.exit_code == 0
diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py
index 0e2b3c8a0d..b3d7ce748d 100644
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@@ -367,6 +367,7 @@ async def test_unsafe_bash_command(temp_dir: str):
                         arguments={
                             'blocking': False,
                             'command': 'ls',
+                            'is_input': False,
                             'hidden': False,
                             'confirmation_state': ActionConfirmationStatus.CONFIRMED,
                         },