diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index ba2181c643..37321a267c 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -157,12 +157,14 @@ def initialize_runtime( action = CmdRunAction( command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc""" ) + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """) + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -201,18 +203,21 @@ def initialize_runtime( '/swe_util/', ) action = CmdRunAction(command='cat ~/.bashrc') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 action = CmdRunAction(command='source ~/.bashrc') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -234,6 +239,7 @@ def initialize_runtime( assert obs.exit_code == 0 action = CmdRunAction(command='git reset --hard') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -242,6 +248,7 @@ def initialize_runtime( action = CmdRunAction( command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done' ) + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -269,18 +276,21 @@ def complete_runtime( workspace_dir_name = _get_swebench_workspace_dir_name(instance) action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 action = CmdRunAction(command='git config --global core.pager ""') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 action = CmdRunAction(command='git add -A') + action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) diff --git a/openhands/events/action/commands.py b/openhands/events/action/commands.py index 41ad0104c4..988ccbcec9 100644 --- a/openhands/events/action/commands.py +++ b/openhands/events/action/commands.py @@ -13,6 +13,10 @@ from openhands.events.action.action import ( class CmdRunAction(Action): command: str thought: str = '' + blocking: bool = False + # If False, the command will be run in a non-blocking / interactive way + # The partial command outputs will be returned as output observation. + # If True, the command will be run for max .timeout seconds. keep_prompt: bool = True # if True, the command prompt will be kept in the command output observation # Example of command output: diff --git a/openhands/events/event.py b/openhands/events/event.py index d4a7f915e0..a849f2a61d 100644 --- a/openhands/events/event.py +++ b/openhands/events/event.py @@ -49,3 +49,8 @@ class Event: @timeout.setter def timeout(self, value: int | None) -> None: self._timeout = value + + # Check if .blocking is an attribute of the event + if hasattr(self, 'blocking'): + # .blocking needs to be set to True if .timeout is set + self.blocking = True diff --git a/openhands/runtime/client/client.py b/openhands/runtime/client/client.py index 96df63f614..f26b739ca8 100644 --- a/openhands/runtime/client/client.py +++ b/openhands/runtime/client/client.py @@ -326,9 +326,11 @@ class RuntimeClient: else: output, exit_code = self._execute_bash( command, - timeout=SOFT_TIMEOUT_SECONDS, + timeout=SOFT_TIMEOUT_SECONDS + if not action.blocking + else action.timeout, keep_prompt=action.keep_prompt, - kill_on_timeout=False, + kill_on_timeout=False if not action.blocking else True, ) if all_output: # previous output already exists with prompt "user@hostname:working_dir #"" diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_001.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_001.log index 06bc351be1..221e103775 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_001.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software architect. Your team has inherited an existing codebase, and need to finish a project: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log index 50853ba584..d0d2de3ab3 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software architect. Your team has inherited an existing codebase, and need to finish a project: @@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] +[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log index 200ea096c0..65df2e8c13 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software architect. Your team has inherited an existing codebase, and need to finish a project: @@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}] +[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_004.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_004.log index 946bb4950c..e4fff9eba3 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_004.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_004.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_005.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_005.log index f1518f188d..3f96a46165 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_005.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_005.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_006.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_006.log index 6e6649226d..9a267bc776 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_006.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_006.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_007.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_007.log index 5a32e02b4c..596f5e0d41 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_007.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_007.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_008.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_008.log index b5683d733d..fd992e6f70 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_008.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_008.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_001.log index d7afd364ce..8108bde827 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_001.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software architect. Your team has inherited an existing codebase, and need to finish a project: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log index b7dd18793d..aa890b1726 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software architect. Your team has inherited an existing codebase, and need to finish a project: @@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] +[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_003.log index 5b6363c8bb..74985476f9 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_003.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_004.log index 36bf347a65..4cb33a442d 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_004.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_004.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log index 5b36438e8e..99188b7b90 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: @@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}] +[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log index 7b7be6feb1..af0d7aef2f 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: @@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}] +[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_007.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_007.log index bf0ea12636..8921d4edfc 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_007.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_007.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_008.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_008.log index 5ddd4dd4eb..53c40716d0 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_008.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_008.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log index a1b6117c3f..ae25dcf09f 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: @@ -39,7 +35,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}] +[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_001.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_001.log index 71235b4455..663cda3620 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_001.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_001.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are in charge of accomplishing the following task: Write a git commit message for the current staging area. Do not ask me for confirmation at any point. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_002.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_002.log index d7237c7724..c6490cdec2 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_002.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_002.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log index 9e72312f28..d4affba2f2 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. @@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log index 0d8ce0b1eb..84b70bd2d5 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. @@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log index 497904ac9b..f7090b8c86 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. @@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log index 5541ae469a..29cfcf0acb 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. @@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log index d017d72437..4c3d9485f8 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are a responsible software engineer and always write good commit messages. @@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_008.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_008.log index c869abe630..e85e6d7d19 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_008.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_008.log @@ -1,7 +1,3 @@ - - ----------- - # Task You are in charge of accomplishing the following task: Write a git commit message for the current staging area. Do not ask me for confirmation at any point. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_001.log index 00389e2e9f..720563fe8c 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_001.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_002.log index f9e7cf5b63..84b0d3c4f5 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_002.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_003.log index c1aa30ab70..df5c3d5f18 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_003.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_004.log index c03ff9801b..3ddb46bedc 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_004.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_004.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_005.log index fe13547223..b1082d0bda 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_005.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_005.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_006.log index 7d762e8029..feb320d899 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_006.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_006.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_007.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_007.log index dd92e38b6c..d300bf45b3 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_007.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_007.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_008.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_008.log index 237f8f8046..0e61932075 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_008.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_008.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_009.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_009.log index 152b99d118..520b841554 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_009.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_009.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log index 58d72ee620..b122f016ad 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. @@ -198,6 +194,7 @@ ten actions--more happened before that. "args": { "command": "bash hello.sh", "thought": "", + "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed" } diff --git a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log index e1bb7071bc..03378e8d9e 100644 --- a/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log +++ b/tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log @@ -1,8 +1,4 @@ - ----------- - - # Task You're a diligent software engineer AI. You can't see, draw, or interact with a browser, but you can read and write files, and you can run commands, and you can think. @@ -197,6 +193,7 @@ ten actions--more happened before that. "args": { "command": "bash hello.sh", "thought": "", + "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed" } diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py index ac47f87e64..7cd9b0d178 100644 --- a/tests/unit/test_action_serialization.py +++ b/tests/unit/test_action_serialization.py @@ -84,6 +84,7 @@ def test_cmd_run_action_serialization_deserialization(): original_action_dict = { 'action': 'run', 'args': { + 'blocking': False, 'command': 'echo "Hello world"', 'thought': '', 'keep_prompt': True, diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py index 4faff92c64..f4c0503f58 100644 --- a/tests/unit/test_security.py +++ b/tests/unit/test_security.py @@ -219,6 +219,7 @@ def test_unsafe_bash_command(temp_dir: str): function=Function( name=ActionType.RUN, arguments={ + 'blocking': False, 'command': 'ls', 'keep_prompt': True, 'is_confirmed': ActionConfirmationStatus.CONFIRMED,