mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
[eval] increase timeout for SWEBench eval init/complete (#3829)
* [eval] increase timeout for swebench eval init/complete * allow CmdRunAction to optionally block when .timeout is setted * fix unit test for serialization * fix unit tests for security analyzer * fix integration tests * add more timeout
This commit is contained in:
parent
4d8f812f88
commit
2fe2f4c530
@ -157,12 +157,14 @@ def initialize_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc"""
|
||||
)
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -201,18 +203,21 @@ def initialize_runtime(
|
||||
'/swe_util/',
|
||||
)
|
||||
action = CmdRunAction(command='cat ~/.bashrc')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='source ~/.bashrc')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -234,6 +239,7 @@ def initialize_runtime(
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='git reset --hard')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -242,6 +248,7 @@ def initialize_runtime(
|
||||
action = CmdRunAction(
|
||||
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
|
||||
)
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -269,18 +276,21 @@ def complete_runtime(
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='git config --global core.pager ""')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='git add -A')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -13,6 +13,10 @@ from openhands.events.action.action import (
|
||||
class CmdRunAction(Action):
|
||||
command: str
|
||||
thought: str = ''
|
||||
blocking: bool = False
|
||||
# If False, the command will be run in a non-blocking / interactive way
|
||||
# The partial command outputs will be returned as output observation.
|
||||
# If True, the command will be run for max .timeout seconds.
|
||||
keep_prompt: bool = True
|
||||
# if True, the command prompt will be kept in the command output observation
|
||||
# Example of command output:
|
||||
|
||||
@ -49,3 +49,8 @@ class Event:
|
||||
@timeout.setter
|
||||
def timeout(self, value: int | None) -> None:
|
||||
self._timeout = value
|
||||
|
||||
# Check if .blocking is an attribute of the event
|
||||
if hasattr(self, 'blocking'):
|
||||
# .blocking needs to be set to True if .timeout is set
|
||||
self.blocking = True
|
||||
|
||||
@ -326,9 +326,11 @@ class RuntimeClient:
|
||||
else:
|
||||
output, exit_code = self._execute_bash(
|
||||
command,
|
||||
timeout=SOFT_TIMEOUT_SECONDS,
|
||||
timeout=SOFT_TIMEOUT_SECONDS
|
||||
if not action.blocking
|
||||
else action.timeout,
|
||||
keep_prompt=action.keep_prompt,
|
||||
kill_on_timeout=False,
|
||||
kill_on_timeout=False if not action.blocking else True,
|
||||
)
|
||||
if all_output:
|
||||
# previous output already exists with prompt "user@hostname:working_dir #""
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software architect. Your team has inherited an existing codebase, and
|
||||
need to finish a project:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software architect. Your team has inherited an existing codebase, and
|
||||
need to finish a project:
|
||||
@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software architect. Your team has inherited an existing codebase, and
|
||||
need to finish a project:
|
||||
@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a quality assurance engineer. Another engineer has made changes to the
|
||||
codebase which are supposed to solve this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a quality assurance engineer. Another engineer has made changes to the
|
||||
codebase which are supposed to solve this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software architect. Your team has inherited an existing codebase, and
|
||||
need to finish a project:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software architect. Your team has inherited an existing codebase, and
|
||||
need to finish a project:
|
||||
@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
|
||||
[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a software engineer. You've inherited an existing codebase, which you
|
||||
need to modify to complete this task:
|
||||
@ -41,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
|
||||
[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a quality assurance engineer. Another engineer has made changes to the
|
||||
codebase which are supposed to solve this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a quality assurance engineer. Another engineer has made changes to the
|
||||
codebase which are supposed to solve this task:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a quality assurance engineer. Another engineer has made changes to the
|
||||
codebase which are supposed to solve this task:
|
||||
@ -39,7 +35,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
|
||||
[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
|
||||
|
||||
## Format
|
||||
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are in charge of accomplishing the following task:
|
||||
Write a git commit message for the current staging area. Do not ask me for confirmation at any point.
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
|
||||
|
||||
If the last item in the history is an error, you should try to fix it.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
|
||||
If the last item in the history is an error, you should try to fix it.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
|
||||
If the last item in the history is an error, you should try to fix it.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
|
||||
If the last item in the history is an error, you should try to fix it.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are a responsible software engineer and always write good commit messages.
|
||||
|
||||
@ -28,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT
|
||||
actions and observations--more may have happened before that.
|
||||
They are time-ordered, with your most recent action at the bottom.
|
||||
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
|
||||
|
||||
If the last item in the history is an error, you should try to fix it.
|
||||
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
# Task
|
||||
You are in charge of accomplishing the following task:
|
||||
Write a git commit message for the current staging area. Do not ask me for confirmation at any point.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
@ -198,6 +194,7 @@ ten actions--more happened before that.
|
||||
"args": {
|
||||
"command": "bash hello.sh",
|
||||
"thought": "",
|
||||
"blocking": false,
|
||||
"keep_prompt": true,
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
# Task
|
||||
You're a diligent software engineer AI. You can't see, draw, or interact with a
|
||||
browser, but you can read and write files, and you can run commands, and you can think.
|
||||
@ -197,6 +193,7 @@ ten actions--more happened before that.
|
||||
"args": {
|
||||
"command": "bash hello.sh",
|
||||
"thought": "",
|
||||
"blocking": false,
|
||||
"keep_prompt": true,
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
|
||||
@ -84,6 +84,7 @@ def test_cmd_run_action_serialization_deserialization():
|
||||
original_action_dict = {
|
||||
'action': 'run',
|
||||
'args': {
|
||||
'blocking': False,
|
||||
'command': 'echo "Hello world"',
|
||||
'thought': '',
|
||||
'keep_prompt': True,
|
||||
|
||||
@ -219,6 +219,7 @@ def test_unsafe_bash_command(temp_dir: str):
|
||||
function=Function(
|
||||
name=ActionType.RUN,
|
||||
arguments={
|
||||
'blocking': False,
|
||||
'command': 'ls',
|
||||
'keep_prompt': True,
|
||||
'is_confirmed': ActionConfirmationStatus.CONFIRMED,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user