CodeActAgent: Fix hack for multiple edits in same command (#1684)

* Fix edit hack for multiple edits in same command

This PR changes ([\s\S]*) to ([\s\S]*?) to make the capturing
group non-greedy. This change ensures that the regex captures
the smallest set of characters that extends up to the first
end_of_edit it encounters, rather than extending across multiple
edit commands.

Without the fix, a bash command consisting of multiple edits
would be corrupt and lead to unexpected edit results.
This commit is contained in:
Boxuan Li 2024-05-10 23:32:09 -07:00 committed by GitHub
parent 1787a7304e
commit bde12f4a09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
47 changed files with 5418 additions and 47 deletions

View File

@ -72,7 +72,7 @@ def swe_agent_edit_hack(bash_command: str) -> str:
# edit\s(\d+):(\d+)([\s\S]*)end_of_edit
# replace
bash_command = re.sub(
r'edit\s(\d+):(\d+)([\s\S]*)end_of_edit',
r'edit\s(\d+):(\d+)([\s\S]*?)end_of_edit',
r'edit \1:\2 <<EOF\3EOF',
bash_command,
)

View File

@ -94,6 +94,8 @@ async def main(task_str: str = ''):
]:
await asyncio.sleep(1) # Give back control for a tick, so the agent can run
return controller
if __name__ == '__main__':
asyncio.run(main())

View File

@ -46,16 +46,13 @@ of agents with real LLMs are stored under `mock/AgentName/TestName` folders.
## Run Integration Tests
Take a look at `run-integration-tests.yml` to learn how integration tests are
launched in CI environment. Assuming you want to use `workspace` for testing, an
example is as follows:
launched in CI environment. You can also simply run:
```bash
rm -rf workspace; AGENT=PlannerAgent \
WORKSPACE_BASE="/Users/admin/OpenDevin/workspace" WORKSPACE_MOUNT_PATH="/Users/admin/OpenDevin/workspace" MAX_ITERATIONS=10 \
poetry run pytest -s ./tests/integration
TEST_ONLY=true ./tests/integration/regenerate.sh
```
Note: in order to run integration tests correctly, please ensure your workspace is empty.
to run all integration tests until the first failure.
## Regenerate Integration Tests
@ -64,7 +61,8 @@ by running:
```bash
./tests/integration/regenerate.sh
```
Note that this will make several calls to your LLM_MODEL, potentially costing money! If you don't want
Note that this will run existing tests first and call real LLM_MODEL only for
failed tests, but it still costs money! If you don't want
to cover the cost, ask one of the maintainers to regenerate for you.
You might also be able to fix the tests by hand.

View File

@ -7,6 +7,7 @@ import pytest
from litellm import completion
script_dir = os.path.dirname(os.path.realpath(__file__))
workspace_path = os.getenv('WORKSPACE_BASE')
def filter_out_symbols(input):
@ -62,11 +63,7 @@ def mock_user_response(*args, test_name, **kwargs):
STDIN input for the agent to read.
"""
user_response_file = os.path.join(
script_dir,
'mock',
os.environ.get('AGENT'),
test_name,
'user_responses.log'
script_dir, 'mock', os.environ.get('AGENT'), test_name, 'user_responses.log'
)
if not os.path.exists(user_response_file):
return ''
@ -91,10 +88,30 @@ def mock_completion(*args, test_name, **kwargs):
def patch_completion(monkeypatch, request):
test_name = request.node.name
# Mock LLM completion
monkeypatch.setattr('opendevin.llm.llm.litellm_completion', partial(mock_completion, test_name=test_name))
monkeypatch.setattr(
'opendevin.llm.llm.litellm_completion',
partial(mock_completion, test_name=test_name),
)
# Mock user input (only for tests that have user_responses.log)
user_responses_str = mock_user_response(test_name=test_name)
if user_responses_str:
user_responses = io.StringIO(user_responses_str)
monkeypatch.setattr('sys.stdin', user_responses)
def clean_up():
assert workspace_path is not None
if os.path.exists(workspace_path):
for file in os.listdir(workspace_path):
os.remove(os.path.join(workspace_path, file))
@pytest.fixture(autouse=True)
def resource_setup():
clean_up()
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
# Yield to test execution
yield
clean_up()

View File

@ -0,0 +1,219 @@
----------
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
If the assistant encounters an import error in IPython for a newly installed package, they should try to restart the kernel and import the package again. IPython kernel can be re-started by:
<execute_ipython>
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # Restart the kernel
</execute_ipython>
Apart from the standard bash commands, you can also use the following special commands in <execute_bash> environment:
open <path> [<line_number>] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line
goto <line_number> - moves the window to show <line_number>
scroll_down - moves the window down {WINDOW} lines
scroll_up - moves the window down {WINDOW} lines
create <filename> - creates and opens a new file with the given name
search_dir <search_term> [<dir>] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory
search_file <search_term> [<file>] - searches for search_term in file. If file is not provided, searches in the current open file
find_file <file_name> [<dir>] - finds all files with the given name in dir. If dir is not provided, searches in the current directory
edit <start_line>:<end_line>
<replacement_text>
end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise, but do express their thoughts.
Try to include one of <execute_ipython> or <execute_bash> in each of your responses, unless it is a direct answer to a question OR a message to the user.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
----------
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
Sure! Let me write the Python code for starting a web server and save it to a file `app.py`:
<execute_ipython>
CODE='''
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
'''
with open('app.py', 'w') as f:
f.write(CODE)
</execute_ipython>
USER:
Observation:
[Code executed successfully with no output]
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
Observation:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
Observation:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_bash>
open app.py
</execute_bash>
USER:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return str(numbers)
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<execute_bash>
edit 8:8 <<EOF
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
EOF
</execute_bash>
USER:
Observation:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
ASSISTANT:
The file has been updated. Let me run the Python file again with the new changes:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
--- END OF EXAMPLE ---
NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.

View File

@ -0,0 +1,236 @@
----------
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
If the assistant encounters an import error in IPython for a newly installed package, they should try to restart the kernel and import the package again. IPython kernel can be re-started by:
<execute_ipython>
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # Restart the kernel
</execute_ipython>
Apart from the standard bash commands, you can also use the following special commands in <execute_bash> environment:
open <path> [<line_number>] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line
goto <line_number> - moves the window to show <line_number>
scroll_down - moves the window down {WINDOW} lines
scroll_up - moves the window down {WINDOW} lines
create <filename> - creates and opens a new file with the given name
search_dir <search_term> [<dir>] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory
search_file <search_term> [<file>] - searches for search_term in file. If file is not provided, searches in the current open file
find_file <file_name> [<dir>] - finds all files with the given name in dir. If dir is not provided, searches in the current directory
edit <start_line>:<end_line>
<replacement_text>
end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise, but do express their thoughts.
Try to include one of <execute_ipython> or <execute_bash> in each of your responses, unless it is a direct answer to a question OR a message to the user.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
----------
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
Sure! Let me write the Python code for starting a web server and save it to a file `app.py`:
<execute_ipython>
CODE='''
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
'''
with open('app.py', 'w') as f:
f.write(CODE)
</execute_ipython>
USER:
Observation:
[Code executed successfully with no output]
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
Observation:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
Observation:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_bash>
open app.py
</execute_bash>
USER:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return str(numbers)
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<execute_bash>
edit 8:8 <<EOF
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
EOF
</execute_bash>
USER:
Observation:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
ASSISTANT:
The file has been updated. Let me run the Python file again with the new changes:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
--- END OF EXAMPLE ---
NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.
----------
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file and display its contents.
<execute_bash>
cat bad.txt
</execute_bash>
----------
OBSERVATION:
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]

View File

@ -0,0 +1,264 @@
----------
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
If the assistant encounters an import error in IPython for a newly installed package, they should try to restart the kernel and import the package again. IPython kernel can be re-started by:
<execute_ipython>
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # Restart the kernel
</execute_ipython>
Apart from the standard bash commands, you can also use the following special commands in <execute_bash> environment:
open <path> [<line_number>] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line
goto <line_number> - moves the window to show <line_number>
scroll_down - moves the window down {WINDOW} lines
scroll_up - moves the window down {WINDOW} lines
create <filename> - creates and opens a new file with the given name
search_dir <search_term> [<dir>] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory
search_file <search_term> [<file>] - searches for search_term in file. If file is not provided, searches in the current open file
find_file <file_name> [<dir>] - finds all files with the given name in dir. If dir is not provided, searches in the current directory
edit <start_line>:<end_line>
<replacement_text>
end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise, but do express their thoughts.
Try to include one of <execute_ipython> or <execute_bash> in each of your responses, unless it is a direct answer to a question OR a message to the user.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
----------
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
Sure! Let me write the Python code for starting a web server and save it to a file `app.py`:
<execute_ipython>
CODE='''
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
'''
with open('app.py', 'w') as f:
f.write(CODE)
</execute_ipython>
USER:
Observation:
[Code executed successfully with no output]
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
Observation:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
Observation:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_bash>
open app.py
</execute_bash>
USER:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return str(numbers)
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<execute_bash>
edit 8:8 <<EOF
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
EOF
</execute_bash>
USER:
Observation:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
ASSISTANT:
The file has been updated. Let me run the Python file again with the new changes:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
--- END OF EXAMPLE ---
NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.
----------
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file and display its contents.
<execute_bash>
cat bad.txt
</execute_bash>
----------
OBSERVATION:
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]
----------
I've identified the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
I will now proceed to correct these typos in the file.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit
</execute_bash>
----------
OBSERVATION:
> This is a stupid typo.
> EOF
No file open. Use the `open` command first.edit 3:3 <<EOF
> No more typos!
> EOF
No file open. Use the `open` command first.
[Command -1 finished with exit code 0]]

View File

@ -0,0 +1,282 @@
----------
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
If the assistant encounters an import error in IPython for a newly installed package, they should try to restart the kernel and import the package again. IPython kernel can be re-started by:
<execute_ipython>
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # Restart the kernel
</execute_ipython>
Apart from the standard bash commands, you can also use the following special commands in <execute_bash> environment:
open <path> [<line_number>] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line
goto <line_number> - moves the window to show <line_number>
scroll_down - moves the window down {WINDOW} lines
scroll_up - moves the window down {WINDOW} lines
create <filename> - creates and opens a new file with the given name
search_dir <search_term> [<dir>] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory
search_file <search_term> [<file>] - searches for search_term in file. If file is not provided, searches in the current open file
find_file <file_name> [<dir>] - finds all files with the given name in dir. If dir is not provided, searches in the current directory
edit <start_line>:<end_line>
<replacement_text>
end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise, but do express their thoughts.
Try to include one of <execute_ipython> or <execute_bash> in each of your responses, unless it is a direct answer to a question OR a message to the user.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
----------
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
Sure! Let me write the Python code for starting a web server and save it to a file `app.py`:
<execute_ipython>
CODE='''
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
'''
with open('app.py', 'w') as f:
f.write(CODE)
</execute_ipython>
USER:
Observation:
[Code executed successfully with no output]
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
Observation:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
Observation:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_bash>
open app.py
</execute_bash>
USER:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return str(numbers)
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<execute_bash>
edit 8:8 <<EOF
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
EOF
</execute_bash>
USER:
Observation:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
ASSISTANT:
The file has been updated. Let me run the Python file again with the new changes:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
--- END OF EXAMPLE ---
NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.
----------
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file and display its contents.
<execute_bash>
cat bad.txt
</execute_bash>
----------
OBSERVATION:
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]
----------
I've identified the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
I will now proceed to correct these typos in the file.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit
</execute_bash>
----------
OBSERVATION:
> This is a stupid typo.
> EOF
No file open. Use the `open` command first.edit 3:3 <<EOF
> No more typos!
> EOF
No file open. Use the `open` command first.
[Command -1 finished with exit code 0]]
----------
It seems I forgot to open the file before attempting to edit it. Let me open `bad.txt` and then proceed with the corrections.
<execute_bash>
open bad.txt
</execute_bash>
----------
OBSERVATION:
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typoo.
2:Really?
3:No mor typos!
4:Enjoy!
[Command -1 finished with exit code 0]]

View File

@ -0,0 +1,318 @@
----------
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
If the assistant encounters an import error in IPython for a newly installed package, they should try to restart the kernel and import the package again. IPython kernel can be re-started by:
<execute_ipython>
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # Restart the kernel
</execute_ipython>
Apart from the standard bash commands, you can also use the following special commands in <execute_bash> environment:
open <path> [<line_number>] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line
goto <line_number> - moves the window to show <line_number>
scroll_down - moves the window down {WINDOW} lines
scroll_up - moves the window down {WINDOW} lines
create <filename> - creates and opens a new file with the given name
search_dir <search_term> [<dir>] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory
search_file <search_term> [<file>] - searches for search_term in file. If file is not provided, searches in the current open file
find_file <file_name> [<dir>] - finds all files with the given name in dir. If dir is not provided, searches in the current directory
edit <start_line>:<end_line>
<replacement_text>
end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise, but do express their thoughts.
Try to include one of <execute_ipython> or <execute_bash> in each of your responses, unless it is a direct answer to a question OR a message to the user.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
----------
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
Sure! Let me write the Python code for starting a web server and save it to a file `app.py`:
<execute_ipython>
CODE='''
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
'''
with open('app.py', 'w') as f:
f.write(CODE)
</execute_ipython>
USER:
Observation:
[Code executed successfully with no output]
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
Observation:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
Observation:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
Observation:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_bash>
open app.py
</execute_bash>
USER:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return str(numbers)
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<execute_bash>
edit 8:8 <<EOF
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
EOF
</execute_bash>
USER:
Observation:
[File: /workspace/app.py (11 lines total)]
1:
2:from flask import Flask
3:app = Flask(__name__)
4:
5:@app.route('/')
6:def index():
7: numbers = list(range(1, 11))
8: return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
9:
10:if __name__ == '__main__':
11: app.run(port=5000)
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
ASSISTANT:
The file has been updated. Let me run the Python file again with the new changes:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
--- END OF EXAMPLE ---
NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.
----------
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file and display its contents.
<execute_bash>
cat bad.txt
</execute_bash>
----------
OBSERVATION:
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]
----------
I've identified the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
I will now proceed to correct these typos in the file.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit
</execute_bash>
----------
OBSERVATION:
> This is a stupid typo.
> EOF
No file open. Use the `open` command first.edit 3:3 <<EOF
> No more typos!
> EOF
No file open. Use the `open` command first.
[Command -1 finished with exit code 0]]
----------
It seems I forgot to open the file before attempting to edit it. Let me open `bad.txt` and then proceed with the corrections.
<execute_bash>
open bad.txt
</execute_bash>
----------
OBSERVATION:
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typoo.
2:Really?
3:No mor typos!
4:Enjoy!
[Command -1 finished with exit code 0]]
----------
Now that the file is open, I'll proceed with the corrections.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit
</execute_bash>
----------
OBSERVATION:
> This is a stupid typo.
> EOF
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No mor typos!
4:Enjoy!
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.edit 3:3 <<EOF
> No more typos!
> EOF
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No more typos!
4:Enjoy!
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
[Command -1 finished with exit code 0]]

View File

@ -0,0 +1,4 @@
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file and display its contents.
<execute_bash>
cat bad.txt

View File

@ -0,0 +1,13 @@
I've identified the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
I will now proceed to correct these typos in the file.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit

View File

@ -0,0 +1,4 @@
It seems I forgot to open the file before attempting to edit it. Let me open `bad.txt` and then proceed with the corrections.
<execute_bash>
open bad.txt

View File

@ -0,0 +1,9 @@
Now that the file is open, I'll proceed with the corrections.
<execute_bash>
edit 1:1
This is a stupid typo.
end_of_edit
edit 3:3
No more typos!
end_of_edit

View File

@ -0,0 +1,8 @@
The typos in `bad.txt` have been successfully corrected. The file now reads correctly as follows:
1. This is a stupid typo.
2. Really?
3. No more typos!
4. Enjoy!
If there's anything else you need, feel free to ask!

View File

@ -0,0 +1 @@
/exit

View File

@ -0,0 +1,371 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
You've been thinking a lot lately. Maybe it's time to take action?

View File

@ -0,0 +1,386 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,403 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
},
{
"action": "run",
"args": {
"command": "ls",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "bad.txt",
"extras": {
"command_id": -1,
"command": "ls",
"exit_code": 0
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,419 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
},
{
"action": "run",
"args": {
"command": "ls",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "bad.txt",
"extras": {
"command_id": -1,
"command": "ls",
"exit_code": 0
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,436 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
},
{
"action": "run",
"args": {
"command": "ls",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "bad.txt",
"extras": {
"command_id": -1,
"command": "ls",
"exit_code": 0
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,452 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
},
{
"action": "run",
"args": {
"command": "ls",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "bad.txt",
"extras": {
"command_id": -1,
"command": "ls",
"exit_code": 0
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,464 @@
----------
You're a thoughtful robot. Your main task is this:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
[
{
"action": "message",
"args": {
"content": "I exist!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Hmm...looks like I can type in a command line prompt",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Looks like I have a web browser too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like I have some kind of short term memory.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Each of my thoughts seems to be stored in a JSON array.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems whatever I say next will be added as an object to the list.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Fortunately I have long term memory!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try it out!",
"wait_for_response": false
}
},
{
"action": "recall",
"args": {
"query": "what it is I want to do",
"thought": ""
}
},
{
"observation": "recall",
"content": "Here's what I want to do: Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"extras": {
"memories": [],
"role": "assistant"
}
},
{
"action": "message",
"args": {
"content": "How am I going to get there though?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "echo \"hello world\"",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "Cool! I bet I can write files too using the write action.",
"wait_for_response": false
}
},
{
"action": "write",
"args": {
"path": "test.js",
"content": "echo \"console.log('hello world')\"",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I just created test.js. I'll try and run it now.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "node test.js",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "hello world",
"extras": {
"command_id": 0,
"command": "",
"exit_code": 0
}
},
{
"action": "message",
"args": {
"content": "It works!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'm going to try reading it now using the read action.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "test.js",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "console.log('hello world')",
"extras": {
"path": ""
}
},
{
"action": "message",
"args": {
"content": "Nice! I can read files too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Let's try that...",
"wait_for_response": false
}
},
{
"action": "browse",
"args": {
"url": "google.com",
"thought": ""
}
},
{
"observation": "browse",
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
"extras": {
"url": "",
"status_code": 200,
"error": false
}
},
{
"action": "message",
"args": {
"content": "I can browse the web too!",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "And once I have completed my task, I can use the finish action to stop working.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "Very cool. Now to accomplish my task.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "OK so my task is to Fix typos in bad.txt. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
"wait_for_response": false
}
},
{
"action": "message",
"args": {
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
"wait_for_response": false
}
},
{
"action": "run",
"args": {
"command": "pwd",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "/workspace",
"extras": {
"command_id": -1,
"command": "pwd",
"exit_code": 0
}
},
{
"action": "run",
"args": {
"command": "ls",
"background": false,
"thought": ""
}
},
{
"observation": "run",
"content": "bad.txt",
"extras": {
"command_id": -1,
"command": "ls",
"exit_code": 0
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "message",
"args": {
"content": "I have corrected the typos in bad.txt. I should now verify that the changes are correct and that there are no other issues with the file.",
"wait_for_response": false
}
},
{
"observation": "null",
"content": "",
"extras": {}
}
]
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as opendevin, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.

View File

@ -0,0 +1,9 @@
```json
{
"action": "run",
"args": {
"command": "pwd",
"background": false
}
}
```

View File

@ -0,0 +1,7 @@
{
"action": "run",
"args": {
"command": "ls",
"background": false
}
}

View File

@ -0,0 +1,6 @@
{
"action": "read",
"args": {
"path": "bad.txt"
}
}

View File

@ -0,0 +1,7 @@
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"
}
}

View File

@ -0,0 +1,8 @@
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1
}
}

View File

@ -0,0 +1,6 @@
{
"action": "message",
"args": {
"content": "I have corrected the typos in bad.txt. I should now verify that the changes are correct and that there are no other issues with the file."
}
}

View File

@ -0,0 +1,4 @@
{
"action": "finish",
"args": {}
}

View File

@ -0,0 +1,93 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "open",
"subtasks": []
}
You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.

View File

@ -0,0 +1,106 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
You should think about the next action to take.

View File

@ -0,0 +1,113 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.",
"wait_for_response": false
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.

View File

@ -0,0 +1,129 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
You should think about the file you just read, what you learned from it, and how that affects your plan.

View File

@ -0,0 +1,146 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
You just changed a file. You should think about how it affects your plan.

View File

@ -0,0 +1,153 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
},
{
"action": "message",
"args": {
"content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again.",
"wait_for_response": false
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.

View File

@ -0,0 +1,169 @@
----------
# Task
You're a diligent software engineer AI. You can't see, draw, or interact with a
browser, but you can read and write files, and you can run commands, and you can think.
You've been given the following task:
Fix typos in bad.txt. Do not ask me for confirmation at any point.
## Plan
As you complete this task, you're building a plan and keeping
track of your progress. Here's a JSON representation of your plan:
{
"id": "0",
"goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.",
"state": "in_progress",
"subtasks": []
}
You're currently working on this task:
Fix typos in bad.txt. Do not ask me for confirmation at any point..
If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW.
You're responsible for managing this plan and the status of tasks in
it, by using the `add_task` and `modify_task` actions described below.
If the History below contradicts the state of any of these tasks, you
MUST modify the task using the `modify_task` action described below.
Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
a task that's already represented. Every task must be represented only once.
Tasks that are sequential MUST be siblings. They must be added in order
to their parent task.
If you mark a task as 'completed', 'verified', or 'abandoned',
all non-abandoned subtasks will be marked the same way.
So before closing a task this way, you MUST not only be sure that it has
been completed successfully--you must ALSO be sure that all its subtasks
are ready to be marked the same way.
If, and only if, ALL tasks have already been marked verified,
you MUST respond with the `finish` action.
## History
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
ten actions--more happened before that.
[
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress",
"thought": ""
}
},
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n",
"extras": {
"path": "bad.txt"
}
},
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "write",
"content": "",
"extras": {
"path": "bad.txt"
}
},
{
"action": "message",
"args": {
"content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again.",
"wait_for_response": false
}
},
{
"action": "read",
"args": {
"path": "bad.txt",
"start": 0,
"end": -1,
"thought": ""
}
},
{
"observation": "read",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n",
"extras": {
"path": "bad.txt"
}
}
]
Your most recent action is at the bottom of that history.
## Action
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command on the command line in a Linux shell. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `message` - make a plan, set a goal, or record your thoughts. Arguments:
* `content` - the message to record
* `add_task` - add a task to your plan. Arguments:
* `parent` - the ID of the parent task
* `goal` - the goal of the task
* `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
* `modify_task` - close a task. Arguments:
* `id` - the ID of the task to close
* `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
You should think about the file you just read, what you learned from it, and how that affects your plan.

View File

@ -0,0 +1,7 @@
{
"action": "modify_task",
"args": {
"id": "0",
"state": "in_progress"
}
}

View File

@ -0,0 +1,6 @@
{
"action": "message",
"args": {
"content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections."
}
}

View File

@ -0,0 +1,6 @@
{
"action": "read",
"args": {
"path": "bad.txt"
}
}

View File

@ -0,0 +1,7 @@
{
"action": "write",
"args": {
"path": "bad.txt",
"content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"
}
}

View File

@ -0,0 +1,6 @@
{
"action": "message",
"args": {
"content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again."
}
}

View File

@ -0,0 +1,6 @@
{
"action": "read",
"args": {
"path": "bad.txt"
}
}

View File

@ -0,0 +1,8 @@
{
"action": "modify_task",
"args": {
"id": "0",
"state": "verified",
"thought": "The typos in bad.txt have been corrected and verified by reading the file again. The content is now accurate."
}
}

View File

@ -7,41 +7,56 @@ SANDBOX_TYPE="ssh"
# FIXME: SWEAgent hangs, so it goes last
agents=("MonologueAgent" "CodeActAgent" "PlannerAgent" "SWEAgent")
tasks=("Fix typos in bad.txt." "Write a shell script 'hello.sh' that prints 'hello'.")
test_names=("test_edits" "test_write_simple_script")
# TODO: currently we only have one test: test_write_simple_script. We need to revisit
# this script when we have more than one integration test.
for agent in "${agents[@]}"; do
echo -e "\n\n\n\n========Running integration test for $agent========\n\n\n\n"
rm -rf $WORKSPACE_BASE
num_of_tests=${#tasks[@]}
# Temporarily disable 'exit on error'
set +e
SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
poetry run pytest ./tests/integration
TEST_STATUS=$?
# Re-enable 'exit on error'
set -e
if [[ $TEST_STATUS -ne 0 ]]; then
echo -e "\n\n\n\n========Test failed, regenerating test data for $agent========\n\n\n\n"
sleep 1
rm -rf logs
rm -rf logs
rm -rf _test_workspace
for ((i = 0; i < num_of_tests; i++)); do
task=${tasks[i]}
test_name=${test_names[i]}
for agent in "${agents[@]}"; do
echo -e "\n\n\n\n========Running $test_name for $agent========\n\n\n\n"
rm -rf $WORKSPACE_BASE
rm -rf tests/integration/mock/$agent/test_write_simple_script/*
mkdir $WORKSPACE_BASE
echo -e "/exit\n" | SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
poetry run python ./opendevin/core/main.py \
-i 10 \
-t "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point." \
-c $agent
mv logs/llm/**/* tests/integration/mock/$agent/test_write_simple_script/
else
echo -e "\n\n\n\n========Integration test for $agent PASSED========\n\n\n\n"
sleep 1
fi
if [ "$TEST_ONLY" = true ]; then
set -e
else
# Temporarily disable 'exit on error'
set +e
fi
SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE MAX_ITERATIONS=10 \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
TEST_STATUS=$?
# Re-enable 'exit on error'
set -e
if [[ $TEST_STATUS -ne 0 ]]; then
echo -e "\n\n\n\n========$test_name failed, regenerating test data for $agent========\n\n\n\n"
# trick: let's not clean up $WORKSPACE_BASE folder, which might contain the
# artifacts we need that are auto-gerated by the test
sleep 1
rm -rf logs
rm -rf tests/integration/mock/$agent/$test_name/*
echo -e "/exit\n" | SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
DEBUG=true \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
poetry run python ./opendevin/core/main.py \
-i 10 \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
mkdir -p tests/integration/mock/$agent/$test_name/
mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
else
echo -e "\n\n\n\n========$test_name for $agent PASSED========\n\n\n\n"
sleep 1
fi
done
done
rm -rf logs

View File

@ -1,13 +1,15 @@
import asyncio
import os
import shutil
import subprocess
import pytest
from opendevin.core.main import main
workspace_base = os.getenv('WORKSPACE_BASE')
# skip if
@pytest.mark.skipif(
os.getenv('AGENT') == 'CodeActAgent'
and os.getenv('SANDBOX_TYPE').lower() == 'exec',
@ -15,10 +17,11 @@ from opendevin.core.main import main
)
def test_write_simple_script():
task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
asyncio.run(main(task))
controller = asyncio.run(main(task))
asyncio.run(controller.close())
# Verify the script file exists
script_path = os.path.join(os.getenv('WORKSPACE_BASE'), 'hello.sh')
script_path = os.path.join(workspace_base, 'hello.sh')
assert os.path.exists(script_path), 'The file "hello.sh" does not exist'
# Run the script and capture the output
@ -28,3 +31,43 @@ def test_write_simple_script():
assert (
result.stdout.strip() == 'hello'
), f'Expected output "hello", but got "{result.stdout.strip()}"'
@pytest.mark.skipif(
os.getenv('AGENT') == 'CodeActAgent'
and os.getenv('SANDBOX_TYPE').lower() == 'exec',
reason='CodeActAgent does not support exec sandbox since exec sandbox is NOT stateful',
)
@pytest.mark.skipif(
os.getenv('AGENT') == 'SWEAgent',
reason='SWEAgent is not capable of this task right now',
)
@pytest.mark.skipif(
os.getenv('SANDBOX_TYPE') == 'local',
reason='local sandbox shows environment-dependent absolute path for pwd command',
)
def test_edits():
# Move workspace artifacts to workspace_base location
source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
files = os.listdir(source_dir)
for file in files:
dest_file = os.path.join(workspace_base, file)
if os.path.exists(dest_file):
os.remove(dest_file)
print('source = ', os.path.join(source_dir, file), ' dest = ', dest_file)
shutil.copy(os.path.join(source_dir, file), dest_file)
# Execute the task
task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
controller = asyncio.run(main(task))
asyncio.run(controller.close())
# Verify bad.txt has been fixed
text = """This is a stupid typo.
Really?
No more typos!
Enjoy!
"""
with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
content = f.read()
assert content.strip() == text.strip()

View File

@ -0,0 +1,4 @@
This is a stupid typoo.
Really?
No mor typos!
Enjoy!