Minimal Docker Sandbox with GPT-3.5 Execution Example (#48)

* minimal docker sandbox

* make container_image as an argument (fall back to ubuntu);
increase timeout to avoid return too early for long running commands;

* add a minimal working (imperfect) example

* fix typo

* change default container name

* attempt to fix "Bad file descriptor" error

* handle ctrl+D

* add Python gitignore

* push sandbox to shared dockerhub for ease of use

* move codeact example into research folder

* add README for opendevin

* change container image name to opendevin dockerhub

* move folder; change example to a more general agent

* update Message and Role

* update docker sandbox to support mounting folder and switch to user with correct permission

* make network as host

* handle erorrs when attrs are not set yet

* convert codeact agent into a compatible agent

* add workspace to gitignore

* make sure the agent interface adjustment works for langchain_agent
This commit is contained in:
Xingyao Wang 2024-03-21 21:54:56 +08:00 committed by GitHub
parent a722f5c0b1
commit 2de75d4782
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 373 additions and 13 deletions

2
.gitignore vendored
View File

@ -187,4 +187,4 @@ yarn-error.log*
# agent
.envrc
agent/workspace
/workspace

View File

@ -1 +1,2 @@
from . import langchains_agent
from . import codeact_agent

View File

@ -0,0 +1,21 @@
# CodeAct-based Agent Framework
This folder implements the [CodeAct idea](https://arxiv.org/abs/2402.13463) that relies on LLM to autonomously perform actions in a Bash shell. It requires more from the LLM itself: LLM needs to be capable enough to do all the stuff autonomously, instead of stuck in an infinite loop.
A minimalistic exmaple can be found at [research/codeact/examples/run_flask_server_with_bash.py](./examples/run_flask_server_with_bash.py):
```bash
mkdir workspace
PYTHONPATH=`pwd`:$PYTHONPATH python3 opendevin/main.py -d ./workspace -c CodeActAgent -t "Please write a flask app that returns 'Hello, World\!' at the root URL, then start the app on port 5000. python3 has already been installed for you."
```
Example: prompts `gpt-3.5-turbo-0125` to write a flask server, install `flask` library, and start the server.
<img width="951" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/325c3115-a343-4cc5-a92b-f1e5d552a077">
<img width="957" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/68ad10c1-744a-4e9d-bb29-0f163d665a0a">
Most of the things are working as expected, except at the end, the model did not follow the instruction to stop the interaction by outputting `<execute> exit </execute>` as instructed.
**TODO**: This should be fixable by either (1) including a complete in-context example like [this](https://github.com/xingyaoww/mint-bench/blob/main/mint/tasks/in_context_examples/reasoning/with_tool.txt), OR (2) collect some interaction data like this and fine-tune a model (like [this](https://github.com/xingyaoww/code-act), a more complex route).

View File

@ -0,0 +1,124 @@
import os
import re
import argparse
from litellm import completion
from termcolor import colored
from typing import List, Dict
from opendevin.agent import Agent, Message, Role
from opendevin.sandbox.docker import DockerInteractive
assert (
"OPENAI_API_KEY" in os.environ
), "Please set the OPENAI_API_KEY environment variable."
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with <execute> and </execute>.
For example:
You can list the files in the current directory by executing the following command:
<execute>ls</execute>
You can also install packages using pip:
<execute> pip install numpy </execute>
You can also write a block of code to a file:
<execute>
echo "import math
print(math.pi)" > math.py
</execute>
When you are done, execute "exit" to close the shell and end the conversation.
"""
INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
)
def parse_response(response) -> str:
action = response.choices[0].message.content
if "<execute>" in action and "</execute>" not in action:
action += "</execute>"
return action
class CodeActAgent(Agent):
def __init__(
self,
instruction: str,
workspace_dir: str,
model_name: str,
max_steps: int = 100
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
Parameters:
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(instruction, workspace_dir, model_name, max_steps)
self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)]
self._history.append(Message(Role.USER, instruction))
self.env = DockerInteractive(workspace_dir=workspace_dir)
print(colored("===USER:===\n" + instruction, "green"))
def _history_to_messages(self) -> List[Dict]:
return [message.to_dict() for message in self._history]
def run(self) -> None:
"""
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
"""
for _ in range(self.max_steps):
response = completion(
messages=self._history_to_messages(),
model=self.model_name,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action = parse_response(response)
self._history.append(Message(Role.ASSISTANT, action))
print(colored("===ASSISTANT:===\n" + action, "yellow"))
command = re.search(r"<execute>(.*)</execute>", action, re.DOTALL)
if command is not None:
# a command was found
command = command.group(1)
if command.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
break
# execute the code
observation = self.env.execute(command)
self._history.append(Message(Role.ASSISTANT, observation))
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide a error message for the model to continue similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
observation = INVALID_INPUT_MESSAGE
self._history.append(Message(Role.ASSISTANT, observation))
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
self.env.close()
def chat(self, message: str) -> None:
"""
Optional method for interactive communication with the agent during its execution. Implementations
can use this method to modify the agent's behavior or state based on chat inputs.
Parameters:
- message (str): The chat message or command.
"""
raise NotImplementedError
Agent.register("CodeActAgent", CodeActAgent)

View File

@ -69,6 +69,9 @@ class LangchainsAgent(Agent):
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
"""
print("Working in directory:", self.workspace_dir)
os.chdir(self.workspace_dir)
agent = LangchainsAgentImpl(self.instruction)
next_is_output = False
for thought in INITIAL_THOUGHTS:

View File

@ -4,3 +4,5 @@ langchain-community
llama-index
llama-index-vector-stores-chroma
chromadb
litellm
termcolor

18
opendevin/README.md Normal file
View File

@ -0,0 +1,18 @@
# OpenDevin Shared Abstraction and Components
This is a Python package that contains all the shared abstraction (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium).
## Sandbox component
Run the docker-based sandbox interactive:
```bash
mkdir workspace
python3 opendevin/sandbox/docker.py -d workspace
```
It will map `./workspace` into the docker container with the folder permission correctly adjusted for current user.
Example screenshot:
<img width="868" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/8dedcdee-437a-4469-870f-be29ca2b7c32">

View File

@ -5,11 +5,11 @@ from enum import Enum
class Role(Enum):
SYSTEM = "system" # system message for LLM
USER = "user" # the user
ASSISTANT = "assistant" # the agent
ENVIRONMENT = "environment" # the environment (e.g., bash shell, web browser, etc.)
@dataclass
class Message:
"""
@ -20,6 +20,20 @@ class Message:
content: str
# TODO: add more fields as needed
def to_dict(self) -> Dict:
"""
Converts the message to a dictionary (OpenAI chat-completion format).
Returns:
- message (Dict): A dictionary representation of the message.
"""
role = self.role.value
content = self.content
if self.role == Role.ENVIRONMENT:
content = f"Environment Observation:\n{content}"
role = "user" # treat environment messages as user messages
return {"role": role, "content": content}
class Agent(ABC):
"""
@ -27,6 +41,11 @@ class Agent(ABC):
executing a specific instruction and allowing human interaction with the
agent during execution.
It tracks the execution status and maintains a history of interactions.
:param instruction: The instruction for the agent to execute.
:param workspace_dir: The working directory for the agent.
:param model_name: The litellm name of the model to use for the agent.
:param max_steps: The maximum number of steps to run the agent.
"""
_registry: Dict[str, Type['Agent']] = {}
@ -34,9 +53,13 @@ class Agent(ABC):
def __init__(
self,
instruction: str,
workspace_dir: str,
model_name: str,
max_steps: int = 100
):
self.instruction = instruction
self.workspace_dir = workspace_dir
self.model_name = model_name
self.max_steps = max_steps
self._complete = False
@ -105,18 +128,16 @@ class Agent(ABC):
cls._registry[name] = agent_cls
@classmethod
def create_instance(cls, name: str, instruction: str) -> 'Agent':
def get_cls(cls, name: str) -> Type['Agent']:
"""
Creates an instance of a registered agent class based on the given name.
Retrieves an agent class from the registry.
Parameters:
- name (str): The name of the agent class to instantiate.
- instruction (str): The instruction for the new agent instance.
- name (str): The name of the class to retrieve
Returns:
- An instance of the specified agent class.
- agent_cls (Type['Agent']): The class registered under the specified name.
"""
if name not in cls._registry:
raise ValueError(f"No agent class registered under '{name}'.")
agent_cls = cls._registry[name]
return agent_cls(instruction)
return cls._registry[name]

View File

@ -9,10 +9,13 @@ if __name__ == "__main__":
parser.add_argument("-d", "--directory", required=True, type=str, help="The working directory for the agent")
parser.add_argument("-t", "--task", required=True, type=str, help="The task for the agent to perform")
parser.add_argument("-c", "--agent-cls", default="LangchainsAgent", type=str, help="The agent class to use")
parser.add_argument("-m", "--model-name", default="gpt-3.5-turbo-0125", type=str, help="The (litellm) model name to use")
args = parser.parse_args()
print("Working in directory:", args.directory)
os.chdir(args.directory)
agent = Agent.create_instance(args.agent_cls, args.task)
AgentCls: Agent = Agent.get_cls(args.agent_cls)
agent = AgentCls(
instruction=args.task,
workspace_dir=args.directory,
model_name=args.model_name
)
agent.run()

View File

@ -0,0 +1,20 @@
FROM ubuntu:22.04
# install basic packages
RUN apt-get update && apt-get install -y \
curl \
wget \
git \
vim \
nano \
unzip \
zip \
python3 \
python3-pip \
python3-venv \
python3-dev \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# docker build -f opendevin/sandbox/Dockerfile -t opendevin/sandbox:v0.1 .
# docker push opendevin/sandbox:v0.1

145
opendevin/sandbox/docker.py Normal file
View File

@ -0,0 +1,145 @@
import os
import pty
import sys
import uuid
import time
import shlex
import select
import subprocess
from typing import List
from collections import namedtuple
InputType = namedtuple("InputDtype", ["content"])
OutputType = namedtuple("OutputDtype", ["content"])
class DockerInteractive:
CONTAINER_IMAGE = "opendevin/sandbox:latest"
def __init__(
self,
workspace_dir: str = None,
container_image: str = None,
timeout: int = 5
):
self.instance_id: str = uuid.uuid4()
if workspace_dir is not None:
assert os.path.exists(workspace_dir), f"Directory {workspace_dir} does not exist."
# expand to absolute path
workspace_dir = os.path.abspath(workspace_dir)
else:
workspace_dir = os.getcwd()
print(f"workspace unspecified, using current directory: {workspace_dir}")
# TODO: this timeout is actually essential - need a better way to set it
# if it is too short, the container may still waiting for previous
# command to finish (e.g. apt-get update)
# if it is too long, the user may have to wait for a unnecessary long time
self.timeout: int = timeout
if container_image is None:
container_image = self.CONTAINER_IMAGE
uid = os.getuid()
cmd = (
f"docker run -it --rm --name sandbox-{self.instance_id} "
f"-v {workspace_dir}:/workspace "
f"-w /workspace "
f"--network=host "
f"{container_image} "
f"/bin/bash -c 'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin && su devin'"
)
# print(f"Starting Docker container with command: {cmd}")
self.master_fd, self.slave_fd = pty.openpty()
self.container = subprocess.Popen(
shlex.split(cmd),
stdin=self.slave_fd,
stdout=self.slave_fd,
stderr=self.slave_fd,
text=True,
close_fds=True,
)
time.sleep(1) # wait for the container to start
# TODO: use a more robust way to check if the container is ready
self.history: List[InputType | OutputType] = [
OutputType(self._wait_and_read_output())
]
def _wait_and_read_output(self, user_input: str = None) -> str:
output_str = ""
while True:
readable, _, _ = select.select([self.master_fd], [], [], self.timeout)
if readable:
output = os.read(self.master_fd, 1024).decode()
if not output:
break
output_str += output
else:
break
if user_input:
output_str = output_str.lstrip(user_input).lstrip()
return output_str
def execute(self, cmd: str) -> str:
os.write(self.master_fd, (cmd + "\n").encode())
self.history.append(InputType(cmd))
output = self._wait_and_read_output(cmd)
self.history.append(OutputType(output))
return output
def close(self):
if hasattr(self, "master_fd") and self.master_fd is not None:
os.close(self.master_fd)
self.master_fd = None
if hasattr(self, "container") and self.container is not None:
self.container.terminate()
try:
self.container.wait(timeout=5)
print("Container stopped.")
except subprocess.TimeoutExpired:
self.container.kill()
print("Container killed.")
self.container = None
def __del__(self):
self.close()
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Interactive Docker container")
parser.add_argument(
"-d",
"--directory",
type=str,
default=None,
help="The directory to mount as the workspace in the Docker container.",
)
args = parser.parse_args()
docker_interactive = DockerInteractive(
workspace_dir=args.directory,
container_image="opendevin/sandbox:latest",
)
print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
for item in docker_interactive.history:
print(item.content, end="")
sys.stdout.flush()
try:
while True:
try:
user_input = input()
except EOFError:
print("\nExiting...")
break
if user_input.lower() == "exit":
print(f"Exiting...")
break
output = docker_interactive.execute(user_input)
print(output, end="")
sys.stdout.flush()
except KeyboardInterrupt:
print("\nExiting...")
docker_interactive.close()

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
litellm
termcolor