mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Minimal Docker Sandbox with GPT-3.5 Execution Example (#48)
* minimal docker sandbox * make container_image as an argument (fall back to ubuntu); increase timeout to avoid return too early for long running commands; * add a minimal working (imperfect) example * fix typo * change default container name * attempt to fix "Bad file descriptor" error * handle ctrl+D * add Python gitignore * push sandbox to shared dockerhub for ease of use * move codeact example into research folder * add README for opendevin * change container image name to opendevin dockerhub * move folder; change example to a more general agent * update Message and Role * update docker sandbox to support mounting folder and switch to user with correct permission * make network as host * handle erorrs when attrs are not set yet * convert codeact agent into a compatible agent * add workspace to gitignore * make sure the agent interface adjustment works for langchain_agent
This commit is contained in:
parent
a722f5c0b1
commit
2de75d4782
2
.gitignore
vendored
2
.gitignore
vendored
@ -187,4 +187,4 @@ yarn-error.log*
|
||||
|
||||
# agent
|
||||
.envrc
|
||||
agent/workspace
|
||||
/workspace
|
||||
|
||||
@ -1 +1,2 @@
|
||||
from . import langchains_agent
|
||||
from . import codeact_agent
|
||||
|
||||
21
agenthub/codeact_agent/README.md
Normal file
21
agenthub/codeact_agent/README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# CodeAct-based Agent Framework
|
||||
|
||||
This folder implements the [CodeAct idea](https://arxiv.org/abs/2402.13463) that relies on LLM to autonomously perform actions in a Bash shell. It requires more from the LLM itself: LLM needs to be capable enough to do all the stuff autonomously, instead of stuck in an infinite loop.
|
||||
|
||||
A minimalistic exmaple can be found at [research/codeact/examples/run_flask_server_with_bash.py](./examples/run_flask_server_with_bash.py):
|
||||
|
||||
```bash
|
||||
mkdir workspace
|
||||
PYTHONPATH=`pwd`:$PYTHONPATH python3 opendevin/main.py -d ./workspace -c CodeActAgent -t "Please write a flask app that returns 'Hello, World\!' at the root URL, then start the app on port 5000. python3 has already been installed for you."
|
||||
```
|
||||
|
||||
|
||||
Example: prompts `gpt-3.5-turbo-0125` to write a flask server, install `flask` library, and start the server.
|
||||
|
||||
<img width="951" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/325c3115-a343-4cc5-a92b-f1e5d552a077">
|
||||
|
||||
<img width="957" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/68ad10c1-744a-4e9d-bb29-0f163d665a0a">
|
||||
|
||||
Most of the things are working as expected, except at the end, the model did not follow the instruction to stop the interaction by outputting `<execute> exit </execute>` as instructed.
|
||||
|
||||
**TODO**: This should be fixable by either (1) including a complete in-context example like [this](https://github.com/xingyaoww/mint-bench/blob/main/mint/tasks/in_context_examples/reasoning/with_tool.txt), OR (2) collect some interaction data like this and fine-tune a model (like [this](https://github.com/xingyaoww/code-act), a more complex route).
|
||||
124
agenthub/codeact_agent/__init__.py
Normal file
124
agenthub/codeact_agent/__init__.py
Normal file
@ -0,0 +1,124 @@
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
from litellm import completion
|
||||
from termcolor import colored
|
||||
from typing import List, Dict
|
||||
|
||||
from opendevin.agent import Agent, Message, Role
|
||||
from opendevin.sandbox.docker import DockerInteractive
|
||||
|
||||
assert (
|
||||
"OPENAI_API_KEY" in os.environ
|
||||
), "Please set the OPENAI_API_KEY environment variable."
|
||||
|
||||
|
||||
|
||||
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
|
||||
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
|
||||
|
||||
DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with <execute> and </execute>.
|
||||
For example:
|
||||
|
||||
You can list the files in the current directory by executing the following command:
|
||||
<execute>ls</execute>
|
||||
|
||||
You can also install packages using pip:
|
||||
<execute> pip install numpy </execute>
|
||||
|
||||
You can also write a block of code to a file:
|
||||
<execute>
|
||||
echo "import math
|
||||
print(math.pi)" > math.py
|
||||
</execute>
|
||||
|
||||
When you are done, execute "exit" to close the shell and end the conversation.
|
||||
"""
|
||||
|
||||
INVALID_INPUT_MESSAGE = (
|
||||
"I don't understand your input. \n"
|
||||
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
|
||||
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
|
||||
)
|
||||
|
||||
|
||||
def parse_response(response) -> str:
|
||||
action = response.choices[0].message.content
|
||||
if "<execute>" in action and "</execute>" not in action:
|
||||
action += "</execute>"
|
||||
return action
|
||||
|
||||
|
||||
class CodeActAgent(Agent):
|
||||
def __init__(
|
||||
self,
|
||||
instruction: str,
|
||||
workspace_dir: str,
|
||||
model_name: str,
|
||||
max_steps: int = 100
|
||||
) -> None:
|
||||
"""
|
||||
Initializes a new instance of the CodeActAgent class.
|
||||
|
||||
Parameters:
|
||||
- instruction (str): The instruction for the agent to execute.
|
||||
- max_steps (int): The maximum number of steps to run the agent.
|
||||
"""
|
||||
super().__init__(instruction, workspace_dir, model_name, max_steps)
|
||||
self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)]
|
||||
self._history.append(Message(Role.USER, instruction))
|
||||
self.env = DockerInteractive(workspace_dir=workspace_dir)
|
||||
print(colored("===USER:===\n" + instruction, "green"))
|
||||
|
||||
def _history_to_messages(self) -> List[Dict]:
|
||||
return [message.to_dict() for message in self._history]
|
||||
|
||||
def run(self) -> None:
|
||||
"""
|
||||
Starts the execution of the assigned instruction. This method should
|
||||
be implemented by subclasses to define the specific execution logic.
|
||||
"""
|
||||
for _ in range(self.max_steps):
|
||||
response = completion(
|
||||
messages=self._history_to_messages(),
|
||||
model=self.model_name,
|
||||
stop=["</execute>"],
|
||||
temperature=0.0,
|
||||
seed=42,
|
||||
)
|
||||
action = parse_response(response)
|
||||
self._history.append(Message(Role.ASSISTANT, action))
|
||||
print(colored("===ASSISTANT:===\n" + action, "yellow"))
|
||||
|
||||
command = re.search(r"<execute>(.*)</execute>", action, re.DOTALL)
|
||||
if command is not None:
|
||||
# a command was found
|
||||
command = command.group(1)
|
||||
if command.strip() == "exit":
|
||||
print(colored("Exit received. Exiting...", "red"))
|
||||
break
|
||||
# execute the code
|
||||
observation = self.env.execute(command)
|
||||
self._history.append(Message(Role.ASSISTANT, observation))
|
||||
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
|
||||
else:
|
||||
# we could provide a error message for the model to continue similar to
|
||||
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
|
||||
observation = INVALID_INPUT_MESSAGE
|
||||
self._history.append(Message(Role.ASSISTANT, observation))
|
||||
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
|
||||
|
||||
self.env.close()
|
||||
|
||||
def chat(self, message: str) -> None:
|
||||
"""
|
||||
Optional method for interactive communication with the agent during its execution. Implementations
|
||||
can use this method to modify the agent's behavior or state based on chat inputs.
|
||||
|
||||
Parameters:
|
||||
- message (str): The chat message or command.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
Agent.register("CodeActAgent", CodeActAgent)
|
||||
@ -69,6 +69,9 @@ class LangchainsAgent(Agent):
|
||||
Starts the execution of the assigned instruction. This method should
|
||||
be implemented by subclasses to define the specific execution logic.
|
||||
"""
|
||||
print("Working in directory:", self.workspace_dir)
|
||||
os.chdir(self.workspace_dir)
|
||||
|
||||
agent = LangchainsAgentImpl(self.instruction)
|
||||
next_is_output = False
|
||||
for thought in INITIAL_THOUGHTS:
|
||||
|
||||
@ -4,3 +4,5 @@ langchain-community
|
||||
llama-index
|
||||
llama-index-vector-stores-chroma
|
||||
chromadb
|
||||
litellm
|
||||
termcolor
|
||||
|
||||
18
opendevin/README.md
Normal file
18
opendevin/README.md
Normal file
@ -0,0 +1,18 @@
|
||||
# OpenDevin Shared Abstraction and Components
|
||||
|
||||
This is a Python package that contains all the shared abstraction (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium).
|
||||
|
||||
## Sandbox component
|
||||
|
||||
Run the docker-based sandbox interactive:
|
||||
|
||||
```bash
|
||||
mkdir workspace
|
||||
python3 opendevin/sandbox/docker.py -d workspace
|
||||
```
|
||||
|
||||
It will map `./workspace` into the docker container with the folder permission correctly adjusted for current user.
|
||||
|
||||
Example screenshot:
|
||||
|
||||
<img width="868" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/8dedcdee-437a-4469-870f-be29ca2b7c32">
|
||||
@ -5,11 +5,11 @@ from enum import Enum
|
||||
|
||||
|
||||
class Role(Enum):
|
||||
SYSTEM = "system" # system message for LLM
|
||||
USER = "user" # the user
|
||||
ASSISTANT = "assistant" # the agent
|
||||
ENVIRONMENT = "environment" # the environment (e.g., bash shell, web browser, etc.)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Message:
|
||||
"""
|
||||
@ -20,6 +20,20 @@ class Message:
|
||||
content: str
|
||||
# TODO: add more fields as needed
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
"""
|
||||
Converts the message to a dictionary (OpenAI chat-completion format).
|
||||
|
||||
Returns:
|
||||
- message (Dict): A dictionary representation of the message.
|
||||
"""
|
||||
role = self.role.value
|
||||
content = self.content
|
||||
if self.role == Role.ENVIRONMENT:
|
||||
content = f"Environment Observation:\n{content}"
|
||||
role = "user" # treat environment messages as user messages
|
||||
return {"role": role, "content": content}
|
||||
|
||||
|
||||
class Agent(ABC):
|
||||
"""
|
||||
@ -27,6 +41,11 @@ class Agent(ABC):
|
||||
executing a specific instruction and allowing human interaction with the
|
||||
agent during execution.
|
||||
It tracks the execution status and maintains a history of interactions.
|
||||
|
||||
:param instruction: The instruction for the agent to execute.
|
||||
:param workspace_dir: The working directory for the agent.
|
||||
:param model_name: The litellm name of the model to use for the agent.
|
||||
:param max_steps: The maximum number of steps to run the agent.
|
||||
"""
|
||||
|
||||
_registry: Dict[str, Type['Agent']] = {}
|
||||
@ -34,9 +53,13 @@ class Agent(ABC):
|
||||
def __init__(
|
||||
self,
|
||||
instruction: str,
|
||||
workspace_dir: str,
|
||||
model_name: str,
|
||||
max_steps: int = 100
|
||||
):
|
||||
self.instruction = instruction
|
||||
self.workspace_dir = workspace_dir
|
||||
self.model_name = model_name
|
||||
self.max_steps = max_steps
|
||||
|
||||
self._complete = False
|
||||
@ -105,18 +128,16 @@ class Agent(ABC):
|
||||
cls._registry[name] = agent_cls
|
||||
|
||||
@classmethod
|
||||
def create_instance(cls, name: str, instruction: str) -> 'Agent':
|
||||
def get_cls(cls, name: str) -> Type['Agent']:
|
||||
"""
|
||||
Creates an instance of a registered agent class based on the given name.
|
||||
Retrieves an agent class from the registry.
|
||||
|
||||
Parameters:
|
||||
- name (str): The name of the agent class to instantiate.
|
||||
- instruction (str): The instruction for the new agent instance.
|
||||
- name (str): The name of the class to retrieve
|
||||
|
||||
Returns:
|
||||
- An instance of the specified agent class.
|
||||
- agent_cls (Type['Agent']): The class registered under the specified name.
|
||||
"""
|
||||
if name not in cls._registry:
|
||||
raise ValueError(f"No agent class registered under '{name}'.")
|
||||
agent_cls = cls._registry[name]
|
||||
return agent_cls(instruction)
|
||||
return cls._registry[name]
|
||||
|
||||
@ -9,10 +9,13 @@ if __name__ == "__main__":
|
||||
parser.add_argument("-d", "--directory", required=True, type=str, help="The working directory for the agent")
|
||||
parser.add_argument("-t", "--task", required=True, type=str, help="The task for the agent to perform")
|
||||
parser.add_argument("-c", "--agent-cls", default="LangchainsAgent", type=str, help="The agent class to use")
|
||||
parser.add_argument("-m", "--model-name", default="gpt-3.5-turbo-0125", type=str, help="The (litellm) model name to use")
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Working in directory:", args.directory)
|
||||
os.chdir(args.directory)
|
||||
|
||||
agent = Agent.create_instance(args.agent_cls, args.task)
|
||||
AgentCls: Agent = Agent.get_cls(args.agent_cls)
|
||||
agent = AgentCls(
|
||||
instruction=args.task,
|
||||
workspace_dir=args.directory,
|
||||
model_name=args.model_name
|
||||
)
|
||||
agent.run()
|
||||
|
||||
20
opendevin/sandbox/Dockerfile
Normal file
20
opendevin/sandbox/Dockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# install basic packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
vim \
|
||||
nano \
|
||||
unzip \
|
||||
zip \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-venv \
|
||||
python3-dev \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# docker build -f opendevin/sandbox/Dockerfile -t opendevin/sandbox:v0.1 .
|
||||
# docker push opendevin/sandbox:v0.1
|
||||
145
opendevin/sandbox/docker.py
Normal file
145
opendevin/sandbox/docker.py
Normal file
@ -0,0 +1,145 @@
|
||||
import os
|
||||
import pty
|
||||
import sys
|
||||
import uuid
|
||||
import time
|
||||
import shlex
|
||||
import select
|
||||
import subprocess
|
||||
from typing import List
|
||||
from collections import namedtuple
|
||||
|
||||
InputType = namedtuple("InputDtype", ["content"])
|
||||
OutputType = namedtuple("OutputDtype", ["content"])
|
||||
|
||||
|
||||
class DockerInteractive:
|
||||
CONTAINER_IMAGE = "opendevin/sandbox:latest"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
workspace_dir: str = None,
|
||||
container_image: str = None,
|
||||
timeout: int = 5
|
||||
):
|
||||
self.instance_id: str = uuid.uuid4()
|
||||
if workspace_dir is not None:
|
||||
assert os.path.exists(workspace_dir), f"Directory {workspace_dir} does not exist."
|
||||
# expand to absolute path
|
||||
workspace_dir = os.path.abspath(workspace_dir)
|
||||
else:
|
||||
workspace_dir = os.getcwd()
|
||||
print(f"workspace unspecified, using current directory: {workspace_dir}")
|
||||
|
||||
# TODO: this timeout is actually essential - need a better way to set it
|
||||
# if it is too short, the container may still waiting for previous
|
||||
# command to finish (e.g. apt-get update)
|
||||
# if it is too long, the user may have to wait for a unnecessary long time
|
||||
self.timeout: int = timeout
|
||||
|
||||
if container_image is None:
|
||||
container_image = self.CONTAINER_IMAGE
|
||||
|
||||
uid = os.getuid()
|
||||
cmd = (
|
||||
f"docker run -it --rm --name sandbox-{self.instance_id} "
|
||||
f"-v {workspace_dir}:/workspace "
|
||||
f"-w /workspace "
|
||||
f"--network=host "
|
||||
f"{container_image} "
|
||||
f"/bin/bash -c 'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin && su devin'"
|
||||
)
|
||||
# print(f"Starting Docker container with command: {cmd}")
|
||||
self.master_fd, self.slave_fd = pty.openpty()
|
||||
self.container = subprocess.Popen(
|
||||
shlex.split(cmd),
|
||||
stdin=self.slave_fd,
|
||||
stdout=self.slave_fd,
|
||||
stderr=self.slave_fd,
|
||||
text=True,
|
||||
close_fds=True,
|
||||
)
|
||||
time.sleep(1) # wait for the container to start
|
||||
# TODO: use a more robust way to check if the container is ready
|
||||
self.history: List[InputType | OutputType] = [
|
||||
OutputType(self._wait_and_read_output())
|
||||
]
|
||||
|
||||
def _wait_and_read_output(self, user_input: str = None) -> str:
|
||||
output_str = ""
|
||||
while True:
|
||||
readable, _, _ = select.select([self.master_fd], [], [], self.timeout)
|
||||
if readable:
|
||||
output = os.read(self.master_fd, 1024).decode()
|
||||
if not output:
|
||||
break
|
||||
output_str += output
|
||||
else:
|
||||
break
|
||||
if user_input:
|
||||
output_str = output_str.lstrip(user_input).lstrip()
|
||||
return output_str
|
||||
|
||||
def execute(self, cmd: str) -> str:
|
||||
os.write(self.master_fd, (cmd + "\n").encode())
|
||||
self.history.append(InputType(cmd))
|
||||
|
||||
output = self._wait_and_read_output(cmd)
|
||||
self.history.append(OutputType(output))
|
||||
return output
|
||||
|
||||
def close(self):
|
||||
if hasattr(self, "master_fd") and self.master_fd is not None:
|
||||
os.close(self.master_fd)
|
||||
self.master_fd = None
|
||||
|
||||
if hasattr(self, "container") and self.container is not None:
|
||||
self.container.terminate()
|
||||
try:
|
||||
self.container.wait(timeout=5)
|
||||
print("Container stopped.")
|
||||
except subprocess.TimeoutExpired:
|
||||
self.container.kill()
|
||||
print("Container killed.")
|
||||
self.container = None
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Interactive Docker container")
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--directory",
|
||||
type=str,
|
||||
default=None,
|
||||
help="The directory to mount as the workspace in the Docker container.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
docker_interactive = DockerInteractive(
|
||||
workspace_dir=args.directory,
|
||||
container_image="opendevin/sandbox:latest",
|
||||
)
|
||||
print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
|
||||
|
||||
for item in docker_interactive.history:
|
||||
print(item.content, end="")
|
||||
sys.stdout.flush()
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
user_input = input()
|
||||
except EOFError:
|
||||
print("\nExiting...")
|
||||
break
|
||||
if user_input.lower() == "exit":
|
||||
print(f"Exiting...")
|
||||
break
|
||||
output = docker_interactive.execute(user_input)
|
||||
print(output, end="")
|
||||
sys.stdout.flush()
|
||||
except KeyboardInterrupt:
|
||||
print("\nExiting...")
|
||||
docker_interactive.close()
|
||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
litellm
|
||||
termcolor
|
||||
Loading…
x
Reference in New Issue
Block a user