Remove remaining global config (#3099)

* Remove global config from memory
* Remove runtime global config
* Remove from storage
* Remove global config
* Fix event stream tests
* Fix sandbox issue
* Change config
* Removed transferred tests
* Add swe env box
* Fixes on testing
* Fixed some tests
* Fix typing
* Fix ipython test
* Revive function
* Make temp_dir fixture
* Remove test to avoid circular import
2025-12-26 05:48:36 +08:00 · 2024-07-26 14:43:32 -04:00 · 2024-07-26 14:43:32 -04:00 · 275ea706cf
commit 275ea706cf
parent 3301beffec
41 changed files with 279 additions and 306 deletions
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@ -18,12 +18,14 @@ from opendevin.controller.agent import Agent

 # from evaluation.EDA.scorer import question_scorer
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 game = None


--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@ -22,7 +22,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -30,6 +30,8 @@ from opendevin.events.action import CmdRunAction, MessageAction
 from opendevin.llm.llm import LLM
 from opendevin.runtime.docker.ssh_box import DockerSSHBox

+config = load_app_config()
+

 def process_instance(
    instance: pd.Series,
--- a/evaluation/biocoder/biocoder_env_box.py
+++ b/evaluation/biocoder/biocoder_env_box.py
@ -7,7 +7,7 @@ from dataclasses import dataclass

 from datasets import load_dataset

-from opendevin.core.config import config
+from opendevin.core.config import load_app_config
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.runtime.docker.ssh_box import DockerSSHBox
 from opendevin.runtime.plugins import (
@ -16,6 +16,8 @@ from opendevin.runtime.plugins import (
    SWEAgentCommandsRequirement,
 )

+config = load_app_config()
+
 BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0'


--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@ -18,12 +18,14 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': partial(
        codeact_user_response, encapsulate_solution=True, try_parse=None
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@ -21,13 +21,15 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.events.action import MessageAction
 from opendevin.llm.llm import LLM

+config = load_app_config()
+

 def codeact_user_response(state: State) -> str:
    msg = (
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@ -15,12 +15,14 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 # Only CodeActAgent can delegate to BrowsingAgent
 SUPPORTED_AGENT_CLS = {'CodeActAgent'}

--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@ -20,13 +20,15 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.events.action import CmdRunAction, MessageAction
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 DATASET_CACHE_DIR = '~/.cache/open-devin/evals/gaia'
 DATASET_CACHE_DIR = os.path.expanduser(DATASET_CACHE_DIR)

--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@ -12,7 +12,7 @@ from tqdm import tqdm

 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -21,6 +21,8 @@ from opendevin.llm.llm import LLM

 from .utils import encode_question, get_data

+config = load_app_config()
+

 def cleanup():
    print('Cleaning up child processes...')
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@ -36,7 +36,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -44,6 +44,8 @@ from opendevin.events.action import Action, AgentFinishAction, MessageAction
 from opendevin.events.observation import Observation
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 ACTION_FORMAT = """
 <<FINAL_ANSWER||
 <insert correct answer here, must be one of A, B, C, D> (Please dont use any additional characters. Just the letter of the correct answer (A/B/C/D).)
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@ -26,12 +26,14 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 IMPORT_HELPER = {
    'python': [
        'import math',
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@ -17,12 +17,14 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
 }
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@ -15,7 +15,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -23,6 +23,8 @@ from opendevin.llm.llm import LLM
 from opendevin.runtime.docker.ssh_box import DockerSSHBox
 from opendevin.runtime.tools import RuntimeTool

+config = load_app_config()
+
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}

 docker_ssh_box: DockerSSHBox | None = None
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@ -16,7 +16,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -27,6 +27,8 @@ from .env import SimplifiedEnv
 from .prompts import ToolPromptTemplate
 from .tasks import Task

+config = load_app_config()
+

 def codeact_user_response_mint(state: State, task: Task, task_config: Dict[str, int]):
    logger.info(f'Gold reference: {task.reference}')
--- a/evaluation/ml_bench/run_analysis.py
+++ b/evaluation/ml_bench/run_analysis.py
@ -4,10 +4,12 @@ import pprint

 import tqdm

-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.llm.llm import LLM

+config = load_app_config()
+

 def extract_test_results(res_file_path: str) -> tuple[list[str], list[str]]:
    passed = []
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@ -30,13 +30,15 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM
 from opendevin.runtime.docker.ssh_box import DockerSSHBox

+config = load_app_config()
+
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
 }
--- a/evaluation/regression/run_tests.py
+++ b/evaluation/regression/run_tests.py
@ -2,7 +2,9 @@ import argparse

 import pytest

-from opendevin.config import config
+from opendevin.config import load_app_config
+
+config = load_app_config()

 if __name__ == '__main__':
    """Main entry point of the script.
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@ -19,12 +19,14 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
 from opendevin.llm.llm import LLM

+config = load_app_config()
+
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false') == 'true'
 USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false') == 'true'

--- a/evaluation/swe_bench/swe_env_box.py
+++ b/evaluation/swe_bench/swe_env_box.py
@ -8,7 +8,7 @@ from datasets import load_dataset
 from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK
 from swebench.harness.utils import get_test_directives

-from opendevin.core.config import SandboxConfig, config
+from opendevin.core.config import AppConfig, SandboxConfig, load_app_config
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.runtime.docker.ssh_box import DockerSSHBox
 from opendevin.runtime.plugins import (
@ -27,6 +27,7 @@ def get_image_name_from_instance_id(instance_id: str) -> str:
 class SWEBenchSSHBox(DockerSSHBox):
    def __init__(
        self,
+        config: AppConfig,
        container_image: str,
        timeout: int = 120,
        sid: str | None = None,
@ -146,6 +147,7 @@ class SWEBenchSSHBox(DockerSSHBox):
    def get_box_for_instance(
        cls,
        instance,
+        config: AppConfig,
        workspace_dir_name=None,
        skip_workspace_mount: bool = True,
        workspace_mount_path: str | None = None,
@ -164,7 +166,7 @@ class SWEBenchSSHBox(DockerSSHBox):
            config.workspace_mount_path = workspace_mount_path

            # linting python after editing helps LLM fix indentations
-            config.enable_auto_lint = True
+            config.sandbox.enable_auto_lint = True
            # Need to run as root to use SWEBench container
            config.run_as_devin = False
            if use_instance_image:
@ -175,6 +177,7 @@ class SWEBenchSSHBox(DockerSSHBox):
                container_image = SWE_BENCH_CONTAINER_IMAGE
            sandbox = cls(
                container_image=container_image,
+                config=config,
                swe_instance_id=instance['instance_id'],
                swe_instance=instance,
                skip_workspace_mount=skip_workspace_mount,
@ -239,6 +242,8 @@ class SWEBenchSSHBox(DockerSSHBox):


 if __name__ == '__main__':
+    config = load_app_config()
+
    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
    # so we don't need to manage file uploading to OpenDevin's repo
    dataset = load_dataset('princeton-nlp/SWE-bench_Lite')
@ -252,6 +257,7 @@ if __name__ == '__main__':
    EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict()

    sandbox = SWEBenchSSHBox.get_box_for_instance(
+        config=config,
        instance=EXAMPLE_INSTANCE,
        sandbox_plugins=[AgentSkillsRequirement(), JupyterRequirement()],
        use_instance_image=USE_INSTANCE_IMAGE,
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@ -15,7 +15,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, get_parser
+from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -23,6 +23,8 @@ from opendevin.llm.llm import LLM

 from .utils import download_data, download_tools, encode_question, eval_answer, get_data

+config = load_app_config()
+
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
 }
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@ -15,7 +15,7 @@ from evaluation.utils.shared import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import run_agent_controller
@ -23,6 +23,8 @@ from opendevin.llm.llm import LLM
 from opendevin.runtime.docker.ssh_box import DockerSSHBox
 from opendevin.runtime.tools import RuntimeTool

+config = load_app_config()
+
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}


--- a/opendevin/controller/state/state.py
+++ b/opendevin/controller/state/state.py
@ -12,7 +12,7 @@ from opendevin.events.action import (
 )
 from opendevin.events.action.agent import AgentFinishAction
 from opendevin.memory.history import ShortTermHistory
-from opendevin.storage import get_file_store
+from opendevin.storage.files import FileStore


 class TrafficControlState(str, Enum):
@ -107,22 +107,20 @@ class State:
    end_id: int = -1
    almost_stuck: int = 0

-    def save_to_session(self, sid: str):
-        fs = get_file_store()
+    def save_to_session(self, sid: str, file_store: FileStore):
        pickled = pickle.dumps(self)
        logger.debug(f'Saving state to session {sid}:{self.agent_state}')
        encoded = base64.b64encode(pickled).decode('utf-8')
        try:
-            fs.write(f'sessions/{sid}/agent_state.pkl', encoded)
+            file_store.write(f'sessions/{sid}/agent_state.pkl', encoded)
        except Exception as e:
            logger.error(f'Failed to save state to session: {e}')
            raise e

    @staticmethod
-    def restore_from_session(sid: str) -> 'State':
-        fs = get_file_store()
+    def restore_from_session(sid: str, file_store: FileStore) -> 'State':
        try:
-            encoded = fs.read(f'sessions/{sid}/agent_state.pkl')
+            encoded = file_store.read(f'sessions/{sid}/agent_state.pkl')
            pickled = base64.b64decode(encoded)
            state = pickle.loads(pickled)
        except Exception as e:
--- a/opendevin/core/config.py
+++ b/opendevin/core/config.py
@ -18,6 +18,8 @@ load_dotenv()


 LLM_SENSITIVE_FIELDS = ['api_key', 'aws_access_key_id', 'aws_secret_access_key']
+_DEFAULT_AGENT = 'CodeActAgent'
+_MAX_ITERATIONS = 100


@dataclass
@ -220,7 +222,7 @@ class AppConfig(metaclass=Singleton):

    llms: dict[str, LLMConfig] = field(default_factory=dict)
    agents: dict = field(default_factory=dict)
-    default_agent: str = 'CodeActAgent'
+    default_agent: str = _DEFAULT_AGENT
    sandbox: SandboxConfig = field(default_factory=SandboxConfig)
    runtime: str = 'server'
    file_store: str = 'memory'
@ -234,7 +236,7 @@ class AppConfig(metaclass=Singleton):
    cache_dir: str = '/tmp/cache'
    run_as_devin: bool = True
    confirmation_mode: bool = False
-    max_iterations: int = 100
+    max_iterations: int = _MAX_ITERATIONS
    max_budget_per_task: float | None = None
    e2b_api_key: str = ''
    ssh_hostname: str = 'localhost'
@ -631,21 +633,20 @@ def get_parser() -> argparse.ArgumentParser:
    parser.add_argument(
        '-c',
        '--agent-cls',
-        default=config.default_agent,
+        default=_DEFAULT_AGENT,
        type=str,
        help='Name of the default agent to use',
    )
    parser.add_argument(
        '-i',
        '--max-iterations',
-        default=config.max_iterations,
+        default=_MAX_ITERATIONS,
        type=int,
        help='The maximum number of iterations to run the agent',
    )
    parser.add_argument(
        '-b',
        '--max-budget-per-task',
-        default=config.max_budget_per_task,
        type=float,
        help='The maximum budget allowed per task, beyond which the agent will stop.',
    )
@ -688,9 +689,6 @@ def parse_arguments() -> argparse.Namespace:
    """Parse the command line arguments."""
    parser = get_parser()
    parsed_args, _ = parser.parse_known_args()
-    if parsed_args.directory:
-        config.workspace_base = os.path.abspath(parsed_args.directory)
-        print(f'Setting workspace base to {config.workspace_base}')
    return parsed_args


@ -708,6 +706,3 @@ def load_app_config(set_logging_levels: bool = True) -> AppConfig:
        logger.DEBUG = config.debug
        logger.DISABLE_COLOR_PRINTING = config.disable_color
    return config
-
-
-config = load_app_config()
--- a/opendevin/core/main.py
+++ b/opendevin/core/main.py
@ -7,7 +7,7 @@ import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.controller import AgentController
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import config, get_llm_config_arg, parse_arguments
+from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.schema import AgentState
 from opendevin.events import EventSource, EventStream, EventStreamSubscriber
@ -17,6 +17,9 @@ from opendevin.events.observation import AgentStateChangedObservation
 from opendevin.llm.llm import LLM
 from opendevin.runtime import get_runtime_cls
 from opendevin.runtime.sandbox import Sandbox
+from opendevin.storage import get_file_store
+
+config = load_app_config()


 def read_task_from_file(file_path: str) -> str:
@ -58,15 +61,16 @@ async def run_agent_controller(
    )

    # set up the event stream
+    file_store = get_file_store(config.file_store, config.file_store_path)
    cli_session = 'main' + ('_' + sid if sid else '')
-    event_stream = EventStream(cli_session)
+    event_stream = EventStream(cli_session, file_store)

    # restore cli session if enabled
    initial_state = None
    if config.enable_cli_session:
        try:
            logger.info('Restoring agent state from cli session')
-            initial_state = State.restore_from_session(cli_session)
+            initial_state = State.restore_from_session(cli_session, file_store)
        except Exception as e:
            print('Error restoring state', e)

@ -83,9 +87,7 @@ async def run_agent_controller(

    # runtime and tools
    runtime_cls = get_runtime_cls(config.runtime)
-    runtime = runtime_cls(
-        sandbox_config=config.sandbox, event_stream=event_stream, sandbox=sandbox
-    )
+    runtime = runtime_cls(config=config, event_stream=event_stream, sandbox=sandbox)
    await runtime.ainit()
    runtime.init_sandbox_plugins(controller.agent.sandbox_plugins)
    runtime.init_runtime_tools(
@ -142,7 +144,7 @@ async def run_agent_controller(
    # save session when we're about to close
    if config.enable_cli_session:
        end_state = controller.get_state()
-        end_state.save_to_session(cli_session)
+        end_state.save_to_session(cli_session, file_store)

    # close when done
    await controller.close()
--- a/opendevin/events/stream.py
+++ b/opendevin/events/stream.py
@ -7,7 +7,7 @@ from typing import Callable, Iterable
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.utils import json
 from opendevin.events.serialization.event import event_from_dict, event_to_dict
-from opendevin.storage import FileStore, get_file_store
+from opendevin.storage import FileStore

 from .event import Event, EventSource

@ -29,15 +29,15 @@ class EventStream:
    _lock: threading.Lock
    _file_store: FileStore

-    def __init__(self, sid: str):
+    def __init__(self, sid: str, file_store: FileStore):
        self.sid = sid
-        self._file_store = get_file_store()
+        self._file_store = file_store
        self._subscribers = {}
        self._cur_id = 0
        self._lock = threading.Lock()
        self._reinitialize_from_file_store()

-    def _reinitialize_from_file_store(self):
+    def _reinitialize_from_file_store(self) -> None:
        try:
            events = self._file_store.list(f'sessions/{self.sid}/events')
        except FileNotFoundError:
--- a/opendevin/memory/memory.py
+++ b/opendevin/memory/memory.py
@ -13,14 +13,14 @@ from tenacity import (
    wait_random_exponential,
 )

-from opendevin.core.config import LLMConfig, config
+from opendevin.core.config import LLMConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.utils import json

-# TODO: this should depend on specific agent setting
-num_retries = config.get_llm_config().num_retries
-retry_min_wait = config.get_llm_config().retry_min_wait
-retry_max_wait = config.get_llm_config().retry_max_wait
+# TODO: this could be made configurable
+num_retries: int = 10
+retry_min_wait: int = 3
+retry_max_wait: int = 300

 # llama-index includes a retry decorator around openai.get_embeddings() function
 # it is initialized with hard-coded values and errors
@ -110,21 +110,19 @@ class EmbeddingsLoader:
 class LongTermMemory:
    """Handles storing information for the agent to access later, using chromadb."""

-    def __init__(self, agent_config_name='agent'):
+    def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1):
        """Initialize the chromadb and set up ChromaVectorStore for later use."""
        db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
        self.collection = db.get_or_create_collection(name='memories')
        vector_store = ChromaVectorStore(chroma_collection=self.collection)
-        agent_config = config.get_agent_config(agent_config_name)
-        llm_config = config.get_llm_config(agent_config.llm_config)
        embedding_strategy = llm_config.embedding_model
        embed_model = EmbeddingsLoader.get_embedding_model(
            embedding_strategy, llm_config
        )
        self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model)
-        self.sema = threading.Semaphore(value=agent_config.memory_max_threads)
+        self.sema = threading.Semaphore(value=memory_max_threads)
        self.thought_idx = 0
-        self._add_threads = []
+        self._add_threads: list[threading.Thread] = []

    def add_event(self, event: dict):
        """Adds a new event to the long term memory with a unique id.
--- a/opendevin/runtime/client/runtime.py
+++ b/opendevin/runtime/client/runtime.py
@ -6,7 +6,7 @@ import aiohttp
 import docker
 import tenacity

-from opendevin.core.config import SandboxConfig, config
+from opendevin.core.config import AppConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events import EventSource, EventStream
 from opendevin.events.action import (
@ -26,11 +26,7 @@ from opendevin.events.observation import (
 )
 from opendevin.events.serialization import event_to_dict, observation_from_dict
 from opendevin.events.serialization.action import ACTION_TYPE_TO_CLASS
-from opendevin.runtime.plugins import (
-    AgentSkillsRequirement,
-    JupyterRequirement,
-    PluginRequirement,
-)
+from opendevin.runtime.plugins import PluginRequirement
 from opendevin.runtime.runtime import Runtime
 from opendevin.runtime.utils import find_available_tcp_port
 from opendevin.runtime.utils.runtime_build import build_runtime_image
@ -45,15 +41,13 @@ class EventStreamRuntime(Runtime):

    def __init__(
        self,
-        sandbox_config: SandboxConfig,
+        config: AppConfig,
        event_stream: EventStream,
        sid: str = 'default',
        container_image: str | None = None,
        plugins: list[PluginRequirement] | None = None,
    ):
-        super().__init__(
-            sandbox_config, event_stream, sid
-        )  # will initialize the event stream
+        super().__init__(config, event_stream, sid)  # will initialize the event stream
        self._port = find_available_tcp_port()
        self.api_url = f'http://localhost:{self._port}'
        self.session: Optional[aiohttp.ClientSession] = None
@ -81,11 +75,11 @@ class EventStreamRuntime(Runtime):
            # NOTE: You can need set DEBUG=true to update the source code
            # inside the container. This is useful when you want to test/debug the
            # latest code in the runtime docker container.
-            update_source_code=self.sandbox_config.update_source_code,
+            update_source_code=self.config.sandbox.update_source_code,
        )
        self.container = await self._init_container(
            self.sandbox_workspace_dir,
-            mount_dir=config.workspace_mount_path,
+            mount_dir=self.config.workspace_mount_path,
            plugins=self.plugins,
        )
        # MUST call super().ainit() to initialize both default env vars
@ -109,7 +103,7 @@ class EventStreamRuntime(Runtime):
    async def _init_container(
        self,
        sandbox_workspace_dir: str,
-        mount_dir: str = config.workspace_mount_path,
+        mount_dir: str,
        plugins: list[PluginRequirement] | None = None,
    ):
        try:
@ -122,7 +116,7 @@ class EventStreamRuntime(Runtime):

            network_mode: str | None = None
            port_mapping: dict[str, int] | None = None
-            if self.sandbox_config.use_host_network:
+            if self.config.sandbox.use_host_network:
                network_mode = 'host'
                logger.warn(
                    'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
@ -144,7 +138,7 @@ class EventStreamRuntime(Runtime):
                working_dir='/opendevin/code/',
                name=self.container_name,
                detach=True,
-                environment={'DEBUG': 'true'} if config.debug else None,
+                environment={'DEBUG': 'true'} if self.config.debug else None,
                volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}},
            )
            logger.info(f'Container started. Server url: {self.api_url}')
@ -179,7 +173,7 @@ class EventStreamRuntime(Runtime):

    @property
    def sandbox_workspace_dir(self):
-        return config.workspace_mount_path_in_sandbox
+        return self.config.workspace_mount_path_in_sandbox

    async def close(self, close_client: bool = True):
        if self.session is not None and not self.session.closed:
@ -286,75 +280,3 @@ class EventStreamRuntime(Runtime):
    # Overwrite the init_sandbox_plugins
    def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
        pass
-
-
-async def test_run_command():
-    sid = 'test'
-    cli_session = 'main' + ('_' + sid if sid else '')
-    event_stream = EventStream(cli_session)
-    runtime = EventStreamRuntime(
-        sandbox_config=config.sandbox, event_stream=event_stream, sid=sid
-    )
-    await runtime.ainit()
-    await runtime.run_action(CmdRunAction('ls -l'))
-
-
-async def test_event_stream():
-    sid = 'test'
-    cli_session = 'main' + ('_' + sid if sid else '')
-    event_stream = EventStream(cli_session)
-    runtime = EventStreamRuntime(
-        sandbox_config=config.sandbox,
-        event_stream=event_stream,
-        sid=sid,
-        container_image='ubuntu:22.04',
-        plugins=[JupyterRequirement(), AgentSkillsRequirement()],
-    )
-    await runtime.ainit()
-
-    # Test run command
-    action_cmd = CmdRunAction(command='ls -l')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    logger.info(await runtime.run_action(action_cmd), extra={'msg_type': 'OBSERVATION'})
-
-    # Test run ipython
-    test_code = "print('Hello, `World`!\\n')"
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    logger.info(
-        await runtime.run_action(action_ipython), extra={'msg_type': 'OBSERVATION'}
-    )
-
-    # Test read file (file should not exist)
-    action_read = FileReadAction(path='hello.sh')
-    logger.info(action_read, extra={'msg_type': 'ACTION'})
-    logger.info(
-        await runtime.run_action(action_read), extra={'msg_type': 'OBSERVATION'}
-    )
-
-    # Test write file
-    action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
-    logger.info(action_write, extra={'msg_type': 'ACTION'})
-    logger.info(
-        await runtime.run_action(action_write), extra={'msg_type': 'OBSERVATION'}
-    )
-
-    # Test read file (file should exist)
-    action_read = FileReadAction(path='hello.sh')
-    logger.info(action_read, extra={'msg_type': 'ACTION'})
-    logger.info(
-        await runtime.run_action(action_read), extra={'msg_type': 'OBSERVATION'}
-    )
-
-    # Test browse
-    action_browse = BrowseURLAction(url='https://google.com')
-    logger.info(action_browse, extra={'msg_type': 'ACTION'})
-    logger.info(
-        await runtime.run_action(action_browse), extra={'msg_type': 'OBSERVATION'}
-    )
-
-    await runtime.close()
-
-
-if __name__ == '__main__':
-    asyncio.run(test_event_stream())
--- a/opendevin/runtime/e2b/runtime.py
+++ b/opendevin/runtime/e2b/runtime.py
@ -1,4 +1,4 @@
-from opendevin.core.config import SandboxConfig
+from opendevin.core.config import AppConfig
 from opendevin.events.action import (
    FileReadAction,
    FileWriteAction,
@ -21,12 +21,12 @@ from .sandbox import E2BSandbox
 class E2BRuntime(ServerRuntime):
    def __init__(
        self,
-        sandbox_config: SandboxConfig,
+        config: AppConfig,
        event_stream: EventStream,
        sid: str = 'default',
        sandbox: Sandbox | None = None,
    ):
-        super().__init__(sandbox_config, event_stream, sid, sandbox)
+        super().__init__(config, event_stream, sid, sandbox)
        if not isinstance(self.sandbox, E2BSandbox):
            raise ValueError('E2BRuntime requires an E2BSandbox')
        self.file_store = E2BFileStore(self.sandbox.filesystem)
--- a/opendevin/runtime/runtime.py
+++ b/opendevin/runtime/runtime.py
@ -6,7 +6,7 @@ import os
 from abc import abstractmethod
 from typing import Any, Optional

-from opendevin.core.config import SandboxConfig
+from opendevin.core.config import AppConfig, SandboxConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events import EventStream, EventStreamSubscriber
 from opendevin.events.action import (
@ -57,15 +57,15 @@ class Runtime:

    def __init__(
        self,
-        sandbox_config: SandboxConfig,
+        config: AppConfig,
        event_stream: EventStream,
        sid: str = 'default',
    ):
        self.sid = sid
        self.event_stream = event_stream
        self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event)
-        self.sandbox_config = copy.deepcopy(sandbox_config)
-        self.DEFAULT_ENV_VARS = _default_env_vars(self.sandbox_config)
+        self.config = copy.deepcopy(config)  # private, independent copy
+        self.DEFAULT_ENV_VARS = _default_env_vars(self.config.sandbox)
        atexit.register(self.close_sync)

    async def ainit(self, env_vars: dict[str, str] | None = None) -> None:
--- a/opendevin/runtime/server/runtime.py
+++ b/opendevin/runtime/server/runtime.py
@ -1,6 +1,6 @@
 from typing import Any, Optional

-from opendevin.core.config import SandboxConfig, config
+from opendevin.core.config import AppConfig
 from opendevin.core.exceptions import BrowserInitException
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action import (
@ -34,49 +34,50 @@ from ..browser import browse
 from .files import read_file, write_file


-def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox:
-    if box_type == 'local':
-        return LocalBox(config=config.sandbox, workspace_base=config.workspace_base)
-    elif box_type == 'ssh':
-        return DockerSSHBox(
-            config=config.sandbox,
-            persist_sandbox=config.persist_sandbox,
-            workspace_mount_path=config.workspace_mount_path,
-            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
-            cache_dir=config.cache_dir,
-            run_as_devin=config.run_as_devin,
-            ssh_hostname=config.ssh_hostname,
-            ssh_password=config.ssh_password,
-            ssh_port=config.ssh_port,
-            sid=sid,
-        )
-    elif box_type == 'e2b':
-        return E2BBox(
-            config=config.sandbox,
-            e2b_api_key=config.e2b_api_key,
-        )
-    else:
-        raise ValueError(f'Invalid sandbox type: {box_type}')
-
-
 class ServerRuntime(Runtime):
    def __init__(
        self,
-        sandbox_config: SandboxConfig,
+        config: AppConfig,
        event_stream: EventStream,
        sid: str = 'default',
        sandbox: Sandbox | None = None,
    ):
-        super().__init__(sandbox_config, event_stream, sid)
+        super().__init__(config, event_stream, sid)
        self.file_store = LocalFileStore(config.workspace_base)
        if sandbox is None:
-            self.sandbox = create_sandbox(sid, config.sandbox.box_type)
+            self.sandbox = self.create_sandbox(sid, config.sandbox.box_type)
            self._is_external_sandbox = False
        else:
            self.sandbox = sandbox
            self._is_external_sandbox = True
        self.browser: BrowserEnv | None = None

+    def create_sandbox(self, sid: str = 'default', box_type: str = 'ssh') -> Sandbox:
+        """Instantiate the sandbox backend selected by `box_type`.
+
+        Supported values are 'local', 'ssh' and 'e2b'; any other value raises
+        ValueError. `sid` is only forwarded to the SSH box.
+        """
+        cfg = self.config
+        if box_type == 'local':
+            return LocalBox(config=cfg.sandbox, workspace_base=cfg.workspace_base)
+        if box_type == 'ssh':
+            return DockerSSHBox(
+                config=cfg.sandbox,
+                persist_sandbox=cfg.persist_sandbox,
+                workspace_mount_path=cfg.workspace_mount_path,
+                sandbox_workspace_dir=cfg.workspace_mount_path_in_sandbox,
+                cache_dir=cfg.cache_dir,
+                run_as_devin=cfg.run_as_devin,
+                ssh_hostname=cfg.ssh_hostname,
+                ssh_password=cfg.ssh_password,
+                ssh_port=cfg.ssh_port,
+                sid=sid,
+            )
+        if box_type == 'e2b':
+            return E2BBox(config=cfg.sandbox, e2b_api_key=cfg.e2b_api_key)
+        raise ValueError(f'Invalid sandbox type: {box_type}')
+
    async def ainit(self, env_vars: dict[str, str] | None = None):
        # MUST call super().ainit() to initialize both default env vars
        # AND the ones in env vars!
@ -177,8 +178,8 @@ class ServerRuntime(Runtime):
        return await read_file(
            action.path,
            working_dir,
-            config.workspace_base,
-            config.workspace_mount_path_in_sandbox,
+            self.config.workspace_base,
+            self.config.workspace_mount_path_in_sandbox,
            action.start,
            action.end,
        )
@ -189,8 +190,8 @@ class ServerRuntime(Runtime):
        return await write_file(
            action.path,
            working_dir,
-            config.workspace_base,
-            config.workspace_mount_path_in_sandbox,
+            self.config.workspace_base,
+            self.config.workspace_mount_path_in_sandbox,
            action.content,
            action.start,
            action.end,
--- a/opendevin/server/listen.py
+++ b/opendevin/server/listen.py
@ -8,6 +8,7 @@ from pathspec import PathSpec
 from pathspec.patterns import GitWildMatchPattern

 from opendevin.server.data_models.feedback import FeedbackDataModel, store_feedback
+from opendevin.storage import get_file_store

 with warnings.catch_warnings():
    warnings.simplefilter('ignore')
@ -29,7 +30,7 @@ from fastapi.staticfiles import StaticFiles

 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.controller.agent import Agent
-from opendevin.core.config import LLMConfig, config
+from opendevin.core.config import LLMConfig, load_app_config
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.schema import AgentState  # Add this import
 from opendevin.events.action import ChangeAgentStateAction, NullAction
@ -42,7 +43,9 @@ from opendevin.llm import bedrock
 from opendevin.server.auth import get_sid_from_token, sign_token
 from opendevin.server.session import SessionManager

-session_manager = SessionManager(config)
+config = load_app_config()
+file_store = get_file_store(config.file_store, config.file_store_path)
+session_manager = SessionManager(config, file_store)

 app = FastAPI()
 app.add_middleware(
--- a/opendevin/server/session/agent.py
+++ b/opendevin/server/session/agent.py
@ -4,12 +4,13 @@ from agenthub.codeact_agent.codeact_agent import CodeActAgent
 from opendevin.controller import AgentController
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import LLMConfig, SandboxConfig
+from opendevin.core.config import AppConfig, LLMConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.stream import EventStream
 from opendevin.runtime import DockerSSHBox, get_runtime_cls
 from opendevin.runtime.runtime import Runtime
 from opendevin.runtime.server.runtime import ServerRuntime
+from opendevin.storage.files import FileStore


 class AgentSession:
@ -25,15 +26,16 @@ class AgentSession:
    runtime: Optional[Runtime] = None
    _closed: bool = False

-    def __init__(self, sid):
+    def __init__(self, sid: str, file_store: FileStore):
        """Initializes a new instance of the Session class."""
        self.sid = sid
-        self.event_stream = EventStream(sid)
+        self.event_stream = EventStream(sid, file_store)
+        self.file_store = file_store

    async def start(
        self,
        runtime_name: str,
-        sandbox_config: SandboxConfig,
+        config: AppConfig,
        agent: Agent,
        confirmation_mode: bool,
        max_iterations: int,
@ -49,7 +51,7 @@ class AgentSession:
            raise Exception(
                'Session already started. You need to close this session and start a new one.'
            )
-        await self._create_runtime(runtime_name, sandbox_config)
+        await self._create_runtime(runtime_name, config)
        await self._create_controller(
            agent,
            confirmation_mode,
@ -63,13 +65,13 @@ class AgentSession:
            return
        if self.controller is not None:
            end_state = self.controller.get_state()
-            end_state.save_to_session(self.sid)
+            end_state.save_to_session(self.sid, self.file_store)
            await self.controller.close()
        if self.runtime is not None:
            await self.runtime.close()
        self._closed = True

-    async def _create_runtime(self, runtime_name: str, sandbox_config: SandboxConfig):
+    async def _create_runtime(self, runtime_name: str, config: AppConfig):
        """Creates a runtime instance."""
        if self.runtime is not None:
            raise Exception('Runtime already created')
@ -77,7 +79,7 @@ class AgentSession:
        logger.info(f'Using runtime: {runtime_name}')
        runtime_cls = get_runtime_cls(runtime_name)
        self.runtime = runtime_cls(
-            sandbox_config=sandbox_config, event_stream=self.event_stream, sid=self.sid
+            config=config, event_stream=self.event_stream, sid=self.sid
        )
        await self.runtime.ainit()

@ -121,7 +123,7 @@ class AgentSession:
            headless_mode=False,
        )
        try:
-            agent_state = State.restore_from_session(self.sid)
+            agent_state = State.restore_from_session(self.sid, self.file_store)
            self.controller.set_initial_state(
                agent_state, max_iterations, confirmation_mode
            )
--- a/opendevin/server/session/manager.py
+++ b/opendevin/server/session/manager.py
@ -6,6 +6,7 @@ from fastapi import WebSocket

 from opendevin.core.config import AppConfig
 from opendevin.core.logger import opendevin_logger as logger
+from opendevin.storage.files import FileStore

 from .session import Session

@ -15,14 +16,17 @@ class SessionManager:
    cleanup_interval: int = 300
    session_timeout: int = 600

-    def __init__(self, config: AppConfig):
+    def __init__(self, config: AppConfig, file_store: FileStore):
        asyncio.create_task(self._cleanup_sessions())
        self.config = config
+        self.file_store = file_store

    def add_or_restart_session(self, sid: str, ws_conn: WebSocket) -> Session:
        if sid in self._sessions:
            asyncio.create_task(self._sessions[sid].close())
-        self._sessions[sid] = Session(sid=sid, ws=ws_conn, config=self.config)
+        self._sessions[sid] = Session(
+            sid=sid, file_store=self.file_store, ws=ws_conn, config=self.config
+        )
        return self._sessions[sid]

    def get_session(self, sid: str) -> Session | None:
--- a/opendevin/server/session/session.py
+++ b/opendevin/server/session/session.py
@ -20,6 +20,7 @@ from opendevin.events.observation import (
 from opendevin.events.serialization import event_from_dict, event_to_dict
 from opendevin.events.stream import EventStreamSubscriber
 from opendevin.llm.llm import LLM
+from opendevin.storage.files import FileStore

 from .agent import AgentSession

@ -33,11 +34,13 @@ class Session:
    is_alive: bool = True
    agent_session: AgentSession

-    def __init__(self, sid: str, ws: WebSocket | None, config: AppConfig):
+    def __init__(
+        self, sid: str, ws: WebSocket | None, config: AppConfig, file_store: FileStore
+    ):
        self.sid = sid
        self.websocket = ws
        self.last_active_ts = int(time.time())
-        self.agent_session = AgentSession(sid)
+        self.agent_session = AgentSession(sid, file_store)
        self.agent_session.event_stream.subscribe(
            EventStreamSubscriber.SERVER, self.on_event
        )
@ -102,7 +105,7 @@ class Session:
        try:
            await self.agent_session.start(
                runtime_name=self.config.runtime,
-                sandbox_config=self.config.sandbox,
+                config=self.config,
                agent=agent,
                confirmation_mode=confirmation_mode,
                max_iterations=max_iterations,
--- a/opendevin/storage/init.py
+++ b/opendevin/storage/init.py
@ -1,21 +1,14 @@
-from opendevin.core.config import config
-
 from .files import FileStore
 from .local import LocalFileStore
 from .memory import InMemoryFileStore
 from .s3 import S3FileStore


-def _get_file_store() -> FileStore:
-    if config.file_store == 'local':
-        return LocalFileStore(config.file_store_path)
-    elif config.file_store == 's3':
+def get_file_store(file_store: str, file_store_path: str | None = None) -> FileStore:
+    if file_store == 'local':
+        if file_store_path is None:
+            raise ValueError('file_store_path is required for local file store')
+        return LocalFileStore(file_store_path)
+    elif file_store == 's3':
        return S3FileStore()
    return InMemoryFileStore()
-
-
-singleton = _get_file_store()
-
-
-def get_file_store() -> FileStore:
-    return singleton
--- a/tests/integration/test_agent.py
+++ b/tests/integration/test_agent.py
@ -7,7 +7,7 @@ import pytest

 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import LLMConfig, parse_arguments
+from opendevin.core.config import LLMConfig
 from opendevin.core.main import run_agent_controller
 from opendevin.core.schema import AgentState
 from opendevin.events.action import (
@ -74,10 +74,9 @@ def validate_final_state(final_state: State | None, test_name: str):
 )
 def test_write_simple_script(current_test_name: str) -> None:
    task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
-    args = parse_arguments()

    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    final_state: State | None = asyncio.run(
        run_agent_controller(
@ -121,7 +120,6 @@ def test_write_simple_script(current_test_name: str) -> None:
    reason='local sandbox shows environment-dependent absolute path for pwd command',
 )
 def test_edits(current_test_name: str):
-    args = parse_arguments()
    # Copy workspace artifacts to workspace_base location
    source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
    files = os.listdir(source_dir)
@ -132,7 +130,7 @@ def test_edits(current_test_name: str):
        shutil.copy(os.path.join(source_dir, file), dest_file)

    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    # Execute the task
    task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
@ -164,10 +162,8 @@ Enjoy!
    reason='Currently, only ssh sandbox supports stateful tasks',
 )
 def test_ipython(current_test_name: str):
-    args = parse_arguments()
-
    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    # Execute the task
    task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
@ -199,10 +195,8 @@ def test_ipython(current_test_name: str):
    reason='FIXME: local sandbox does not capture stderr',
 )
 def test_simple_task_rejection(current_test_name: str):
-    args = parse_arguments()
-
    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    # Give an impossible task to do: cannot write a commit message because
    # the workspace is not a git repo
@ -224,10 +218,8 @@ def test_simple_task_rejection(current_test_name: str):
    reason='Currently, only ssh sandbox supports stateful tasks',
 )
 def test_ipython_module(current_test_name: str):
-    args = parse_arguments()
-
    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    # Execute the task
    task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
@ -265,10 +257,8 @@ def test_ipython_module(current_test_name: str):
    reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
 )
 def test_browse_internet(http_server, current_test_name: str):
-    args = parse_arguments()
-
    # Create the agent
-    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))
+    agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig()))

    # Execute the task
    task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
--- a/tests/unit/test_event_stream.py
+++ b/tests/unit/test_event_stream.py
@ -1,31 +1,39 @@
 import json
+import pathlib
+import tempfile

 import pytest

 from opendevin.events import EventSource, EventStream
-from opendevin.events.action import NullAction
+from opendevin.events.action import (
+    NullAction,
+)
 from opendevin.events.observation import NullObservation
+from opendevin.storage import get_file_store


@pytest.fixture
-def event_stream():
-    event_stream = EventStream('abc')
-    yield event_stream
-
-    # clear after each test
-    event_stream.clear()
+def temp_dir(monkeypatch):
+    # yield a temporary directory; the context manager removes it afterwards
+    with tempfile.TemporaryDirectory() as temp_dir:
+        pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        yield temp_dir


 def collect_events(stream):
    return [event for event in stream.get_events()]


-def test_basic_flow(event_stream: EventStream):
+def test_basic_flow(temp_dir: str):
+    file_store = get_file_store('local', temp_dir)
+    event_stream = EventStream('abc', file_store)
    event_stream.add_event(NullAction(), EventSource.AGENT)
    assert len(collect_events(event_stream)) == 1


-def test_stream_storage(event_stream: EventStream):
+def test_stream_storage(temp_dir: str):
+    file_store = get_file_store('local', temp_dir)
+    event_stream = EventStream('abc', file_store)
    event_stream.add_event(NullObservation(''), EventSource.AGENT)
    assert len(collect_events(event_stream)) == 1
    content = event_stream._file_store.read('sessions/abc/events/0.json')
@ -43,15 +51,17 @@ def test_stream_storage(event_stream: EventStream):
    }


-def test_rehydration(event_stream: EventStream):
+def test_rehydration(temp_dir: str):
+    file_store = get_file_store('local', temp_dir)
+    event_stream = EventStream('abc', file_store)
    event_stream.add_event(NullObservation('obs1'), EventSource.AGENT)
    event_stream.add_event(NullObservation('obs2'), EventSource.AGENT)
    assert len(collect_events(event_stream)) == 2

-    stream2 = EventStream('es2')
+    stream2 = EventStream('es2', file_store)
    assert len(collect_events(stream2)) == 0

-    stream1rehydrated = EventStream('abc')
+    stream1rehydrated = EventStream('abc', file_store)
    events = collect_events(stream1rehydrated)
    assert len(events) == 2
    assert events[0].content == 'obs1'
--- a/tests/unit/test_ipython.py
+++ b/tests/unit/test_ipython.py
@ -4,7 +4,7 @@ from unittest.mock import MagicMock, call, patch

 import pytest

-from opendevin.core.config import SandboxConfig
+from opendevin.core.config import AppConfig, SandboxConfig
 from opendevin.events.action import IPythonRunCellAction
 from opendevin.events.observation import IPythonRunCellObservation
 from opendevin.runtime.server.runtime import ServerRuntime
@ -42,7 +42,9 @@ async def test_run_python_backticks():
    ):
        # Initialize the runtime with the mock event_stream
        runtime = ServerRuntime(
-            sandbox_config=SandboxConfig(box_type='ssh', persist_sandbox=False),
+            config=AppConfig(
+                persist_sandbox=False, sandbox=SandboxConfig(box_type='ssh')
+            ),
            event_stream=mock_event_stream,
        )

--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@ -1,4 +1,5 @@
 import logging
+import tempfile
 from unittest.mock import Mock, patch

 import pytest
@ -17,6 +18,7 @@ from opendevin.events.observation.empty import NullObservation
 from opendevin.events.observation.error import ErrorObservation
 from opendevin.events.stream import EventSource, EventStream
 from opendevin.memory.history import ShortTermHistory
+from opendevin.storage import get_file_store


 def collect_events(stream):
@ -28,11 +30,13 @@ logging.basicConfig(level=logging.DEBUG)

@pytest.fixture
 def event_stream():
-    event_stream = EventStream('asdf')
-    yield event_stream
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('asdf', file_store)
+        yield event_stream

-    # clear after each test
-    event_stream.clear()
+        # clear after each test
+        event_stream.clear()


 class TestStuckDetector:
--- a/tests/unit/test_micro_agents.py
+++ b/tests/unit/test_micro_agents.py
@ -1,5 +1,6 @@
 import json
 import os
+import tempfile
 from unittest.mock import MagicMock

 import pytest
@ -12,15 +13,18 @@ from opendevin.events import EventSource
 from opendevin.events.action import MessageAction
 from opendevin.events.stream import EventStream
 from opendevin.memory.history import ShortTermHistory
+from opendevin.storage import get_file_store


@pytest.fixture
 def event_stream():
-    event_stream = EventStream('asdf')
-    yield event_stream
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('asdf', file_store)
+        yield event_stream

-    # clear after each test
-    event_stream.clear()
+        # clear after each test
+        event_stream.clear()


 def test_all_agents_are_loaded():
--- a/tests/unit/test_runtime.py
+++ b/tests/unit/test_runtime.py
@ -9,7 +9,7 @@ from unittest.mock import patch

 import pytest

-from opendevin.core.config import SandboxConfig
+from opendevin.core.config import AppConfig, SandboxConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events import EventStream
 from opendevin.events.action import (
@ -21,6 +21,7 @@ from opendevin.events.observation import (
 from opendevin.runtime.client.runtime import EventStreamRuntime
 from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
 from opendevin.runtime.server.runtime import ServerRuntime
+from opendevin.storage import get_file_store


@pytest.fixture(autouse=True)
@ -46,23 +47,31 @@ def box_class(request):
    return request.param


-async def _load_runtime(box_class, event_stream):
+async def _load_runtime(temp_dir, box_class):
    sid = 'test'
+    cli_session = 'main_test'
    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sandbox_config = SandboxConfig(
-        use_host_network=True,
+    config = AppConfig(
+        workspace_base=temp_dir,
+        workspace_mount_path=temp_dir,
+        sandbox=SandboxConfig(
+            use_host_network=True,
+        ),
    )
-    container_image = sandbox_config.container_image
+    file_store = get_file_store(config.file_store, config.file_store_path)
+    event_stream = EventStream(cli_session, file_store)
+
+    container_image = config.sandbox.container_image
    # NOTE: we will use the default container image specified in the config.sandbox
    # if it is an official od_runtime image.
    if 'od_runtime' not in container_image:
        container_image = 'ubuntu:22.04'
        logger.warning(
-            f'`{sandbox_config.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
+            f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
        )
    if box_class == EventStreamRuntime:
        runtime = EventStreamRuntime(
-            sandbox_config=sandbox_config,
+            config=config,
            event_stream=event_stream,
            sid=sid,
            # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
@ -72,9 +81,7 @@ async def _load_runtime(box_class, event_stream):
        )
        await runtime.ainit()
    elif box_class == ServerRuntime:
-        runtime = ServerRuntime(
-            sandbox_config=sandbox_config, event_stream=event_stream, sid=sid
-        )
+        runtime = ServerRuntime(config=config, event_stream=event_stream, sid=sid)
        await runtime.ainit()
        runtime.init_sandbox_plugins(plugins)
        runtime.init_runtime_tools(
@ -89,12 +96,9 @@ async def _load_runtime(box_class, event_stream):


@pytest.mark.asyncio
-async def test_env_vars_os_environ(box_class):
+async def test_env_vars_os_environ(temp_dir, box_class):
    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-        cli_session = 'main_test'
-
-        event_stream = EventStream(cli_session)
-        runtime = await _load_runtime(box_class, event_stream)
+        runtime = await _load_runtime(temp_dir, box_class)

        obs: CmdOutputObservation = await runtime.run_action(
            CmdRunAction(command='env')
@ -115,11 +119,8 @@ async def test_env_vars_os_environ(box_class):


@pytest.mark.asyncio
-async def test_env_vars_runtime_add_env_vars(box_class):
-    cli_session = 'main_test'
-
-    event_stream = EventStream(cli_session)
-    runtime = await _load_runtime(box_class, event_stream)
+async def test_env_vars_runtime_add_env_vars(temp_dir, box_class):
+    runtime = await _load_runtime(temp_dir, box_class)
    await runtime.add_env_vars({'QUUX': 'abc"def'})

    obs: CmdOutputObservation = await runtime.run_action(
@ -136,11 +137,8 @@ async def test_env_vars_runtime_add_env_vars(box_class):


@pytest.mark.asyncio
-async def test_env_vars_runtime_add_empty_dict(box_class):
-    cli_session = 'main_test'
-
-    event_stream = EventStream(cli_session)
-    runtime = await _load_runtime(box_class, event_stream)
+async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class):
+    runtime = await _load_runtime(temp_dir, box_class)

    prev_obs = await runtime.run_action(CmdRunAction(command='env'))
    assert prev_obs.exit_code == 0, 'The exit code should be 0.'
@ -160,11 +158,8 @@ async def test_env_vars_runtime_add_empty_dict(box_class):


@pytest.mark.asyncio
-async def test_env_vars_runtime_add_multiple_env_vars(box_class):
-    cli_session = 'main_test'
-
-    event_stream = EventStream(cli_session)
-    runtime = await _load_runtime(box_class, event_stream)
+async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class):
+    runtime = await _load_runtime(temp_dir, box_class)
    await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})

    obs: CmdOutputObservation = await runtime.run_action(
@ -181,12 +176,9 @@ async def test_env_vars_runtime_add_multiple_env_vars(box_class):


@pytest.mark.asyncio
-async def test_env_vars_runtime_add_env_vars_overwrite(box_class):
-    cli_session = 'main_test'
-
+async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class):
    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-        event_stream = EventStream(cli_session)
-        runtime = await _load_runtime(box_class, event_stream)
+        runtime = await _load_runtime(temp_dir, box_class)
        await runtime.add_env_vars({'FOOBAR': 'xyz'})

        obs: CmdOutputObservation = await runtime.run_action(
@ -204,10 +196,7 @@ async def test_env_vars_runtime_add_env_vars_overwrite(box_class):

@pytest.mark.asyncio
 async def test_bash_command_pexcept(temp_dir, box_class):
-    cli_session = 'main_test'
-
-    event_stream = EventStream(cli_session)
-    runtime = await _load_runtime(box_class, event_stream)
+    runtime = await _load_runtime(temp_dir, box_class)

    # We set env var PS1="\u@\h:\w $"
    # and construct the PEXCEPT prompt base on it.
--- a/tests/unit/test_sandbox.py
+++ b/tests/unit/test_sandbox.py
@ -11,14 +11,14 @@ from opendevin.runtime.utils import split_bash_commands


 def create_docker_box_from_app_config(
-    path: str, config: AppConfig = None
+    path: str, config: AppConfig | None = None
 ) -> DockerSSHBox:
    if config is None:
        config = AppConfig(
            sandbox=SandboxConfig(
                box_type='ssh',
-                persist_sandbox=False,
-            )
+            ),
+            persist_sandbox=False,
        )
    return DockerSSHBox(
        config=config.sandbox,
@ -305,9 +305,9 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
    config = AppConfig(
        sandbox=SandboxConfig(
            box_type='ssh',
-            persist_sandbox=False,
            enable_auto_lint=False,
-        )
+        ),
+        persist_sandbox=False,
    )
    assert not config.sandbox.enable_auto_lint
    box = create_docker_box_from_app_config(temp_dir, config)
@ -324,9 +324,9 @@ def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
            sandbox=SandboxConfig(
                box_type='ssh',
                container_image=base_sandbox_image,
-                persist_sandbox=False,
                enable_auto_lint=False,
-            )
+            ),
+            persist_sandbox=False,
        )
        assert not config.sandbox.enable_auto_lint
        box = create_docker_box_from_app_config(temp_dir, config)
@ -337,11 +337,20 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir):
    config = AppConfig(
        sandbox=SandboxConfig(
            box_type='ssh',
-            persist_sandbox=False,
-            enable_auto_lint=False,
-        )
+        ),
+        persist_sandbox=False,
+    )
+    box = DockerSSHBox(
+        config=config.sandbox,
+        persist_sandbox=config.persist_sandbox,
+        workspace_mount_path=temp_dir,
+        sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+        cache_dir=config.cache_dir,
+        run_as_devin=True,
+        ssh_hostname=config.ssh_hostname,
+        ssh_password=config.ssh_password,
+        ssh_port=config.ssh_port,
    )
-    box = create_docker_box_from_app_config(temp_dir, config)
    box.init_plugins([JupyterRequirement])
    test_code = "print('Hello, `World`!')"
    expected_write_command = (