From 657b177b4ee99d3b0a85e1f2fff22da731956bb6 Mon Sep 17 00:00:00 2001 From: Aleksandar Date: Fri, 10 May 2024 00:11:27 +0100 Subject: [PATCH] Default to less expensive gpt-3.5-turbo model (#1675) --- Makefile | 2 +- evaluation/regression/README.md | 4 ++-- evaluation/regression/conftest.py | 37 ++++++++++++++++++++++++------- opendevin/core/config.py | 2 +- opendevin/server/README.md | 2 +- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 912a3e7150..66a72779e6 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ BACKEND_PORT = 3000 BACKEND_HOST = "127.0.0.1:$(BACKEND_PORT)" FRONTEND_PORT = 3001 DEFAULT_WORKSPACE_DIR = "./workspace" -DEFAULT_MODEL = "gpt-3.5-turbo-1106" +DEFAULT_MODEL = "gpt-3.5-turbo" CONFIG_FILE = config.toml PRECOMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml" diff --git a/evaluation/regression/README.md b/evaluation/regression/README.md index 5a3945f2e5..1cc17d7a2c 100644 --- a/evaluation/regression/README.md +++ b/evaluation/regression/README.md @@ -14,9 +14,9 @@ To run the tests for OpenDevin project, you can use the provided test runner scr 3. Navigate to the root directory of the project. 4. Run the test suite using the test runner script with the required arguments: ``` - python evaluation/regression/run_tests.py --OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx --model=gpt-3.5-turbo-1106 + python evaluation/regression/run_tests.py --OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx --model=gpt-3.5-turbo ``` - Replace `sk-xxxxxxxxxxxxxxxxxxxxxx` with your actual OpenAI API key. The default model is `gpt-3.5-turbo-1106`, but you can specify a different model if needed. + Replace `sk-xxxxxxxxxxxxxxxxxxxxxx` with your actual OpenAI API key. The default model is `gpt-3.5-turbo`, but you can specify a different model if needed. The test runner will discover and execute all the test cases in the `cases/` directory, and display the results of the test suite, including the status of each individual test case and the overall summary. diff --git a/evaluation/regression/conftest.py b/evaluation/regression/conftest.py index 977fc7f559..b0d2484f30 100644 --- a/evaluation/regression/conftest.py +++ b/evaluation/regression/conftest.py @@ -19,7 +19,9 @@ def agents(): """ agents = [] for agent in os.listdir(AGENTHUB_DIR): - if os.path.isdir(os.path.join(AGENTHUB_DIR, agent)) and agent.endswith('_agent'): + if os.path.isdir(os.path.join(AGENTHUB_DIR, agent)) and agent.endswith( + '_agent' + ): agents.append(agent) return agents @@ -74,9 +76,9 @@ def model(request): request: The pytest request object. Returns: - The model name, defaulting to "gpt-3.5-turbo-1106". + The model name, defaulting to "gpt-3.5-turbo". """ - return request.config.getoption('model', default='gpt-3.5-turbo-1106') + return request.config.getoption('model', default='gpt-3.5-turbo') @pytest.fixture @@ -91,6 +93,7 @@ def run_test_case(test_cases_dir, workspace_dir, request): Returns: A function that runs a test case for a given agent and case. """ + def _run_test_case(agent, case): """Runs a test case for a given agent. @@ -116,14 +119,32 @@ def run_test_case(test_cases_dir, workspace_dir, request): shutil.rmtree(os.path.join(agent_dir, 'workspace'), ignore_errors=True) if os.path.isdir(os.path.join(case_dir, 'start')): - os.copytree(os.path.join(case_dir, 'start'), os.path.join(agent_dir, 'workspace')) + os.copytree( + os.path.join(case_dir, 'start'), os.path.join(agent_dir, 'workspace') + ) else: os.makedirs(os.path.join(agent_dir, 'workspace')) agents_ref = { 'monologue_agent': 'MonologueAgent', - 'codeact_agent': 'CodeActAgent' + 'codeact_agent': 'CodeActAgent', } - process = subprocess.Popen(['python3', f'{SCRIPT_DIR}/../../opendevin/main.py', '-d', f"{os.path.join(agent_dir, 'workspace')}", '-c', f'{agents_ref[agent]}', '-t', f'{task}', '-m', 'gpt-3.5-turbo-1106'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + process = subprocess.Popen( + [ + 'python3', + f'{SCRIPT_DIR}/../../opendevin/main.py', + '-d', + f"{os.path.join(agent_dir, 'workspace')}", + '-c', + f'{agents_ref[agent]}', + '-t', + f'{task}', + '-m', + 'gpt-3.5-turbo', + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) stdout, stderr = process.communicate() logging.info(f'Stdout: {stdout}') logging.error(f'Stderr: {stderr}') @@ -146,6 +167,6 @@ def pytest_configure(config): format='%(asctime)s [%(levelname)s] %(message)s', handlers=[ logging.FileHandler(f"test_results_{now.strftime('%Y%m%d_%H%M%S')}.log"), - logging.StreamHandler() - ] + logging.StreamHandler(), + ], ) diff --git a/opendevin/core/config.py b/opendevin/core/config.py index 6ce79e3b07..ab86166080 100644 --- a/opendevin/core/config.py +++ b/opendevin/core/config.py @@ -19,7 +19,7 @@ load_dotenv() @dataclass class LLMConfig(metaclass=Singleton): - model: str = 'gpt-3.5-turbo-1106' + model: str = 'gpt-3.5-turbo' api_key: str | None = None base_url: str | None = None api_version: str | None = None diff --git a/opendevin/server/README.md b/opendevin/server/README.md index 9591393509..48818edf3c 100644 --- a/opendevin/server/README.md +++ b/opendevin/server/README.md @@ -24,7 +24,7 @@ websocat ws://127.0.0.1:3000/ws ```sh LLM_API_KEY=sk-... # Your OpenAI API Key -LLM_MODEL=gpt-3.5-turbo-1106 # Default model for the agent to use +LLM_MODEL=gpt-3.5-turbo # Default model for the agent to use WORKSPACE_BASE=/path/to/your/workspace # Default path to model's workspace ```