Default to less expensive gpt-3.5-turbo model (#1675)

This commit is contained in:
Aleksandar 2024-05-10 00:11:27 +01:00 committed by GitHub
parent 564739d1db
commit 657b177b4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 34 additions and 13 deletions

View File

@ -7,7 +7,7 @@ BACKEND_PORT = 3000
BACKEND_HOST = "127.0.0.1:$(BACKEND_PORT)"
FRONTEND_PORT = 3001
DEFAULT_WORKSPACE_DIR = "./workspace"
DEFAULT_MODEL = "gpt-3.5-turbo-1106"
DEFAULT_MODEL = "gpt-3.5-turbo"
CONFIG_FILE = config.toml
PRECOMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml"

View File

@ -14,9 +14,9 @@ To run the tests for OpenDevin project, you can use the provided test runner scr
3. Navigate to the root directory of the project.
4. Run the test suite using the test runner script with the required arguments:
```
python evaluation/regression/run_tests.py --OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx --model=gpt-3.5-turbo-1106
python evaluation/regression/run_tests.py --OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx --model=gpt-3.5-turbo
```
Replace `sk-xxxxxxxxxxxxxxxxxxxxxx` with your actual OpenAI API key. The default model is `gpt-3.5-turbo-1106`, but you can specify a different model if needed.
Replace `sk-xxxxxxxxxxxxxxxxxxxxxx` with your actual OpenAI API key. The default model is `gpt-3.5-turbo`, but you can specify a different model if needed.
The test runner will discover and execute all the test cases in the `cases/` directory, and display the results of the test suite, including the status of each individual test case and the overall summary.

View File

@ -19,7 +19,9 @@ def agents():
"""
agents = []
for agent in os.listdir(AGENTHUB_DIR):
if os.path.isdir(os.path.join(AGENTHUB_DIR, agent)) and agent.endswith('_agent'):
if os.path.isdir(os.path.join(AGENTHUB_DIR, agent)) and agent.endswith(
'_agent'
):
agents.append(agent)
return agents
@ -74,9 +76,9 @@ def model(request):
request: The pytest request object.
Returns:
The model name, defaulting to "gpt-3.5-turbo-1106".
The model name, defaulting to "gpt-3.5-turbo".
"""
return request.config.getoption('model', default='gpt-3.5-turbo-1106')
return request.config.getoption('model', default='gpt-3.5-turbo')
@pytest.fixture
@ -91,6 +93,7 @@ def run_test_case(test_cases_dir, workspace_dir, request):
Returns:
A function that runs a test case for a given agent and case.
"""
def _run_test_case(agent, case):
"""Runs a test case for a given agent.
@ -116,14 +119,32 @@ def run_test_case(test_cases_dir, workspace_dir, request):
shutil.rmtree(os.path.join(agent_dir, 'workspace'), ignore_errors=True)
if os.path.isdir(os.path.join(case_dir, 'start')):
os.copytree(os.path.join(case_dir, 'start'), os.path.join(agent_dir, 'workspace'))
os.copytree(
os.path.join(case_dir, 'start'), os.path.join(agent_dir, 'workspace')
)
else:
os.makedirs(os.path.join(agent_dir, 'workspace'))
agents_ref = {
'monologue_agent': 'MonologueAgent',
'codeact_agent': 'CodeActAgent'
'codeact_agent': 'CodeActAgent',
}
process = subprocess.Popen(['python3', f'{SCRIPT_DIR}/../../opendevin/main.py', '-d', f"{os.path.join(agent_dir, 'workspace')}", '-c', f'{agents_ref[agent]}', '-t', f'{task}', '-m', 'gpt-3.5-turbo-1106'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
process = subprocess.Popen(
[
'python3',
f'{SCRIPT_DIR}/../../opendevin/main.py',
'-d',
f"{os.path.join(agent_dir, 'workspace')}",
'-c',
f'{agents_ref[agent]}',
'-t',
f'{task}',
'-m',
'gpt-3.5-turbo',
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stdout, stderr = process.communicate()
logging.info(f'Stdout: {stdout}')
logging.error(f'Stderr: {stderr}')
@ -146,6 +167,6 @@ def pytest_configure(config):
format='%(asctime)s [%(levelname)s] %(message)s',
handlers=[
logging.FileHandler(f"test_results_{now.strftime('%Y%m%d_%H%M%S')}.log"),
logging.StreamHandler()
]
logging.StreamHandler(),
],
)

View File

@ -19,7 +19,7 @@ load_dotenv()
@dataclass
class LLMConfig(metaclass=Singleton):
model: str = 'gpt-3.5-turbo-1106'
model: str = 'gpt-3.5-turbo'
api_key: str | None = None
base_url: str | None = None
api_version: str | None = None

View File

@ -24,7 +24,7 @@ websocat ws://127.0.0.1:3000/ws
```sh
LLM_API_KEY=sk-... # Your OpenAI API Key
LLM_MODEL=gpt-3.5-turbo-1106 # Default model for the agent to use
LLM_MODEL=gpt-3.5-turbo # Default model for the agent to use
WORKSPACE_BASE=/path/to/your/workspace # Default path to model's workspace
```