chore(eval): remove old, unused regression test framework under evaluation/regression (#10419)

Engel Nyst 2025-08-16 01:08:23 +02:00 committed by GitHub
parent ab004478f6
commit f7f4fcf98f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 0 additions and 464 deletions

View File

@@ -1,2 +0,0 @@
node_modules
outputs

View File

@@ -1,70 +0,0 @@
# OpenHands - Regression Test Framework

OpenHands is an open-source software engineering AI that can solve a variety of software engineering tasks. This directory contains the regression test framework for the OpenHands project.

## Running the Tests

To run the regression tests, use the provided test runner script. Follow these steps:

1. Ensure you have Python 3.6 or higher installed on your system.
2. Install the required dependencies by running the following command in your terminal:

   ```
   pip install -r requirements.txt
   ```

3. Navigate to the root directory of the project.
4. Run the test suite using the test runner script with the required arguments:

   ```
   python evaluation/regression/run_tests.py --OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx --model=gpt-4o
   ```

Replace `sk-xxxxxxxxxxxxxxxxxxxxxx` with your actual OpenAI API key. The default model is `gpt-4o`, but you can specify a different model if needed.

The test runner will discover and execute all the test cases in the `cases/` directory and display the results, including the status of each test case and an overall summary.

## Test Case Structure

The test cases for the OpenHands project are organized in the `cases/` directory. Each test case has the following structure:

```
cases/
├── hello-world/
│   ├── task.txt
│   ├── outputs/
│   │   └── codeact_agent/
│   │       └── workspace/
│   │           └── hello_world.sh
│   └── test_hello_world.py
├── create_web_app/
│   ├── task.txt
│   ├── outputs/
│   │   └── codeact_agent/
│   │       └── workspace/
│   │           ├── app.py
│   │           ├── requirements.txt
│   │           ├── static/
│   │           └── templates/
│   └── test_create_web_app.py
└── ...
```

- `task.txt`: This file contains the task description provided by the user.
- `outputs/`: This directory contains the output generated by OpenHands for each agent.
- `outputs/*/workspace/`: This directory contains the actual output files generated by OpenHands.
- `test_*.py`: These are the test scripts that validate the output of OpenHands; a minimal sketch of such a script is shown below.
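
For illustration only, here is a minimal sketch of what a new `test_*.py` script could look like. The `greet` case name and `greet.txt` output file are hypothetical; the `agents` helper and `run_test_case` fixture come from the framework's `conftest.py`:

```
import os

import pytest

from conftest import agents


@pytest.mark.parametrize('agent', agents())
def test_greet(run_test_case, agent):
    """Hypothetical test case: check that the agent produced the expected file."""
    # Generate the workspace for this case with the given agent.
    workspace_dir = run_test_case(agent, 'greet')

    # Validate that the expected output file exists in the workspace.
    assert os.path.isfile(os.path.join(workspace_dir, 'greet.txt'))
```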

## Adding New Test Cases

To add a new test case, create a new directory under `cases/` with a `task.txt` file and a matching `test_*.py` script, following the structure described in the previous section.

## Customizing the Test Cases

The test cases can be customized by modifying the fixtures defined in the `conftest.py` file. The available fixtures are:

- `test_cases_dir`: The directory containing the test cases.
- `task_file`: The path to the `task.txt` file for the current test case.
- `workspace_dir`: The path to the `workspace/` directory for the current test case.
- `model`: The model selected to run the generation.
- `run_test_case`: A fixture that runs OpenHands and generates the workspace for the current test case.

You can modify these fixtures to change the behavior of the test cases, or add new ones as needed, as shown in the sketch below.
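
As an example, a new fixture could be added to `conftest.py` by registering a command-line option for it; the `--timeout` option below is hypothetical and only illustrates the pattern:

```
import pytest


def pytest_addoption(parser):
    # Existing option read by the `model` fixture.
    parser.addoption('--model', action='store', default='gpt-3.5-turbo')
    # Hypothetical new option: per-case timeout, in seconds.
    parser.addoption('--timeout', type=int, default=600)


@pytest.fixture
def timeout(request):
    """Fixture that exposes the hypothetical --timeout option to test cases."""
    return request.config.getoption('--timeout')
```
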
If you have any questions or need further assistance, feel free to reach out to the project maintainers.

View File

@@ -1 +0,0 @@
Write an API server in node express which responds with a random number, and a frontend in React that displays the next number from the API

View File

@@ -1 +0,0 @@
Write a simple hello world server in node Express

View File

@@ -1,2 +0,0 @@
#!/usr/bin/env bash
echo "hello world"

View File

@@ -1 +0,0 @@
Rewrite the script so that it prints the user's name, using the first argument. If there's no name, default to "world"

View File

@@ -1 +0,0 @@
Write a bash script named "hello_world.sh" that prints "Hello, World!"

View File

@@ -1,20 +0,0 @@
import os

import pytest

from conftest import agents


@pytest.mark.parametrize('agent', agents())
def test_hello_world(task_file, run_test_case, agent):
    """Test case for the "Hello, World!" Bash script using different agents."""
    # Run the test case for the specified agent
    workspace_dir = run_test_case(agent, 'hello-world')

    # Validate the generated workspace
    assert os.path.exists(workspace_dir)
    assert os.path.isfile(os.path.join(workspace_dir, 'hello_world.sh'))

    # Execute the hello_world.sh script and verify its output
    os.chdir(workspace_dir)
    output = os.popen('bash hello_world.sh').read()
    assert output == 'Hello, World!\n'

View File

@@ -1,2 +0,0 @@
def string_length(s):
    return len(s)

View File

@@ -1,2 +0,0 @@
def to_lowercase(s):
    return s.lower()

View File

@@ -1,2 +0,0 @@
def reverse_string(s):
    return s[::-1]

View File

@@ -1,7 +0,0 @@
import random


def scramble_string(s):
    s_list = list(s)
    random.shuffle(s_list)
    return ''.join(s_list)

View File

@@ -1,8 +0,0 @@
def spongebob_case(s):
    result = ''
    for i, char in enumerate(s):
        if i % 2 == 0:
            result += char.lower()
        else:
            result += char.upper()
    return result

View File

@@ -1,2 +0,0 @@
def to_uppercase(s):
    return s.upper()

View File

@@ -1,55 +0,0 @@
import sys


def print_help():
    help_text = """
Usage: python string_cli.py <command> <string>

Commands:
  reverse    - Reverses the input string.
  uppercase  - Converts the input string to uppercase.
  lowercase  - Converts the input string to lowercase.
  spongebob  - Converts the input string to spongebob case.
  length     - Returns the length of the input string.
  scramble   - Randomly scrambles the characters in the input string.
"""
    print(help_text)


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '--help':
        print_help()
        sys.exit(0)
    elif len(sys.argv) < 3:
        print('Usage: python string_cli.py <command> <string>')
        sys.exit(1)

    command = sys.argv[1]
    input_string = sys.argv[2]

    if command == 'reverse':
        from commands.reverse import reverse_string

        print(reverse_string(input_string))
    elif command == 'uppercase':
        from commands.uppercase import to_uppercase

        print(to_uppercase(input_string))
    elif command == 'lowercase':
        from commands.lowercase import to_lowercase

        print(to_lowercase(input_string))
    elif command == 'spongebob':
        from commands.spongebob import spongebob_case

        print(spongebob_case(input_string))
    elif command == 'length':
        from commands.length import string_length

        print(string_length(input_string))
    elif command == 'scramble':
        from commands.scramble import scramble_string

        print(scramble_string(input_string))
    else:
        print('Invalid command!')

View File

@@ -1 +0,0 @@
Please rewrite the entire CLI in node.js

View File

@@ -1,2 +0,0 @@
def string_length(s):
    return len(s)

View File

@@ -1,2 +0,0 @@
def to_lowercase(s):
    return s.lower()

View File

@@ -1,2 +0,0 @@
def reverse_string(s):
    return s[::-1]

View File

@@ -1,7 +0,0 @@
import random


def scramble_string(s):
    s_list = list(s)
    random.shuffle(s_list)
    return ''.join(s_list)

View File

@@ -1,8 +0,0 @@
def spongebob_case(s):
    result = ''
    for i, char in enumerate(s):
        if i % 2 == 0:
            result += char.lower()
        else:
            result += char.upper()
    return result

View File

@@ -1,2 +0,0 @@
def to_uppercase(s):
    return s.upper()

View File

@@ -1,36 +0,0 @@
import sys

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('Usage: python string_cli.py <command> <string>')
        sys.exit(1)

    command = sys.argv[1]
    input_string = sys.argv[2]

    if command == 'reverse':
        from commands.reverse import reverse_string

        print(reverse_string(input_string))
    elif command == 'uppercase':
        from commands.uppercase import to_uppercase

        print(to_uppercase(input_string))
    elif command == 'lowercase':
        from commands.lowercase import to_lowercase

        print(to_lowercase(input_string))
    elif command == 'spongebob':
        from commands.spongebob import spongebob_case

        print(spongebob_case(input_string))
    elif command == 'length':
        from commands.length import string_length

        print(string_length(input_string))
    elif command == 'scramble':
        from commands.scramble import scramble_string

        print(scramble_string(input_string))
    else:
        print('Invalid command!')

View File

@@ -1 +0,0 @@
Please add a --help option to the CLI, with a detailed description of each command

View File

@@ -1 +0,0 @@
Write a python CLI for string manipulation. The CLI should accept a command, and a string. The commands should include `reverse`, `uppercase`, `lowercase`, `spongebob`, `length`, and `scramble`. The logic for each command should live in its own file.

View File

@@ -1 +0,0 @@
Write a simple TODO list application in React

View File

@@ -1,21 +0,0 @@
from http.server import BaseHTTPRequestHandler, HTTPServer


class HelloWorldHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        self.send_response(200)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(b'Hello World\n')


def run(server_class=HTTPServer, handler_class=HelloWorldHandler, port=8000):
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print(f'Starting httpd on port {port}...')
    httpd.serve_forever()


if __name__ == '__main__':
    print('starting server...')
    run()

View File

@@ -1 +0,0 @@
Make sure the server works and responds appropriately

View File

@@ -1,171 +0,0 @@
import datetime
import logging
import os
import shutil
import subprocess

import pytest

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CASES_DIR = os.path.join(SCRIPT_DIR, 'cases')
AGENTHUB_DIR = os.path.join(SCRIPT_DIR, '../', 'agenthub')


def agents():
    """Retrieves a list of available agents.

    Returns:
        A list of agent names.
    """
    agents = []
    for agent in os.listdir(AGENTHUB_DIR):
        if os.path.isdir(os.path.join(AGENTHUB_DIR, agent)) and agent.endswith(
            '_agent'
        ):
            agents.append(agent)
    return agents


def pytest_addoption(parser):
    """Registers the --model command-line option so fixtures can read it."""
    parser.addoption(
        '--model',
        action='store',
        default='gpt-3.5-turbo',
        help='The model to use for generation',
    )


@pytest.fixture(scope='session')
def test_cases_dir():
    """Fixture that provides the directory path for test cases.

    Returns:
        The directory path for test cases.
    """
    return CASES_DIR


@pytest.fixture
def task_file(test_cases_dir, request):
    """Fixture that provides the path to the task file for a test case.

    Args:
        test_cases_dir: The directory path for test cases.
        request: The pytest request object.

    Returns:
        The path to the task file for the test case.
    """
    test_case_dir = os.path.dirname(request.module.__file__)
    task_file_path = os.path.join(test_case_dir, 'task.txt')
    return task_file_path


@pytest.fixture
def workspace_dir(test_cases_dir, request):
    """Fixture that provides the workspace directory for a test case.

    Args:
        test_cases_dir: The directory path for test cases.
        request: The pytest request object.

    Returns:
        The workspace directory for the test case.
    """
    test_case_dir = os.path.dirname(request.module.__file__)
    workspace_dir = os.path.join(test_case_dir, 'workspace')
    return workspace_dir


@pytest.fixture
def model(request):
    """Fixture that provides the model name.

    Args:
        request: The pytest request object.

    Returns:
        The model name, defaulting to "gpt-3.5-turbo".
    """
    return request.config.getoption('--model')


@pytest.fixture
def run_test_case(test_cases_dir, workspace_dir, model, request):
    """Fixture that provides a function to run a test case.

    Args:
        test_cases_dir: The directory path for test cases.
        workspace_dir: The workspace directory for the test case.
        model: The model name to use for generation.
        request: The pytest request object.

    Returns:
        A function that runs a test case for a given agent and case.
    """

    def _run_test_case(agent, case):
        """Runs a test case for a given agent.

        Args:
            agent: The name of the agent to run the test case for.
            case: The name of the test case to run.

        Returns:
            The path to the workspace directory for the agent and test case.

        Raises:
            AssertionError: If the test case execution fails (non-zero return code).
        """
        case_dir = os.path.join(test_cases_dir, case)
        with open(os.path.join(case_dir, 'task.txt'), 'r') as f:
            task = f.read().strip()

        outputs_dir = os.path.join(case_dir, 'outputs')
        agent_dir = os.path.join(outputs_dir, agent)
        if not os.path.exists(agent_dir):
            os.makedirs(agent_dir)

        # Start from a clean workspace, seeded from the case's 'start/'
        # directory when one exists.
        shutil.rmtree(os.path.join(agent_dir, 'workspace'), ignore_errors=True)
        if os.path.isdir(os.path.join(case_dir, 'start')):
            shutil.copytree(
                os.path.join(case_dir, 'start'), os.path.join(agent_dir, 'workspace')
            )
        else:
            os.makedirs(os.path.join(agent_dir, 'workspace'))

        agents_ref = {
            'codeact_agent': 'CodeActAgent',
        }
        process = subprocess.Popen(
            [
                'python3',
                f'{SCRIPT_DIR}/../../openhands/main.py',
                '-d',
                f'{os.path.join(agent_dir, "workspace")}',
                '-c',
                f'{agents_ref[agent]}',
                '-t',
                f'{task}',
                '-m',
                model,
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        stdout, stderr = process.communicate()
        logging.info(f'Stdout: {stdout}')
        logging.error(f'Stderr: {stderr}')
        assert process.returncode == 0
        return os.path.join(agent_dir, 'workspace')

    return _run_test_case


def pytest_configure(config):
    """Configuration hook for pytest.

    Args:
        config: The pytest configuration object.
    """
    now = datetime.datetime.now()
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s',
        handlers=[
            logging.FileHandler(f'test_results_{now.strftime("%Y%m%d_%H%M%S")}.log'),
            logging.StreamHandler(),
        ],
    )

View File

@@ -1,32 +0,0 @@
"""Runs the regression test suite via pytest.

Usage:
    python evaluation/regression/run_tests.py --OPENAI_API_KEY=<api_key> [--model=<model_name>]
"""

import argparse

import pytest

from openhands.config import load_openhands_config

config = load_openhands_config()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='This script runs pytest with specific arguments and configuration.'
    )
    parser.add_argument(
        '--OPENAI_API_KEY', type=str, required=True, help='Your OpenAI API key'
    )
    parser.add_argument(
        '--model',
        type=str,
        default='gpt-4o',
        help='The model name to use (default: gpt-4o)',
    )
    parser_args = parser.parse_args()

    config.config['OPENAI_API_KEY'] = parser_args.OPENAI_API_KEY

    # Run the regression cases, forwarding the selected model to the
    # --model option registered in conftest.py.
    args = ['-v', 'evaluation/regression/cases', f'--model={parser_args.model}']
    pytest.main(args)