Revert "TEMPORARY swe-bench"

This reverts commit e2da7fd27b.
This commit is contained in:
Engel Nyst
2024-06-27 08:44:04 +02:00
parent 8531d3f661
commit 93d0b2af6f
7 changed files with 22 additions and 135 deletions

View File

@@ -18,13 +18,7 @@ import agenthub
from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox
from opendevin.controller.state.state import State
from opendevin.core.config import args, config, get_llm_config_arg
from opendevin.core.logger import (
get_console_handler,
get_llm_prompt_file_handler,
get_llm_response_file_handler,
llm_prompt_logger,
llm_response_logger,
)
from opendevin.core.logger import get_console_handler
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.main import main
from opendevin.events.action import MessageAction
@@ -232,31 +226,6 @@ def process_instance(
logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)
logger.addHandler(file_handler)
# prompt logger
directory = os.path.join(eval_output_dir, 'infer_logs')
sid = f'inst_{instance.instance_id}'
for handler in llm_prompt_logger.handlers[:]:
llm_prompt_logger.removeHandler(handler)
prompt_file_handler = get_llm_prompt_file_handler(
sid=sid, with_date=False, directory=directory
)
llm_prompt_logger.addHandler(prompt_file_handler)
llm_prompt_logger.setLevel(logging.DEBUG)
llm_prompt_logger.propagate = False
prompt_file_handler.setFormatter(logging.Formatter('%(message)s'))
# response logger
for handler in llm_response_logger.handlers[:]:
llm_response_logger.removeHandler(handler)
response_file_handler = get_llm_response_file_handler(
sid=sid, with_date=False, directory=directory
)
llm_response_logger.addHandler(response_file_handler)
llm_response_logger.setLevel(logging.DEBUG)
llm_response_logger.propagate = False
response_file_handler.setFormatter(logging.Formatter('%(message)s'))
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')

2
evaluation/swe_bench/scripts/run_infer.sh Executable file → Normal file
View File

@@ -34,7 +34,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
# Default to use Hint
if [ -z "$USE_HINT_TEXT" ]; then
export USE_HINT_TEXT=false
export USE_HINT_TEXT=true
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
EVAL_NOTE="$AGENT_VERSION"

View File

@@ -89,9 +89,6 @@ class SWEBenchSSHBox(DockerSSHBox):
try:
config.workspace_base = workspace_mount_path
config.workspace_mount_path = workspace_mount_path
logger.warning(
f"{instance['instance_id']} : setting workspace_base and workspace_mount_path to {workspace_mount_path}"
)
# linting python after editing helps LLM fix indentations
config.enable_auto_lint = True
@@ -156,13 +153,6 @@ class SWEBenchSSHBox(DockerSSHBox):
return git_patch
def print_env_vars(sandbox):
    """
    Logs the value of selected environment variables as seen inside the sandbox.

    For each of REPO_PATH, SWE_TASK_DIR and TEST_CMD, runs `echo $<NAME>` through
    `sandbox.execute` and logs the whitespace-stripped output at INFO level.

    Args:
        sandbox: Object exposing `execute(cmd)` that returns an
            `(exit_code, output)` pair.
    """
    for name in ('REPO_PATH', 'SWE_TASK_DIR', 'TEST_CMD'):
        # The exit code is not inspected; only the echoed text is reported.
        _, echoed = sandbox.execute(f'echo ${name}')
        logger.info(f'{name}: {echoed.strip()}')
if __name__ == '__main__':
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
# so we don't need to manage file uploading to OpenDevin's repo
@@ -170,10 +160,7 @@ if __name__ == '__main__':
swe_bench_tests = dataset['test'].to_pandas()
# INSTANCE_ID = 'django__django-11099'
# INSTANCE_ID = 'astropy__astropy-12907'
# failures:
# INSTANCE_ID = 'psf__requests-2317'
INSTANCE_ID = 'scikit-learn__scikit-learn-13142'
INSTANCE_ID = 'astropy__astropy-12907'
swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID]
EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict()
@@ -187,47 +174,6 @@ if __name__ == '__main__':
assert exit_code == 0, 'Failed to cd $REPO_PATH'
logger.info(f'cd $REPO_PATH: {output}')
print_env_vars(sandbox)
# Reset the repo
exit_code, output = sandbox.execute('git reset --hard')
assert exit_code == 0, 'Failed to reset the repo'
logger.info(f'git reset --hard: {output}')
exit_code, output = sandbox.execute('cat $SWE_TASK_DIR/test.patch')
logger.info(f'Content of test.patch:\n{output}')
exit_code, output = sandbox.execute('ls -l $SWE_TASK_DIR/test.patch')
logger.info(f'File permissions of test.patch: {output}')
exit_code, output = sandbox.execute('ls -la $REPO_PATH')
logger.info(f'Repository file permissions:\n{output}')
# exit_code, output = sandbox.execute('ls -la $REPO_PATH/.git')
# logger.info(f'Git directory permissions:\n{output}')
# exit_code, output = sandbox.execute('git --version && git config --list')
# logger.info(f'Git version and config:\n{output}')
# exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
# logger.info(f'Manual patch application:\n{output}')
exit_code, output = sandbox.execute(
'git apply --verbose $SWE_TASK_DIR/test.patch test_requests.py'
)
logger.info(f'Applying patch to specific file:\n{output}')
exit_code, output = sandbox.execute('git status')
logger.info(f'Git status before patch:\n{output}')
# exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
# logger.info(f'Manual patch application:\n{output}')
# Reset the repo
exit_code, output = sandbox.execute('git reset --hard')
assert exit_code == 0, 'Failed to reset the repo'
logger.info(f'git reset --hard: {output}')
# apply test patch
exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/test.patch')
assert exit_code == 0, 'Failed to apply test patch'

View File

@@ -184,15 +184,7 @@ class LlmFileHandler(logging.FileHandler):
# LLM prompt and response logging
"""
def __init__(
self,
filename,
mode='a',
encoding='utf-8',
with_date: bool = False,
delay=False,
directory: str | None = None,
):
def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
"""
Initializes an instance of LlmFileHandler.
@@ -204,15 +196,11 @@ class LlmFileHandler(logging.FileHandler):
"""
self.filename = filename
self.message_counter = 1
if config.debug and with_date:
if config.debug:
self.session = datetime.now().strftime('%y-%m-%d_%H-%M')
else:
self.session = ''
self.log_directory = (
os.path.join(os.getcwd(), 'logs', 'llm', self.session)
if directory is None
else directory
)
self.session = 'default'
self.log_directory = os.path.join(os.getcwd(), 'logs', 'llm', self.session)
os.makedirs(self.log_directory, exist_ok=True)
if not config.debug:
# Clear the log directory if not in debug mode
@@ -244,33 +232,21 @@ class LlmFileHandler(logging.FileHandler):
self.message_counter += 1
def get_llm_prompt_file_handler(
sid: str = '', with_date: bool = False, directory: str | None = None
):
def get_llm_prompt_file_handler():
"""
Returns a file handler for LLM prompt logging.
"""
filename = f'prompt_{sid}' if sid else 'prompt'
llm_prompt_file_handler = LlmFileHandler(
filename=filename, with_date=with_date, delay=True, directory=directory
)
llm_prompt_file_handler = LlmFileHandler('prompt', delay=True)
llm_prompt_file_handler.setFormatter(llm_formatter)
llm_prompt_file_handler.setLevel(logging.DEBUG)
return llm_prompt_file_handler
def get_llm_response_file_handler(
sid: str = '', with_date: bool = False, directory: str | None = None
):
def get_llm_response_file_handler():
"""
Returns a file handler for LLM response logging.
"""
filename = f'response_{sid}' if sid else 'response'
llm_response_file_handler = LlmFileHandler(
filename=filename, with_date=with_date, delay=True, directory=directory
)
llm_response_file_handler = LlmFileHandler('response', delay=True)
llm_response_file_handler.setFormatter(llm_formatter)
llm_response_file_handler.setLevel(logging.DEBUG)
return llm_response_file_handler
@@ -279,9 +255,9 @@ def get_llm_response_file_handler(
llm_prompt_logger = logging.getLogger('prompt')
llm_prompt_logger.propagate = False
llm_prompt_logger.setLevel(logging.DEBUG)
llm_prompt_logger.addHandler(get_llm_prompt_file_handler(with_date=False))
llm_prompt_logger.addHandler(get_llm_prompt_file_handler())
llm_response_logger = logging.getLogger('response')
llm_response_logger.propagate = False
llm_response_logger.setLevel(logging.DEBUG)
llm_response_logger.addHandler(get_llm_response_file_handler(with_date=False))
llm_response_logger.addHandler(get_llm_response_file_handler())

View File

@@ -14,7 +14,7 @@ def generate_dockerfile_content(base_image: str) -> str:
# FIXME: Remove the requirement of ssh in future version
dockerfile_content = (
f'FROM {base_image}\n'
'RUN apt update && apt install -y openssh-server wget sudo net-tools iproute2\n'
'RUN apt update && apt install -y openssh-server wget sudo\n'
'RUN mkdir -p -m0755 /var/run/sshd\n'
'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
'RUN { test -d /opendevin/miniforge3 && echo "/opendevin/miniforge3 already in base image"; } || { \\\n'

View File

@@ -348,18 +348,14 @@ class DockerSSHBox(Sandbox):
)
# check that the miniforge3 directory exists
exit_code, logs = self.container.exec_run(
[
'/bin/bash',
'-c',
'[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1',
],
['/bin/bash', '-c', '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1'],
workdir=self.sandbox_workspace_dir,
environment=self._env,
)
if exit_code != 0:
if exit_code == 1:
raise Exception(
'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main'
f'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main'
)
else:
raise Exception(
@@ -491,17 +487,17 @@ class DockerSSHBox(Sandbox):
# once out, make sure that we have *every* output: keep looping until we get an empty output
while True:
# logger.debug('WAITING FOR .prompt()')
logger.debug('WAITING FOR .prompt()')
self.ssh.sendline('\n')
timeout_not_reached = self.ssh.prompt(timeout=1)
if not timeout_not_reached:
logger.debug('TIMEOUT REACHED')
break
# logger.debug('WAITING FOR .before')
logger.debug('WAITING FOR .before')
output = self.ssh.before
# logger.debug(
# f'WAITING FOR END OF command output ({bool(output)}): {output}'
# )
logger.debug(
f'WAITING FOR END OF command output ({bool(output)}): {output}'
)
if isinstance(output, str) and output.strip() == '':
break
command_output += output

View File

@@ -41,7 +41,7 @@ find_free_port() {
local end_port="${2:-65535}"
for port in $(seq $start_port $end_port); do
if ! netstat -tuln | awk '{print $4}' | grep -q ":$port$"; then
if ! ss -tuln | awk '{print $5}' | grep -q ":$port$"; then
echo $port
return
fi