mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
@@ -18,13 +18,7 @@ import agenthub
|
||||
from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import args, config, get_llm_config_arg
|
||||
from opendevin.core.logger import (
|
||||
get_console_handler,
|
||||
get_llm_prompt_file_handler,
|
||||
get_llm_response_file_handler,
|
||||
llm_prompt_logger,
|
||||
llm_response_logger,
|
||||
)
|
||||
from opendevin.core.logger import get_console_handler
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
from opendevin.core.main import main
|
||||
from opendevin.events.action import MessageAction
|
||||
@@ -232,31 +226,6 @@ def process_instance(
|
||||
logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
||||
)
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# prompt logger
|
||||
directory = os.path.join(eval_output_dir, 'infer_logs')
|
||||
sid = f'inst_{instance.instance_id}'
|
||||
for handler in llm_prompt_logger.handlers[:]:
|
||||
llm_prompt_logger.removeHandler(handler)
|
||||
prompt_file_handler = get_llm_prompt_file_handler(
|
||||
sid=sid, with_date=False, directory=directory
|
||||
)
|
||||
llm_prompt_logger.addHandler(prompt_file_handler)
|
||||
llm_prompt_logger.setLevel(logging.DEBUG)
|
||||
llm_prompt_logger.propagate = False
|
||||
prompt_file_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||
|
||||
# response logger
|
||||
for handler in llm_response_logger.handlers[:]:
|
||||
llm_response_logger.removeHandler(handler)
|
||||
response_file_handler = get_llm_response_file_handler(
|
||||
sid=sid, with_date=False, directory=directory
|
||||
)
|
||||
llm_response_logger.addHandler(response_file_handler)
|
||||
llm_response_logger.setLevel(logging.DEBUG)
|
||||
llm_response_logger.propagate = False
|
||||
response_file_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
||||
|
||||
|
||||
2
evaluation/swe_bench/scripts/run_infer.sh
Executable file → Normal file
2
evaluation/swe_bench/scripts/run_infer.sh
Executable file → Normal file
@@ -34,7 +34,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
# Default to use Hint
|
||||
if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
export USE_HINT_TEXT=true
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
|
||||
@@ -89,9 +89,6 @@ class SWEBenchSSHBox(DockerSSHBox):
|
||||
try:
|
||||
config.workspace_base = workspace_mount_path
|
||||
config.workspace_mount_path = workspace_mount_path
|
||||
logger.warning(
|
||||
f"{instance['instance_id']} : setting workspace_base and workspace_mount_path to {workspace_mount_path}"
|
||||
)
|
||||
|
||||
# linting python after editing helps LLM fix indentations
|
||||
config.enable_auto_lint = True
|
||||
@@ -156,13 +153,6 @@ class SWEBenchSSHBox(DockerSSHBox):
|
||||
return git_patch
|
||||
|
||||
|
||||
def print_env_vars(sandbox):
|
||||
env_vars = ['REPO_PATH', 'SWE_TASK_DIR', 'TEST_CMD']
|
||||
for var in env_vars:
|
||||
exit_code, output = sandbox.execute(f'echo ${var}')
|
||||
logger.info(f'{var}: {output.strip()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
|
||||
# so we don't need to manage file uploading to OpenDevin's repo
|
||||
@@ -170,10 +160,7 @@ if __name__ == '__main__':
|
||||
swe_bench_tests = dataset['test'].to_pandas()
|
||||
|
||||
# INSTANCE_ID = 'django__django-11099'
|
||||
# INSTANCE_ID = 'astropy__astropy-12907'
|
||||
# failures:
|
||||
# INSTANCE_ID = 'psf__requests-2317'
|
||||
INSTANCE_ID = 'scikit-learn__scikit-learn-13142'
|
||||
INSTANCE_ID = 'astropy__astropy-12907'
|
||||
swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID]
|
||||
EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict()
|
||||
|
||||
@@ -187,47 +174,6 @@ if __name__ == '__main__':
|
||||
assert exit_code == 0, 'Failed to cd $REPO_PATH'
|
||||
logger.info(f'cd $REPO_PATH: {output}')
|
||||
|
||||
print_env_vars(sandbox)
|
||||
|
||||
# Reset the repo
|
||||
exit_code, output = sandbox.execute('git reset --hard')
|
||||
assert exit_code == 0, 'Failed to reset the repo'
|
||||
logger.info(f'git reset --hard: {output}')
|
||||
|
||||
exit_code, output = sandbox.execute('cat $SWE_TASK_DIR/test.patch')
|
||||
logger.info(f'Content of test.patch:\n{output}')
|
||||
|
||||
exit_code, output = sandbox.execute('ls -l $SWE_TASK_DIR/test.patch')
|
||||
logger.info(f'File permissions of test.patch: {output}')
|
||||
|
||||
exit_code, output = sandbox.execute('ls -la $REPO_PATH')
|
||||
logger.info(f'Repository file permissions:\n{output}')
|
||||
|
||||
# exit_code, output = sandbox.execute('ls -la $REPO_PATH/.git')
|
||||
# logger.info(f'Git directory permissions:\n{output}')
|
||||
|
||||
# exit_code, output = sandbox.execute('git --version && git config --list')
|
||||
# logger.info(f'Git version and config:\n{output}')
|
||||
|
||||
# exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
|
||||
# logger.info(f'Manual patch application:\n{output}')
|
||||
|
||||
exit_code, output = sandbox.execute(
|
||||
'git apply --verbose $SWE_TASK_DIR/test.patch test_requests.py'
|
||||
)
|
||||
logger.info(f'Applying patch to specific file:\n{output}')
|
||||
|
||||
exit_code, output = sandbox.execute('git status')
|
||||
logger.info(f'Git status before patch:\n{output}')
|
||||
|
||||
# exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
|
||||
# logger.info(f'Manual patch application:\n{output}')
|
||||
|
||||
# Reset the repo
|
||||
exit_code, output = sandbox.execute('git reset --hard')
|
||||
assert exit_code == 0, 'Failed to reset the repo'
|
||||
logger.info(f'git reset --hard: {output}')
|
||||
|
||||
# apply test patch
|
||||
exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/test.patch')
|
||||
assert exit_code == 0, 'Failed to apply test patch'
|
||||
|
||||
@@ -184,15 +184,7 @@ class LlmFileHandler(logging.FileHandler):
|
||||
# LLM prompt and response logging
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
filename,
|
||||
mode='a',
|
||||
encoding='utf-8',
|
||||
with_date: bool = False,
|
||||
delay=False,
|
||||
directory: str | None = None,
|
||||
):
|
||||
def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
|
||||
"""
|
||||
Initializes an instance of LlmFileHandler.
|
||||
|
||||
@@ -204,15 +196,11 @@ class LlmFileHandler(logging.FileHandler):
|
||||
"""
|
||||
self.filename = filename
|
||||
self.message_counter = 1
|
||||
if config.debug and with_date:
|
||||
if config.debug:
|
||||
self.session = datetime.now().strftime('%y-%m-%d_%H-%M')
|
||||
else:
|
||||
self.session = ''
|
||||
self.log_directory = (
|
||||
os.path.join(os.getcwd(), 'logs', 'llm', self.session)
|
||||
if directory is None
|
||||
else directory
|
||||
)
|
||||
self.session = 'default'
|
||||
self.log_directory = os.path.join(os.getcwd(), 'logs', 'llm', self.session)
|
||||
os.makedirs(self.log_directory, exist_ok=True)
|
||||
if not config.debug:
|
||||
# Clear the log directory if not in debug mode
|
||||
@@ -244,33 +232,21 @@ class LlmFileHandler(logging.FileHandler):
|
||||
self.message_counter += 1
|
||||
|
||||
|
||||
def get_llm_prompt_file_handler(
|
||||
sid: str = '', with_date: bool = False, directory: str | None = None
|
||||
):
|
||||
def get_llm_prompt_file_handler():
|
||||
"""
|
||||
Returns a file handler for LLM prompt logging.
|
||||
"""
|
||||
filename = f'prompt_{sid}' if sid else 'prompt'
|
||||
|
||||
llm_prompt_file_handler = LlmFileHandler(
|
||||
filename=filename, with_date=with_date, delay=True, directory=directory
|
||||
)
|
||||
llm_prompt_file_handler = LlmFileHandler('prompt', delay=True)
|
||||
llm_prompt_file_handler.setFormatter(llm_formatter)
|
||||
llm_prompt_file_handler.setLevel(logging.DEBUG)
|
||||
return llm_prompt_file_handler
|
||||
|
||||
|
||||
def get_llm_response_file_handler(
|
||||
sid: str = '', with_date: bool = False, directory: str | None = None
|
||||
):
|
||||
def get_llm_response_file_handler():
|
||||
"""
|
||||
Returns a file handler for LLM response logging.
|
||||
"""
|
||||
filename = f'response_{sid}' if sid else 'response'
|
||||
|
||||
llm_response_file_handler = LlmFileHandler(
|
||||
filename=filename, with_date=with_date, delay=True, directory=directory
|
||||
)
|
||||
llm_response_file_handler = LlmFileHandler('response', delay=True)
|
||||
llm_response_file_handler.setFormatter(llm_formatter)
|
||||
llm_response_file_handler.setLevel(logging.DEBUG)
|
||||
return llm_response_file_handler
|
||||
@@ -279,9 +255,9 @@ def get_llm_response_file_handler(
|
||||
llm_prompt_logger = logging.getLogger('prompt')
|
||||
llm_prompt_logger.propagate = False
|
||||
llm_prompt_logger.setLevel(logging.DEBUG)
|
||||
llm_prompt_logger.addHandler(get_llm_prompt_file_handler(with_date=False))
|
||||
llm_prompt_logger.addHandler(get_llm_prompt_file_handler())
|
||||
|
||||
llm_response_logger = logging.getLogger('response')
|
||||
llm_response_logger.propagate = False
|
||||
llm_response_logger.setLevel(logging.DEBUG)
|
||||
llm_response_logger.addHandler(get_llm_response_file_handler(with_date=False))
|
||||
llm_response_logger.addHandler(get_llm_response_file_handler())
|
||||
|
||||
@@ -14,7 +14,7 @@ def generate_dockerfile_content(base_image: str) -> str:
|
||||
# FIXME: Remove the requirement of ssh in future version
|
||||
dockerfile_content = (
|
||||
f'FROM {base_image}\n'
|
||||
'RUN apt update && apt install -y openssh-server wget sudo net-tools iproute2\n'
|
||||
'RUN apt update && apt install -y openssh-server wget sudo\n'
|
||||
'RUN mkdir -p -m0755 /var/run/sshd\n'
|
||||
'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
|
||||
'RUN { test -d /opendevin/miniforge3 && echo "/opendevin/miniforge3 already in base image"; } || { \\\n'
|
||||
|
||||
@@ -348,18 +348,14 @@ class DockerSSHBox(Sandbox):
|
||||
)
|
||||
# check the miniforge3 directory exist
|
||||
exit_code, logs = self.container.exec_run(
|
||||
[
|
||||
'/bin/bash',
|
||||
'-c',
|
||||
'[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1',
|
||||
],
|
||||
['/bin/bash', '-c', '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1'],
|
||||
workdir=self.sandbox_workspace_dir,
|
||||
environment=self._env,
|
||||
)
|
||||
if exit_code != 0:
|
||||
if exit_code == 1:
|
||||
raise Exception(
|
||||
'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main'
|
||||
f'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main'
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
@@ -491,17 +487,17 @@ class DockerSSHBox(Sandbox):
|
||||
|
||||
# once out, make sure that we have *every* output, we while loop until we get an empty output
|
||||
while True:
|
||||
# logger.debug('WAITING FOR .prompt()')
|
||||
logger.debug('WAITING FOR .prompt()')
|
||||
self.ssh.sendline('\n')
|
||||
timeout_not_reached = self.ssh.prompt(timeout=1)
|
||||
if not timeout_not_reached:
|
||||
logger.debug('TIMEOUT REACHED')
|
||||
break
|
||||
# logger.debug('WAITING FOR .before')
|
||||
logger.debug('WAITING FOR .before')
|
||||
output = self.ssh.before
|
||||
# logger.debug(
|
||||
# f'WAITING FOR END OF command output ({bool(output)}): {output}'
|
||||
# )
|
||||
logger.debug(
|
||||
f'WAITING FOR END OF command output ({bool(output)}): {output}'
|
||||
)
|
||||
if isinstance(output, str) and output.strip() == '':
|
||||
break
|
||||
command_output += output
|
||||
|
||||
@@ -41,7 +41,7 @@ find_free_port() {
|
||||
local end_port="${2:-65535}"
|
||||
|
||||
for port in $(seq $start_port $end_port); do
|
||||
if ! netstat -tuln | awk '{print $4}' | grep -q ":$port$"; then
|
||||
if ! ss -tuln | awk '{print $5}' | grep -q ":$port$"; then
|
||||
echo $port
|
||||
return
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user