diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 90d0384df5..386353c858 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -18,13 +18,7 @@ import agenthub from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox from opendevin.controller.state.state import State from opendevin.core.config import args, config, get_llm_config_arg -from opendevin.core.logger import ( - get_console_handler, - get_llm_prompt_file_handler, - get_llm_response_file_handler, - llm_prompt_logger, - llm_response_logger, -) +from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import main from opendevin.events.action import MessageAction @@ -232,31 +226,6 @@ def process_instance( logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') ) logger.addHandler(file_handler) - - # prompt logger - directory = os.path.join(eval_output_dir, 'infer_logs') - sid = f'inst_{instance.instance_id}' - for handler in llm_prompt_logger.handlers[:]: - llm_prompt_logger.removeHandler(handler) - prompt_file_handler = get_llm_prompt_file_handler( - sid=sid, with_date=False, directory=directory - ) - llm_prompt_logger.addHandler(prompt_file_handler) - llm_prompt_logger.setLevel(logging.DEBUG) - llm_prompt_logger.propagate = False - prompt_file_handler.setFormatter(logging.Formatter('%(message)s')) - - # response logger - for handler in llm_response_logger.handlers[:]: - llm_response_logger.removeHandler(handler) - response_file_handler = get_llm_response_file_handler( - sid=sid, with_date=False, directory=directory - ) - llm_response_logger.addHandler(response_file_handler) - llm_response_logger.setLevel(logging.DEBUG) - llm_response_logger.propagate = False - response_file_handler.setFormatter(logging.Formatter('%(message)s')) - else: logger.info(f'Starting evaluation for instance {instance.instance_id}.') diff --git a/evaluation/swe_bench/scripts/run_infer.sh b/evaluation/swe_bench/scripts/run_infer.sh old mode 100755 new mode 100644 index 6c72db0a3b..17fde504a4 --- a/evaluation/swe_bench/scripts/run_infer.sh +++ b/evaluation/swe_bench/scripts/run_infer.sh @@ -34,7 +34,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG" # Default to use Hint if [ -z "$USE_HINT_TEXT" ]; then - export USE_HINT_TEXT=false + export USE_HINT_TEXT=true fi echo "USE_HINT_TEXT: $USE_HINT_TEXT" EVAL_NOTE="$AGENT_VERSION" diff --git a/evaluation/swe_bench/swe_env_box.py b/evaluation/swe_bench/swe_env_box.py index 435d27f51e..3f02696ec7 100644 --- a/evaluation/swe_bench/swe_env_box.py +++ b/evaluation/swe_bench/swe_env_box.py @@ -89,9 +89,6 @@ class SWEBenchSSHBox(DockerSSHBox): try: config.workspace_base = workspace_mount_path config.workspace_mount_path = workspace_mount_path - logger.warning( - f"{instance['instance_id']} : setting workspace_base and workspace_mount_path to {workspace_mount_path}" - ) # linting python after editing helps LLM fix indentations config.enable_auto_lint = True @@ -156,13 +153,6 @@ class SWEBenchSSHBox(DockerSSHBox): return git_patch -def print_env_vars(sandbox): - env_vars = ['REPO_PATH', 'SWE_TASK_DIR', 'TEST_CMD'] - for var in env_vars: - exit_code, output = sandbox.execute(f'echo ${var}') - logger.info(f'{var}: {output.strip()}') - - if __name__ == '__main__': # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing # so we don't need to manage file uploading to OpenDevin's repo @@ -170,10 +160,7 @@ if __name__ == '__main__': swe_bench_tests = dataset['test'].to_pandas() # INSTANCE_ID = 'django__django-11099' - # INSTANCE_ID = 'astropy__astropy-12907' - # failures: - # INSTANCE_ID = 'psf__requests-2317' - INSTANCE_ID = 'scikit-learn__scikit-learn-13142' + INSTANCE_ID = 'astropy__astropy-12907' swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID] EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict() @@ -187,47 +174,6 @@ if __name__ == '__main__': assert exit_code == 0, 'Failed to cd $REPO_PATH' logger.info(f'cd $REPO_PATH: {output}') - print_env_vars(sandbox) - - # Reset the repo - exit_code, output = sandbox.execute('git reset --hard') - assert exit_code == 0, 'Failed to reset the repo' - logger.info(f'git reset --hard: {output}') - - exit_code, output = sandbox.execute('cat $SWE_TASK_DIR/test.patch') - logger.info(f'Content of test.patch:\n{output}') - - exit_code, output = sandbox.execute('ls -l $SWE_TASK_DIR/test.patch') - logger.info(f'File permissions of test.patch: {output}') - - exit_code, output = sandbox.execute('ls -la $REPO_PATH') - logger.info(f'Repository file permissions:\n{output}') - - # exit_code, output = sandbox.execute('ls -la $REPO_PATH/.git') - # logger.info(f'Git directory permissions:\n{output}') - - # exit_code, output = sandbox.execute('git --version && git config --list') - # logger.info(f'Git version and config:\n{output}') - - # exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch') - # logger.info(f'Manual patch application:\n{output}') - - exit_code, output = sandbox.execute( - 'git apply --verbose $SWE_TASK_DIR/test.patch test_requests.py' - ) - logger.info(f'Applying patch to specific file:\n{output}') - - exit_code, output = sandbox.execute('git status') - logger.info(f'Git status before patch:\n{output}') - - # exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch') - # logger.info(f'Manual patch application:\n{output}') - - # Reset the repo - exit_code, output = sandbox.execute('git reset --hard') - assert exit_code == 0, 'Failed to reset the repo' - logger.info(f'git reset --hard: {output}') - # apply test patch exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/test.patch') assert exit_code == 0, 'Failed to apply test patch' diff --git a/opendevin/core/logger.py b/opendevin/core/logger.py index 52e3204d5a..737c762508 100644 --- a/opendevin/core/logger.py +++ b/opendevin/core/logger.py @@ -184,15 +184,7 @@ class LlmFileHandler(logging.FileHandler): # LLM prompt and response logging """ - def __init__( - self, - filename, - mode='a', - encoding='utf-8', - with_date: bool = False, - delay=False, - directory: str | None = None, - ): + def __init__(self, filename, mode='a', encoding='utf-8', delay=False): """ Initializes an instance of LlmFileHandler. @@ -204,15 +196,11 @@ class LlmFileHandler(logging.FileHandler): """ self.filename = filename self.message_counter = 1 - if config.debug and with_date: + if config.debug: self.session = datetime.now().strftime('%y-%m-%d_%H-%M') else: - self.session = '' - self.log_directory = ( - os.path.join(os.getcwd(), 'logs', 'llm', self.session) - if directory is None - else directory - ) + self.session = 'default' + self.log_directory = os.path.join(os.getcwd(), 'logs', 'llm', self.session) os.makedirs(self.log_directory, exist_ok=True) if not config.debug: # Clear the log directory if not in debug mode @@ -244,33 +232,21 @@ class LlmFileHandler(logging.FileHandler): self.message_counter += 1 -def get_llm_prompt_file_handler( - sid: str = '', with_date: bool = False, directory: str | None = None -): +def get_llm_prompt_file_handler(): """ Returns a file handler for LLM prompt logging. """ - filename = f'prompt_{sid}' if sid else 'prompt' - - llm_prompt_file_handler = LlmFileHandler( - filename=filename, with_date=with_date, delay=True, directory=directory - ) + llm_prompt_file_handler = LlmFileHandler('prompt', delay=True) llm_prompt_file_handler.setFormatter(llm_formatter) llm_prompt_file_handler.setLevel(logging.DEBUG) return llm_prompt_file_handler -def get_llm_response_file_handler( - sid: str = '', with_date: bool = False, directory: str | None = None -): +def get_llm_response_file_handler(): """ Returns a file handler for LLM response logging. """ - filename = f'response_{sid}' if sid else 'response' - - llm_response_file_handler = LlmFileHandler( - filename=filename, with_date=with_date, delay=True, directory=directory - ) + llm_response_file_handler = LlmFileHandler('response', delay=True) llm_response_file_handler.setFormatter(llm_formatter) llm_response_file_handler.setLevel(logging.DEBUG) return llm_response_file_handler @@ -279,9 +255,9 @@ def get_llm_response_file_handler( llm_prompt_logger = logging.getLogger('prompt') llm_prompt_logger.propagate = False llm_prompt_logger.setLevel(logging.DEBUG) -llm_prompt_logger.addHandler(get_llm_prompt_file_handler(with_date=False)) +llm_prompt_logger.addHandler(get_llm_prompt_file_handler()) llm_response_logger = logging.getLogger('response') llm_response_logger.propagate = False llm_response_logger.setLevel(logging.DEBUG) -llm_response_logger.addHandler(get_llm_response_file_handler(with_date=False)) +llm_response_logger.addHandler(get_llm_response_file_handler()) diff --git a/opendevin/runtime/docker/image_agnostic_util.py b/opendevin/runtime/docker/image_agnostic_util.py index 2345d8925c..151db29863 100644 --- a/opendevin/runtime/docker/image_agnostic_util.py +++ b/opendevin/runtime/docker/image_agnostic_util.py @@ -14,7 +14,7 @@ def generate_dockerfile_content(base_image: str) -> str: # FIXME: Remove the requirement of ssh in future version dockerfile_content = ( f'FROM {base_image}\n' - 'RUN apt update && apt install -y openssh-server wget sudo net-tools iproute2\n' + 'RUN apt update && apt install -y openssh-server wget sudo\n' 'RUN mkdir -p -m0755 /var/run/sshd\n' 'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n' 'RUN { test -d /opendevin/miniforge3 && echo "/opendevin/miniforge3 already in base image"; } || { \\\n' diff --git a/opendevin/runtime/docker/ssh_box.py b/opendevin/runtime/docker/ssh_box.py index 7862ca49bb..510a87615b 100644 --- a/opendevin/runtime/docker/ssh_box.py +++ b/opendevin/runtime/docker/ssh_box.py @@ -348,18 +348,14 @@ class DockerSSHBox(Sandbox): ) # check the miniforge3 directory exist exit_code, logs = self.container.exec_run( - [ - '/bin/bash', - '-c', - '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1', - ], + ['/bin/bash', '-c', '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1'], workdir=self.sandbox_workspace_dir, environment=self._env, ) if exit_code != 0: if exit_code == 1: raise Exception( - 'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main' + f'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main' ) else: raise Exception( @@ -491,17 +487,17 @@ class DockerSSHBox(Sandbox): # once out, make sure that we have *every* output, we while loop until we get an empty output while True: - # logger.debug('WAITING FOR .prompt()') + logger.debug('WAITING FOR .prompt()') self.ssh.sendline('\n') timeout_not_reached = self.ssh.prompt(timeout=1) if not timeout_not_reached: logger.debug('TIMEOUT REACHED') break - # logger.debug('WAITING FOR .before') + logger.debug('WAITING FOR .before') output = self.ssh.before - # logger.debug( - # f'WAITING FOR END OF command output ({bool(output)}): {output}' - # ) + logger.debug( + f'WAITING FOR END OF command output ({bool(output)}): {output}' + ) if isinstance(output, str) and output.strip() == '': break command_output += output diff --git a/opendevin/runtime/plugins/jupyter/setup.sh b/opendevin/runtime/plugins/jupyter/setup.sh index 5e8d06b682..e54649303e 100755 --- a/opendevin/runtime/plugins/jupyter/setup.sh +++ b/opendevin/runtime/plugins/jupyter/setup.sh @@ -41,7 +41,7 @@ find_free_port() { local end_port="${2:-65535}" for port in $(seq $start_port $end_port); do - if ! netstat -tuln | awk '{print $4}' | grep -q ":$port$"; then + if ! ss -tuln | awk '{print $5}' | grep -q ":$port$"; then echo $port return fi