Revert "TEMPORARY swe-bench"

This reverts commit e2da7fd27b.
2026-03-22 05:37:20 +08:00 · 2024-06-27 08:44:04 +02:00
parent 8531d3f661
commit 93d0b2af6f
7 changed files with 22 additions and 135 deletions
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -18,13 +18,7 @@ import agenthub
 from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox
 from opendevin.controller.state.state import State
 from opendevin.core.config import args, config, get_llm_config_arg
-from opendevin.core.logger import (
-    get_console_handler,
-    get_llm_prompt_file_handler,
-    get_llm_response_file_handler,
-    llm_prompt_logger,
-    llm_response_logger,
-)
+from opendevin.core.logger import get_console_handler
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.main import main
 from opendevin.events.action import MessageAction
@@ -232,31 +226,6 @@ def process_instance(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        )
        logger.addHandler(file_handler)
-
-        # prompt logger
-        directory = os.path.join(eval_output_dir, 'infer_logs')
-        sid = f'inst_{instance.instance_id}'
-        for handler in llm_prompt_logger.handlers[:]:
-            llm_prompt_logger.removeHandler(handler)
-        prompt_file_handler = get_llm_prompt_file_handler(
-            sid=sid, with_date=False, directory=directory
-        )
-        llm_prompt_logger.addHandler(prompt_file_handler)
-        llm_prompt_logger.setLevel(logging.DEBUG)
-        llm_prompt_logger.propagate = False
-        prompt_file_handler.setFormatter(logging.Formatter('%(message)s'))
-
-        # response logger
-        for handler in llm_response_logger.handlers[:]:
-            llm_response_logger.removeHandler(handler)
-        response_file_handler = get_llm_response_file_handler(
-            sid=sid, with_date=False, directory=directory
-        )
-        llm_response_logger.addHandler(response_file_handler)
-        llm_response_logger.setLevel(logging.DEBUG)
-        llm_response_logger.propagate = False
-        response_file_handler.setFormatter(logging.Formatter('%(message)s'))
-
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

--- a/evaluation/swe_bench/scripts/run_infer.sh
+++ b/evaluation/swe_bench/scripts/run_infer.sh
@@ -34,7 +34,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"

 # Default to use Hint
 if [ -z "$USE_HINT_TEXT" ]; then
-  export USE_HINT_TEXT=false
+  export USE_HINT_TEXT=true
 fi
 echo "USE_HINT_TEXT: $USE_HINT_TEXT"
 EVAL_NOTE="$AGENT_VERSION"
--- a/evaluation/swe_bench/swe_env_box.py
+++ b/evaluation/swe_bench/swe_env_box.py
@@ -89,9 +89,6 @@ class SWEBenchSSHBox(DockerSSHBox):
        try:
            config.workspace_base = workspace_mount_path
            config.workspace_mount_path = workspace_mount_path
-            logger.warning(
-                f"{instance['instance_id']} : setting workspace_base and workspace_mount_path to {workspace_mount_path}"
-            )

            # linting python after editing helps LLM fix indentations
            config.enable_auto_lint = True
@@ -156,13 +153,6 @@ class SWEBenchSSHBox(DockerSSHBox):
        return git_patch


-def print_env_vars(sandbox):
-    env_vars = ['REPO_PATH', 'SWE_TASK_DIR', 'TEST_CMD']
-    for var in env_vars:
-        exit_code, output = sandbox.execute(f'echo ${var}')
-        logger.info(f'{var}: {output.strip()}')
-
-
 if __name__ == '__main__':
    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
    # so we don't need to manage file uploading to OpenDevin's repo
@@ -170,10 +160,7 @@ if __name__ == '__main__':
    swe_bench_tests = dataset['test'].to_pandas()

    # INSTANCE_ID = 'django__django-11099'
-    # INSTANCE_ID = 'astropy__astropy-12907'
-    # failures:
-    # INSTANCE_ID = 'psf__requests-2317'
-    INSTANCE_ID = 'scikit-learn__scikit-learn-13142'
+    INSTANCE_ID = 'astropy__astropy-12907'
    swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID]
    EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict()

@@ -187,47 +174,6 @@ if __name__ == '__main__':
    assert exit_code == 0, 'Failed to cd $REPO_PATH'
    logger.info(f'cd $REPO_PATH: {output}')

-    print_env_vars(sandbox)
-
-    # Reset the repo
-    exit_code, output = sandbox.execute('git reset --hard')
-    assert exit_code == 0, 'Failed to reset the repo'
-    logger.info(f'git reset --hard: {output}')
-
-    exit_code, output = sandbox.execute('cat $SWE_TASK_DIR/test.patch')
-    logger.info(f'Content of test.patch:\n{output}')
-
-    exit_code, output = sandbox.execute('ls -l $SWE_TASK_DIR/test.patch')
-    logger.info(f'File permissions of test.patch: {output}')
-
-    exit_code, output = sandbox.execute('ls -la $REPO_PATH')
-    logger.info(f'Repository file permissions:\n{output}')
-
-    # exit_code, output = sandbox.execute('ls -la $REPO_PATH/.git')
-    # logger.info(f'Git directory permissions:\n{output}')
-
-    # exit_code, output = sandbox.execute('git --version && git config --list')
-    # logger.info(f'Git version and config:\n{output}')
-
-    # exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
-    # logger.info(f'Manual patch application:\n{output}')
-
-    exit_code, output = sandbox.execute(
-        'git apply --verbose $SWE_TASK_DIR/test.patch test_requests.py'
-    )
-    logger.info(f'Applying patch to specific file:\n{output}')
-
-    exit_code, output = sandbox.execute('git status')
-    logger.info(f'Git status before patch:\n{output}')
-
-    # exit_code, output = sandbox.execute('patch -p1 < $SWE_TASK_DIR/test.patch')
-    # logger.info(f'Manual patch application:\n{output}')
-
-    # Reset the repo
-    exit_code, output = sandbox.execute('git reset --hard')
-    assert exit_code == 0, 'Failed to reset the repo'
-    logger.info(f'git reset --hard: {output}')
-
    # apply test patch
    exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/test.patch')
    assert exit_code == 0, 'Failed to apply test patch'
--- a/opendevin/core/logger.py
+++ b/opendevin/core/logger.py
@@ -184,15 +184,7 @@ class LlmFileHandler(logging.FileHandler):
    # LLM prompt and response logging
    """

-    def __init__(
-        self,
-        filename,
-        mode='a',
-        encoding='utf-8',
-        with_date: bool = False,
-        delay=False,
-        directory: str | None = None,
-    ):
+    def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
        """
        Initializes an instance of LlmFileHandler.

@@ -204,15 +196,11 @@ class LlmFileHandler(logging.FileHandler):
        """
        self.filename = filename
        self.message_counter = 1
-        if config.debug and with_date:
+        if config.debug:
            self.session = datetime.now().strftime('%y-%m-%d_%H-%M')
        else:
-            self.session = ''
-        self.log_directory = (
-            os.path.join(os.getcwd(), 'logs', 'llm', self.session)
-            if directory is None
-            else directory
-        )
+            self.session = 'default'
+        self.log_directory = os.path.join(os.getcwd(), 'logs', 'llm', self.session)
        os.makedirs(self.log_directory, exist_ok=True)
        if not config.debug:
            # Clear the log directory if not in debug mode
@@ -244,33 +232,21 @@ class LlmFileHandler(logging.FileHandler):
        self.message_counter += 1


-def get_llm_prompt_file_handler(
-    sid: str = '', with_date: bool = False, directory: str | None = None
-):
+def get_llm_prompt_file_handler():
    """
    Returns a file handler for LLM prompt logging.
    """
-    filename = f'prompt_{sid}' if sid else 'prompt'
-
-    llm_prompt_file_handler = LlmFileHandler(
-        filename=filename, with_date=with_date, delay=True, directory=directory
-    )
+    llm_prompt_file_handler = LlmFileHandler('prompt', delay=True)
    llm_prompt_file_handler.setFormatter(llm_formatter)
    llm_prompt_file_handler.setLevel(logging.DEBUG)
    return llm_prompt_file_handler


-def get_llm_response_file_handler(
-    sid: str = '', with_date: bool = False, directory: str | None = None
-):
+def get_llm_response_file_handler():
    """
    Returns a file handler for LLM response logging.
    """
-    filename = f'response_{sid}' if sid else 'response'
-
-    llm_response_file_handler = LlmFileHandler(
-        filename=filename, with_date=with_date, delay=True, directory=directory
-    )
+    llm_response_file_handler = LlmFileHandler('response', delay=True)
    llm_response_file_handler.setFormatter(llm_formatter)
    llm_response_file_handler.setLevel(logging.DEBUG)
    return llm_response_file_handler
@@ -279,9 +255,9 @@ def get_llm_response_file_handler(
 llm_prompt_logger = logging.getLogger('prompt')
 llm_prompt_logger.propagate = False
 llm_prompt_logger.setLevel(logging.DEBUG)
-llm_prompt_logger.addHandler(get_llm_prompt_file_handler(with_date=False))
+llm_prompt_logger.addHandler(get_llm_prompt_file_handler())

 llm_response_logger = logging.getLogger('response')
 llm_response_logger.propagate = False
 llm_response_logger.setLevel(logging.DEBUG)
-llm_response_logger.addHandler(get_llm_response_file_handler(with_date=False))
+llm_response_logger.addHandler(get_llm_response_file_handler())
--- a/opendevin/runtime/docker/image_agnostic_util.py
+++ b/opendevin/runtime/docker/image_agnostic_util.py
@@ -14,7 +14,7 @@ def generate_dockerfile_content(base_image: str) -> str:
    # FIXME: Remove the requirement of ssh in future version
    dockerfile_content = (
        f'FROM {base_image}\n'
-        'RUN apt update && apt install -y openssh-server wget sudo net-tools iproute2\n'
+        'RUN apt update && apt install -y openssh-server wget sudo\n'
        'RUN mkdir -p -m0755 /var/run/sshd\n'
        'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
        'RUN { test -d /opendevin/miniforge3 && echo "/opendevin/miniforge3 already in base image"; } || { \\\n'
--- a/opendevin/runtime/docker/ssh_box.py
+++ b/opendevin/runtime/docker/ssh_box.py
@@ -348,18 +348,14 @@ class DockerSSHBox(Sandbox):
                )
            # check the miniforge3 directory exist
            exit_code, logs = self.container.exec_run(
-                [
-                    '/bin/bash',
-                    '-c',
-                    '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1',
-                ],
+                ['/bin/bash', '-c', '[ -d "/opendevin/miniforge3" ] && exit 0 || exit 1'],
                workdir=self.sandbox_workspace_dir,
                environment=self._env,
            )
            if exit_code != 0:
                if exit_code == 1:
                    raise Exception(
                         'OPENDEVIN_PYTHON_INTERPRETER is not usable. Please pull the latest Docker image: docker pull ghcr.io/opendevin/sandbox:main'
                    )
                else:
                    raise Exception(
@@ -491,17 +487,17 @@ class DockerSSHBox(Sandbox):

        # once out, make sure that we have *every* output, we while loop until we get an empty output
        while True:
-            # logger.debug('WAITING FOR .prompt()')
+            logger.debug('WAITING FOR .prompt()')
            self.ssh.sendline('\n')
            timeout_not_reached = self.ssh.prompt(timeout=1)
            if not timeout_not_reached:
                logger.debug('TIMEOUT REACHED')
                break
-            # logger.debug('WAITING FOR .before')
+            logger.debug('WAITING FOR .before')
            output = self.ssh.before
-            # logger.debug(
-            #    f'WAITING FOR END OF command output ({bool(output)}): {output}'
-            # )
+            logger.debug(
+                f'WAITING FOR END OF command output ({bool(output)}): {output}'
+            )
            if isinstance(output, str) and output.strip() == '':
                break
            command_output += output
--- a/opendevin/runtime/plugins/jupyter/setup.sh
+++ b/opendevin/runtime/plugins/jupyter/setup.sh
@@ -41,7 +41,7 @@ find_free_port() {
  local end_port="${2:-65535}"

  for port in $(seq $start_port $end_port); do
-    if ! netstat -tuln | awk '{print $4}' | grep -q ":$port$"; then
+    if ! ss -tuln | awk '{print $5}' | grep -q ":$port$"; then
      echo $port
      return
    fi