diff --git a/.gitattributes b/.gitattributes index 9030923a78..5be91f972c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -*.ipynb linguist-vendored \ No newline at end of file +*.ipynb linguist-vendored diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dd9b24e9f3..deffe5c62e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -35,4 +35,8 @@ jobs: - name: Install pre-commit run: pip install pre-commit==3.7.0 - name: Run pre-commit hooks - run: pre-commit run --files opendevin/**/* agenthub/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml + run: | + pre-commit run \ + --all-files \ + --show-diff-on-failure \ + --config ./dev_config/python/.pre-commit-config.yaml diff --git a/Makefile b/Makefile index 25bd00eb42..341cec3d90 100644 --- a/Makefile +++ b/Makefile @@ -149,7 +149,7 @@ install-precommit-hooks: lint: @echo "$(YELLOW)Running linters...$(RESET)" - @poetry run pre-commit run --files opendevin/**/* agenthub/**/* --show-diff-on-failure --config $(PRECOMMIT_CONFIG_PATH) + @poetry run pre-commit run --all-files --show-diff-on-failure --config $(PRECOMMIT_CONFIG_PATH) build-frontend: @echo "$(YELLOW)Building frontend...$(RESET)" diff --git a/agenthub/SWE_agent/parser.py b/agenthub/SWE_agent/parser.py index 39fa158afc..3a8ca180b5 100644 --- a/agenthub/SWE_agent/parser.py +++ b/agenthub/SWE_agent/parser.py @@ -20,7 +20,7 @@ no_open_file_error = AgentEchoAction( def invalid_error(cmd, docs): - return f'''ERROR: + return f"""ERROR: Invalid command structure for ``` {cmd} @@ -30,7 +30,7 @@ If so, try again by running only one of the commands: Try again using this format: {COMMAND_USAGE[docs]} -''' +""" def get_action_from_string(command_string: str, path: str, line: int, thoughts: str = '') -> Action | None: diff --git a/agenthub/SWE_agent/prompts.py b/agenthub/SWE_agent/prompts.py index 71117fed73..e4ed57396f 100644 --- a/agenthub/SWE_agent/prompts.py +++ b/agenthub/SWE_agent/prompts.py @@ -1,7 +1,7 @@ DEFAULT_COMMANDS_DICT = { 'exit': 'Executed when task is complete', - 'read [] []': 'Shows a given file\'s contents starting from up to . Default: start_line = 0, end_line = -1. By default the whole file will be read.', + 'read [] []': "Shows a given file's contents starting from up to . Default: start_line = 0, end_line = -1. By default the whole file will be read.", 'write [] []': 'Modifies a by replacing the current lines between and with . Default start_line = 0 and end_line = -1. Calling this with no line args will replace the whole file.', 'browse ': 'Returns the text version of any url, this can be useful to look up documentation or finding issues on github', 'scroll_up': 'Takes no arguments. This will scroll up and show you the 100 lines above your current lines', @@ -16,7 +16,7 @@ DEFAULT_COMMANDS_DICT = { COMMAND_USAGE = { 'exit': 'Usage:\n```\nexit\n```\nExecuted when task is complete', - 'read': 'Args:\n [] []\nUsage:\n```\nread file.py\n```\nor\n```\nread example.py \n```\nShows a given file\'s contents starting from up to . Default: start_line = 0, end_line = -1. by default the whole file will be read.', + 'read': "Args:\n [] []\nUsage:\n```\nread file.py\n```\nor\n```\nread example.py \n```\nShows a given file's contents starting from up to . Default: start_line = 0, end_line = -1. by default the whole file will be read.", 'write': 'Args:\n [] []\nUsage:\n```\nwrite "def main():\n print("This is line one")" 0 2\n```\nModifies a by replacing the current lines between and with . Default start_line = 0 and end_line = -1. Calling this with no line args will replace the whole file.', 'edit': 'Args:\n \nUsage:\n```\nedit 0 1 import pandas as pd\n```\nThis will modify the current file you are in with the changes you make between the line numbers you designate', 'goto': 'Args:\n\nUsage:\n```\ngoto \n```\nThis will show you the 100 lines below and including the line you specify within your current file.', @@ -52,7 +52,7 @@ To modify the current file use 'edit'. To move through the current file use 'got when using write and edit do not surround the code with any "" just write the code. """ -GENERAL_GUIDELINES = '''INSTRUCTIONS: +GENERAL_GUIDELINES = """INSTRUCTIONS: Now, you're going to solve this issue on your own. You can use any bash commands or custom commands you wish to complete your task. Edit all the files you need to and run any checks or tests that you want. Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. When you're satisfied with all of the changes you've made, you can indicate that you are done by running the exit command. @@ -69,9 +69,9 @@ IMPORTANT TIPS: 5. Understand your context: Always make sure to look at the currently open file and the current working directory. The currently open file might be in a different directory than the working directory. 6. Verify your edits: When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. 7. Thoroughly test your solution: After making any changes to fix a bug, be sure to thoroughly test your solution to ensure the bug has been resolved. Re-run the bug reproduction script and verify that the issue has been addressed. -''' +""" -RESPONSE_FORMAT = '''RESPONSE FORMAT: +RESPONSE_FORMAT = """RESPONSE FORMAT: This is the format of the response you will make in order to solve the current issue. You will be given multiple iterations to complete this task so break it into steps and solve them one by one. @@ -113,9 +113,9 @@ Action: [ END FORMAT ] Do not provide anything extra just your thought and action. -''' +""" -SYSTEM_MESSAGE = f'''SYSTEM INFO: +SYSTEM_MESSAGE = f"""SYSTEM INFO: You am an autonomous coding agent, here to provide solutions for coding issues. You have been designed to assist you with a wide range of programming tasks, from code editing and debugging to testing and deployment. You have access to a variety of tools and commands that you can use to help you solve problems efficiently. @@ -123,10 +123,10 @@ You have access to a variety of tools and commands that you can use to help you {GENERAL_GUIDELINES} {DOCUMENTATION} -'''.strip() +""".strip() -def NO_ACTION(latest): return f''' +def NO_ACTION(latest): return f""" You did not include any action to take in your most recent output: ===== Output ====== @@ -141,20 +141,20 @@ This time, be sure to use the exact format below, replacing anything in <> with {RESPONSE_FORMAT} It is crucial you use the format provided as the output will be parsed automatically. -''' +""" def file_info(file: str, line: int): if file: - return f'''CURRENT WORKSPACE: + return f"""CURRENT WORKSPACE: Open File: {file} on line {line} You can use these commands with the current file: Navigation: `scroll_up`, `scroll_down`, and `goto ` Modification: `edit ` - ''' + """ -def STEP_PROMPT(task, file, line_num): return f''' +def STEP_PROMPT(task, file, line_num): return f""" {RESPONSE_FORMAT} You are currently trying to complete this task: {task} @@ -168,7 +168,7 @@ Be very strict about the formatting that you use and make sure you follow the gu NEVER output multiple commands. ONLY take ONE STEP at a time. When you have completed your task run the "exit" command. Begin with your thought about the next step and then come up with an action to perform your thought. -'''.strip() +""".strip() def unpack_dict(data: dict, restrict: list[str] = []): @@ -185,13 +185,13 @@ def unpack_dict(data: dict, restrict: list[str] = []): return '\n'.join(lines) -def MEMORY_FORMAT(act, obs): return f''' +def MEMORY_FORMAT(act, obs): return f""" Previous Action: {unpack_dict(act, ["content"])} Output from Action: {unpack_dict(obs)} -'''.strip() +""".strip() def CONTEXT_PROMPT(memory, window): diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py index 01bddbd13e..2b97b88348 100644 --- a/agenthub/planner_agent/prompt.py +++ b/agenthub/planner_agent/prompt.py @@ -134,12 +134,12 @@ def get_hint(latest_action_id: str) -> str: """ Returns action type hint based on given action_id """ hints = { - '': 'You haven\'t taken any actions yet. Start by using `ls` to check out what files you\'re working with.', + '': "You haven't taken any actions yet. Start by using `ls` to check out what files you're working with.", ActionType.RUN: 'You should think about the command you just ran, what output it gave, and how that affects your plan.', ActionType.READ: 'You should think about the file you just read, what you learned from it, and how that affects your plan.', ActionType.WRITE: 'You just changed a file. You should think about how it affects your plan.', ActionType.BROWSE: 'You should think about the page you just visited, and what you learned from it.', - ActionType.THINK: 'Look at your last thought in the history above. What does it suggest? Don\'t think anymore--take action.', + ActionType.THINK: "Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.", ActionType.RECALL: 'You should think about the information you just recalled, and how it should affect your plan.', ActionType.ADD_TASK: 'You should think about the next action to take.', ActionType.MODIFY_TASK: 'You should think about the next action to take.', diff --git a/dev_config/python/.pre-commit-config.yaml b/dev_config/python/.pre-commit-config.yaml index 591b3b8714..50d62f9aea 100644 --- a/dev_config/python/.pre-commit-config.yaml +++ b/dev_config/python/.pre-commit-config.yaml @@ -7,19 +7,6 @@ repos: - id: check-yaml - id: debug-statements - - repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - args: ['--select=Q000'] # Q000 is the error code for single quote enforcement - additional_dependencies: - - flake8-quotes - - - repo: https://github.com/hhatto/autopep8 - rev: v2.1.0 - hooks: - - id: autopep8 - - repo: https://github.com/asottile/setup-cfg-fmt rev: v2.5.0 hooks: diff --git a/dev_config/python/ruff.toml b/dev_config/python/ruff.toml index bceca9293a..06ba2d3ace 100644 --- a/dev_config/python/ruff.toml +++ b/dev_config/python/ruff.toml @@ -1,3 +1,20 @@ exclude = [ "agenthub/monologue_agent/regression/", -] \ No newline at end of file +] + +[lint] +select = [ + "E", + "W", + "F", + "Q", +] + +ignore = [ + "E501", +] + +flake8-quotes = {inline-quotes = "single"} + +[format] +quote-style = "single" diff --git a/docs/architecture/Architecture.md b/docs/architecture/Architecture.md index 25e9a65723..33661a7337 100644 --- a/docs/architecture/Architecture.md +++ b/docs/architecture/Architecture.md @@ -11,4 +11,4 @@ This Overview is simplified to show the main components and their interactions. *__Disclaimer__: The backend architecture is a work in progress and is subject to change. The following diagram shows the current architecture of the backend based on the commit that is shown in the footer of the diagram.* -![backend_architecture.svg](backend_architecture.svg) \ No newline at end of file +![backend_architecture.svg](backend_architecture.svg) diff --git a/docs/architecture/README.md b/docs/architecture/README.md index 0ef62c2613..1d9abd1444 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -6,18 +6,17 @@ The generation of the backend architecture diagram is partially automated. The d - [py2puml](https://github.com/lucsorel/py2puml) installed ## Steps -1. Autogenerate the diagram by running the following command from the root of the repository: +1. Autogenerate the diagram by running the following command from the root of the repository: ```py2puml opendevin opendevin > docs/architecture/backend_architecture.puml``` 2. Open the generated file in a PlantUML editor, e.g. Visual Studio Code with the PlantUML extension or [PlantText](https://www.planttext.com/) -3. Review the generated PUML and make all necessary adjustments to the diagram (add missing parts, fix mistakes, improve positioning). +3. Review the generated PUML and make all necessary adjustments to the diagram (add missing parts, fix mistakes, improve positioning). *py2puml creates the diagram based on the type hints in the code, so missing or incorrect type hints may result in an incomplete or incorrect diagram.* -4. Review the diff between the new and the previous diagram and manually check if the changes are correct. +4. Review the diff between the new and the previous diagram and manually check if the changes are correct. *Make sure not to remove parts that were manually added to the diagram in the past and are still relevant.* 4. Add the commit hash of the commit that was used to generate the diagram to the diagram footer. 5. Export the diagram as PNG and SVG files and replace the existing diagrams in the `docs/architecture` directory. This can be done with (e.g. [PlantText](https://www.planttext.com/)) - diff --git a/docs/architecture/backend_architecture.puml b/docs/architecture/backend_architecture.puml index 70545ce5d7..bb0b6db237 100644 --- a/docs/architecture/backend_architecture.puml +++ b/docs/architecture/backend_architecture.puml @@ -219,4 +219,4 @@ opendevin.controller.agent_controller.AgentController -> opendevin.controller.co opendevin.controller.command_manager.CommandManager -> opendevin.sandbox.sandbox.DockerInteractive footer Based on f3fda42; Generated by //py2puml// -@enduml \ No newline at end of file +@enduml diff --git a/docs/architecture/backend_architecture.svg b/docs/architecture/backend_architecture.svg index 6a088d30f3..744b49f2fd 100644 --- a/docs/architecture/backend_architecture.svg +++ b/docs/architecture/backend_architecture.svg @@ -1 +1 @@ -opendevinactionagentbasebashbrowsefileoptasksobservationagentllm.llmcontrolleragent_controllercommand_managerplanstatesandbox.sandboxserver.sessionAgentEchoActioncontent: strrunnable: boolaction: strAgentFinishActionrunnable: boolaction: strAgentRecallActionquery: straction: strAgentSummarizeActionsummary: straction: strAgentThinkActionthought: strrunnable: boolaction: strExecutableActionNotExecutableActionActionNullActionaction: strCmdKillActionid: intaction: strCmdRunActioncommand: strbackground: boolaction: strBrowseURLActionurl: straction: strFileReadActionpath: straction: strFileWriteActionpath: strcontents: straction: strAddTaskActionparent: strgoal: strsubtasks: listaction: strModifyTaskActionid: strstate: straction: strAgentMessageObservationrole: strobservation: strAgentRecallObservationmemories: List[str]role: strobservation: strObservationcontent: strBrowserOutputObservationurl: strstatus_code: interror: boolobservation: strFileReadObservationpath: strobservation: strFileWriteObservationpath: strobservation: strAgentErrorObservationobservation: strNullObservationobservation: strCmdOutputObservationcommand_id: intcommand: strexit_code: intobservation: strUserMessageObservationrole: strobservation: strAgent_registry: Dict[str, Type[Agent]]llm: LLM_complete: NoneLLMmodel: Noneapi_key: Nonebase_url: None_debug_dir: None_debug_idx: None_debug_id: None_completion: NoneAgentControlleragent: Agentmax_iterations: intworkdir: strcommand_manager: CommandManagerstate: Stateplan: Plancallbacks: List[Callable]CommandManagerdirectory: Noneshell: NonePlanmain_goal: strtask: Taskmain_goal: strtask: NoneTaskid: strgoal: strparent: Task | Nonesubtasks: List[Task]id: Noneid: Noneparent: Nonegoal: strsubtasks: NoneStateplan: Planiteration: intbackground_commands_obs: List[CmdOutputObservation]history: List[Tuple[Action, Observation]]updated_info: List[Tuple[Action, Observation]]DockerInteractivebackground_commands: Dict[int, BackgroundCommand]instance_id: Noneinstance_id: Noneworkspace_dir: Noneworkspace_dir: Noneworkspace_dir: Nonetimeout: intcontainer_image: Nonecontainer_name: NoneBackgroundCommandSessionwebsocket: Nonecontroller: Optional[AgentController]agent: Optional[Agent]agent_task: NoneBased on f3fda42; Generated bypy2puml \ No newline at end of file +opendevinactionagentbasebashbrowsefileoptasksobservationagentllm.llmcontrolleragent_controllercommand_managerplanstatesandbox.sandboxserver.sessionAgentEchoActioncontent: strrunnable: boolaction: strAgentFinishActionrunnable: boolaction: strAgentRecallActionquery: straction: strAgentSummarizeActionsummary: straction: strAgentThinkActionthought: strrunnable: boolaction: strExecutableActionNotExecutableActionActionNullActionaction: strCmdKillActionid: intaction: strCmdRunActioncommand: strbackground: boolaction: strBrowseURLActionurl: straction: strFileReadActionpath: straction: strFileWriteActionpath: strcontents: straction: strAddTaskActionparent: strgoal: strsubtasks: listaction: strModifyTaskActionid: strstate: straction: strAgentMessageObservationrole: strobservation: strAgentRecallObservationmemories: List[str]role: strobservation: strObservationcontent: strBrowserOutputObservationurl: strstatus_code: interror: boolobservation: strFileReadObservationpath: strobservation: strFileWriteObservationpath: strobservation: strAgentErrorObservationobservation: strNullObservationobservation: strCmdOutputObservationcommand_id: intcommand: strexit_code: intobservation: strUserMessageObservationrole: strobservation: strAgent_registry: Dict[str, Type[Agent]]llm: LLM_complete: NoneLLMmodel: Noneapi_key: Nonebase_url: None_debug_dir: None_debug_idx: None_debug_id: None_completion: NoneAgentControlleragent: Agentmax_iterations: intworkdir: strcommand_manager: CommandManagerstate: Stateplan: Plancallbacks: List[Callable]CommandManagerdirectory: Noneshell: NonePlanmain_goal: strtask: Taskmain_goal: strtask: NoneTaskid: strgoal: strparent: Task | Nonesubtasks: List[Task]id: Noneid: Noneparent: Nonegoal: strsubtasks: NoneStateplan: Planiteration: intbackground_commands_obs: List[CmdOutputObservation]history: List[Tuple[Action, Observation]]updated_info: List[Tuple[Action, Observation]]DockerInteractivebackground_commands: Dict[int, BackgroundCommand]instance_id: Noneinstance_id: Noneworkspace_dir: Noneworkspace_dir: Noneworkspace_dir: Nonetimeout: intcontainer_image: Nonecontainer_name: NoneBackgroundCommandSessionwebsocket: Nonecontroller: Optional[AgentController]agent: Optional[Agent]agent_task: NoneBased on f3fda42; Generated bypy2puml diff --git a/docs/architecture/system_architecture.puml b/docs/architecture/system_architecture.puml index a3711af04a..b1b9713ef4 100644 --- a/docs/architecture/system_architecture.puml +++ b/docs/architecture/system_architecture.puml @@ -2,13 +2,13 @@ node frontend as frontend{ - + component App package components{ - + component Terminal - + component ChatInterface component BannerSettings @@ -37,8 +37,8 @@ node frontend as frontend{ Terminal -[hidden]u-> ChatInterface ChatInterface -[hidden]u-> BannerSettings - - + + interface "HTTP (:3001)" as HTTP HTTP - App @@ -50,13 +50,13 @@ node backend{ component Server 'defined in server/server.py, port is defined at startup with uvicorn - interface "Client WS\n(:3000/ws)" as client_socket + interface "Client WS\n(:3000/ws)" as client_socket client_socket - Server - + } - node AgentController{ - + node AgentController{ + } Server -d-> AgentController } diff --git a/docs/architecture/system_architecture.svg b/docs/architecture/system_architecture.svg index 95375acbd9..d259a4ca3b 100644 --- a/docs/architecture/system_architecture.svg +++ b/docs/architecture/system_architecture.svg @@ -1 +1 @@ -frontendcomponentsservicesbackendserverAppsocketHTTP (:3001)TerminalChatInterfaceBannerSettingschatServicesettingsServiceServerClient WS(:3000/ws)AgentControllerconnects toVITE_TERMINAL_WS_URL \ No newline at end of file +frontendcomponentsservicesbackendserverAppsocketHTTP (:3001)TerminalChatInterfaceBannerSettingschatServicesettingsServiceServerClient WS(:3000/ws)AgentControllerconnects toVITE_TERMINAL_WS_URL diff --git a/docs/guides/GoogleLLMs.md b/docs/guides/GoogleLLMs.md index c85723117e..19521d1ecb 100644 --- a/docs/guides/GoogleLLMs.md +++ b/docs/guides/GoogleLLMs.md @@ -23,4 +23,4 @@ GOOGLE_APPLICATION_CREDENTIALS="" VERTEXAI_PROJECT="" VERTEXAI_LOCATION="" LLM_MODEL="vertex_ai/" -``` \ No newline at end of file +``` diff --git a/evaluation/README.md b/evaluation/README.md index 7393c9bf7b..7724f53e66 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -76,4 +76,4 @@ We have filtered out the problematic 120 instances, resulting in the creation of | Model/Agent | #instances | #init | #apply | #resolve | |------------------------|------------|-------|--------|----------| | Gold | 450 | 450 | 450 | 450 | -| Devin | 450 | 450 | 426 | 83 | \ No newline at end of file +| Devin | 450 | 450 | 426 | 83 | diff --git a/evaluation/SWE-bench/README.md b/evaluation/SWE-bench/README.md index 818abdd6f7..fb16c29fc8 100644 --- a/evaluation/SWE-bench/README.md +++ b/evaluation/SWE-bench/README.md @@ -72,7 +72,7 @@ swe-bench@2f3a6b9fcab2:/swe-bench$ ./harness/run_evaluation.sh 2024-03-20 09:24:54,970 - INFO - [matplotlib__matplotlib__3.6] [matplotlib__matplotlib-24362] Apply patch successful (test) 2024-03-20 09:24:54,974 - INFO - [matplotlib__matplotlib__3.6] [matplotlib__matplotlib-24362] Apply patch successful (pred) 2024-03-20 09:25:04,775 - INFO - [matplotlib__matplotlib__3.6] [matplotlib__matplotlib-24362] Test script run successful -swe-bench@2f3a6b9fcab2:/swe-bench$ +swe-bench@2f3a6b9fcab2:/swe-bench$ ``` ### Interpret Results diff --git a/evaluation/SWE-bench/commands.sh b/evaluation/SWE-bench/commands.sh index 8db25c2b1f..5fec234e4b 100644 --- a/evaluation/SWE-bench/commands.sh +++ b/evaluation/SWE-bench/commands.sh @@ -42,7 +42,7 @@ search_dir() { echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search." return fi - + echo "Found $num_matches matches for \"$search_term\" in $dir:" echo "$matches" | awk '{$2=$2; gsub(/^\.+\/+/, "./", $2); print $2 " ("$1" matches)"}' echo "End of matches for \"$search_term\" in $dir" @@ -95,7 +95,7 @@ search_file() { fi # Calculate total number of matches local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}') - + # calculate total number of lines matched local num_lines=$(echo "$matches" | cut -d: -f1 | sort | uniq | wc -l | awk '{$1=$1; print $0}') # if num_lines is > 100, print an error @@ -152,4 +152,4 @@ find_file() { local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}') echo "Found $num_matches matches for \"$file_name\" in $dir:" echo "$matches" | awk '{print $0}' -} \ No newline at end of file +} diff --git a/evaluation/SWE-bench/notebooks/devin_eval_analysis.ipynb b/evaluation/SWE-bench/notebooks/devin_eval_analysis.ipynb index 2f31be8218..b6633aac7c 100644 --- a/evaluation/SWE-bench/notebooks/devin_eval_analysis.ipynb +++ b/evaluation/SWE-bench/notebooks/devin_eval_analysis.ipynb @@ -36,12 +36,12 @@ "source": [ "#fetch devin's evaluation outputs into a dataframe\n", "def get_devin_eval_output():\n", - " repo_url = \"CognitionAI/devin-swebench-results\"\n", - " folder_path = \"output_diffs\"\n", + " repo_url = 'CognitionAI/devin-swebench-results'\n", + " folder_path = 'output_diffs'\n", "\n", - " base_url = \"https://api.github.com/repos/\"\n", - " pass_api_url = f\"{base_url}{repo_url}/contents/{folder_path}/pass\"\n", - " failed_api_url = f\"{base_url}{repo_url}/contents/{folder_path}/fail\"\n", + " base_url = 'https://api.github.com/repos/'\n", + " pass_api_url = f'{base_url}{repo_url}/contents/{folder_path}/pass'\n", + " failed_api_url = f'{base_url}{repo_url}/contents/{folder_path}/fail'\n", "\n", " files_info = []\n", "\n", @@ -50,13 +50,13 @@ " if response.status_code == 200:\n", " contents = response.json()\n", " for item in tqdm(contents):\n", - " if item[\"type\"] == \"file\":\n", + " if item['type'] == 'file':\n", " file_url = f\"https://raw.githubusercontent.com/{repo_url}/main/{folder_path}/{subfolder_name}/{item['name']}\"\n", " file_content = requests.get(file_url).text\n", - " files_info.append({\"instance_id\":item['name'][:-9],\"content\": file_content, \"pass or fail\": subfolder_name})\n", + " files_info.append({'instance_id':item['name'][:-9],'content': file_content, 'pass or fail': subfolder_name})\n", "\n", - " get_files(pass_api_url, \"pass\")\n", - " get_files(failed_api_url, \"fail\")\n", + " get_files(pass_api_url, 'pass')\n", + " get_files(failed_api_url, 'fail')\n", "\n", " df = pd.DataFrame(files_info)\n", " return df" @@ -69,7 +69,7 @@ "outputs": [], "source": [ "#get the swe test dataset\n", - "swe_df = pd.DataFrame(load_dataset(\"princeton-nlp/SWE-bench\", split=\"test\"))\n", + "swe_df = pd.DataFrame(load_dataset('princeton-nlp/SWE-bench', split='test'))\n", "id2repo = {i:repo for i,repo in zip(swe_df.instance_id, swe_df.repo)}" ] }, @@ -121,7 +121,7 @@ "source": [ "# check the pass/fail status of devin outputs across different repos\n", "sns.countplot(y='repo', hue='pass or fail', data=df)\n", - "plt.title('Devin\\'s pass/fail status across repos')" + "plt.title(\"Devin's pass/fail status across repos\")" ] }, { @@ -156,7 +156,7 @@ "repo_counts = df.groupby(['repo', 'pass or fail']).size().unstack(fill_value=0)\n", "repo_counts['ratio'] = repo_counts['pass'] / (repo_counts['pass'] + repo_counts['fail'])\n", "sns.barplot(y=repo_counts.index, x='ratio', data=repo_counts.reset_index(), order = list(df.repo.drop_duplicates()))\n", - "plt.title('Devin\\'s issue-resolve rate across repos')\n", + "plt.title(\"Devin's issue-resolve rate across repos\")\n", "plt.xlabel('%Resolved')" ] }, @@ -229,7 +229,7 @@ "repo_counts = merged_df.groupby(['repo', 'subset']).size().unstack(fill_value=0)\n", "repo_counts['ratio'] = repo_counts['devin'] / (repo_counts['devin'] + repo_counts['SWE-bench_test'])\n", "sns.barplot(y=repo_counts.index, x='ratio', data=repo_counts.reset_index(), order = list(df.repo.drop_duplicates()))\n", - "plt.title('Repo-level sampling rate of Devin\\'s test set')\n", + "plt.title(\"Repo-level sampling rate of Devin's test set\")\n", "plt.xlabel('')" ] }, diff --git a/evaluation/SWE-bench/scripts/download_test_data.py b/evaluation/SWE-bench/scripts/download_test_data.py index f664f0c2ac..b893fe659b 100644 --- a/evaluation/SWE-bench/scripts/download_test_data.py +++ b/evaluation/SWE-bench/scripts/download_test_data.py @@ -1,6 +1,5 @@ from datasets import load_dataset -import pandas as pd -dataset = load_dataset("princeton-nlp/SWE-bench") -test = dataset["test"].to_pandas() -test.to_json("data/processed/swe-bench-test.json", orient="records") +dataset = load_dataset('princeton-nlp/SWE-bench') +test = dataset['test'].to_pandas() +test.to_json('data/processed/swe-bench-test.json', orient='records') diff --git a/evaluation/SWE-bench/scripts/prepare_devin_outputs_for_evaluation.py b/evaluation/SWE-bench/scripts/prepare_devin_outputs_for_evaluation.py index d4e6906d2e..c97e2bee51 100644 --- a/evaluation/SWE-bench/scripts/prepare_devin_outputs_for_evaluation.py +++ b/evaluation/SWE-bench/scripts/prepare_devin_outputs_for_evaluation.py @@ -18,12 +18,12 @@ import requests from tqdm import tqdm def get_devin_eval_output(setting): - repo_url = "CognitionAI/devin-swebench-results" - folder_path = "output_diffs" + repo_url = 'CognitionAI/devin-swebench-results' + folder_path = 'output_diffs' - base_url = "https://api.github.com/repos/" - pass_api_url = f"{base_url}{repo_url}/contents/{folder_path}/pass" - failed_api_url = f"{base_url}{repo_url}/contents/{folder_path}/fail" + base_url = 'https://api.github.com/repos/' + pass_api_url = f'{base_url}{repo_url}/contents/{folder_path}/pass' + failed_api_url = f'{base_url}{repo_url}/contents/{folder_path}/fail' pass_files_info = [] failed_files_info = [] @@ -33,47 +33,47 @@ def get_devin_eval_output(setting): if response.status_code == 200: contents = response.json() for item in tqdm(contents): - if item["type"] == "file": + if item['type'] == 'file': file_url = f"https://raw.githubusercontent.com/{repo_url}/main/{folder_path}/{subfolder_name}/{item['name']}" file_content = requests.get(file_url).text instance_id = item['name'][:-9] - model_name = "Devin" # Update with actual model name + model_name = 'Devin' # Update with actual model name files_info.append({ - "instance_id": instance_id, - "model_patch": file_content, - "model_name_or_path": model_name, - "pass_or_fail": subfolder_name + 'instance_id': instance_id, + 'model_patch': file_content, + 'model_name_or_path': model_name, + 'pass_or_fail': subfolder_name }) - if setting == "passed" or setting == "all": - get_files(pass_api_url, "pass", pass_files_info) - if setting == "failed" or setting == "all": - get_files(failed_api_url, "fail", failed_files_info) + if setting == 'passed' or setting == 'all': + get_files(pass_api_url, 'pass', pass_files_info) + if setting == 'failed' or setting == 'all': + get_files(failed_api_url, 'fail', failed_files_info) script_dir = os.path.dirname(os.path.abspath(__file__)) - output_dir = os.path.join(script_dir, "../data/devin/") + output_dir = os.path.join(script_dir, '../data/devin/') if not os.path.exists(output_dir): os.makedirs(output_dir) - if setting == "passed" or setting == "all": - with open(os.path.join(output_dir, "devin_swe_passed.json"), "w") as pass_file: + if setting == 'passed' or setting == 'all': + with open(os.path.join(output_dir, 'devin_swe_passed.json'), 'w') as pass_file: json.dump(pass_files_info, pass_file, indent=4) - if setting == "failed" or setting == "all": - with open(os.path.join(output_dir, "devin_swe_failed.json"), "w") as fail_file: + if setting == 'failed' or setting == 'all': + with open(os.path.join(output_dir, 'devin_swe_failed.json'), 'w') as fail_file: json.dump(failed_files_info, fail_file, indent=4) - if setting == "all": + if setting == 'all': merged_output = pass_files_info + failed_files_info - with open(os.path.join(output_dir, "devin_swe_outputs.json"), "w") as merge_file: + with open(os.path.join(output_dir, 'devin_swe_outputs.json'), 'w') as merge_file: json.dump(merged_output, merge_file, indent=4) if __name__ == '__main__': if len(sys.argv) != 2: - print("Usage: python script_name.py ") + print('Usage: python script_name.py ') sys.exit(1) setting = sys.argv[1] - get_devin_eval_output(setting) \ No newline at end of file + get_devin_eval_output(setting) diff --git a/evaluation/regression/cases/hello-name/start/hello_world.sh b/evaluation/regression/cases/hello-name/start/hello_world.sh index fe98824ea9..2a43bb269b 100755 --- a/evaluation/regression/cases/hello-name/start/hello_world.sh +++ b/evaluation/regression/cases/hello-name/start/hello_world.sh @@ -1,2 +1,2 @@ #!/bin/bash -echo "hello world" \ No newline at end of file +echo "hello world" diff --git a/evaluation/regression/cases/hello-world/test_hello_world.py b/evaluation/regression/cases/hello-world/test_hello_world.py index e44c11a874..027aab185b 100644 --- a/evaluation/regression/cases/hello-world/test_hello_world.py +++ b/evaluation/regression/cases/hello-world/test_hello_world.py @@ -2,7 +2,7 @@ import os import pytest from conftest import agents -@pytest.mark.parametrize("agent", agents()) +@pytest.mark.parametrize('agent', agents()) def test_hello_world(task_file, run_test_case, agent): """ Test case for the "Hello, World!" Bash script using different agents. @@ -17,4 +17,4 @@ def test_hello_world(task_file, run_test_case, agent): # Execute the hello_world.sh script os.chdir(workspace_dir) output = os.popen('bash hello_world.sh').read() - assert output == 'Hello, World!\n' \ No newline at end of file + assert output == 'Hello, World!\n' diff --git a/evaluation/regression/cases/node-cli-rewrite/start/string_cli.py b/evaluation/regression/cases/node-cli-rewrite/start/string_cli.py index 2affff6081..ffd20d9bd1 100644 --- a/evaluation/regression/cases/node-cli-rewrite/start/string_cli.py +++ b/evaluation/regression/cases/node-cli-rewrite/start/string_cli.py @@ -1,8 +1,7 @@ import sys -import commands def print_help(): - help_text = ''' + help_text = """ Usage: python string_cli.py Commands: @@ -12,7 +11,7 @@ Commands: spongebob - Converts the input string to spongebob case. length - Returns the length of the input string. scramble - Randomly scrambles the characters in the input string. -''' +""" print(help_text) if __name__ == '__main__': diff --git a/evaluation/regression/cases/python-cli-help/start/string_cli.py b/evaluation/regression/cases/python-cli-help/start/string_cli.py index a878d51088..cd008d7565 100644 --- a/evaluation/regression/cases/python-cli-help/start/string_cli.py +++ b/evaluation/regression/cases/python-cli-help/start/string_cli.py @@ -1,5 +1,4 @@ import sys -import commands if __name__ == '__main__': if len(sys.argv) < 3: diff --git a/evaluation/regression/cases/server-test/start/server.py b/evaluation/regression/cases/server-test/start/server.py index 28c383dbe8..6c179b5cb3 100644 --- a/evaluation/regression/cases/server-test/start/server.py +++ b/evaluation/regression/cases/server-test/start/server.py @@ -13,6 +13,6 @@ def run(server_class=HTTPServer, handler_class=HelloWorldHandler, port=8000): print(f'Starting httpd on port {port}...') httpd.serve_forever() -if __name__ == "__main__": - print("starting server...") +if __name__ == '__main__': + print('starting server...') run() diff --git a/evaluation/regression/conftest.py b/evaluation/regression/conftest.py index 9a43f54016..8727b08940 100644 --- a/evaluation/regression/conftest.py +++ b/evaluation/regression/conftest.py @@ -21,7 +21,7 @@ def agents(): agents.append(agent) return agents -@pytest.fixture(scope="session") +@pytest.fixture(scope='session') def test_cases_dir(): """Fixture that provides the directory path for test cases. @@ -69,7 +69,7 @@ def model(request): Returns: The model name, defaulting to "gpt-3.5-turbo-1106". """ - return request.config.getoption("model", default="gpt-3.5-turbo-1106") + return request.config.getoption('model', default='gpt-3.5-turbo-1106') @pytest.fixture def run_test_case(test_cases_dir, workspace_dir, request): @@ -112,13 +112,13 @@ def run_test_case(test_cases_dir, workspace_dir, request): else: os.makedirs(os.path.join(agent_dir, 'workspace')) agents_ref = { - "monologue_agent":"MonologueAgent", - "codeact_agent":"CodeActAgent" + 'monologue_agent':'MonologueAgent', + 'codeact_agent':'CodeActAgent' } - process = subprocess.Popen(["python3", f"{SCRIPT_DIR}/../../opendevin/main.py", "-d", f"{os.path.join(agent_dir, 'workspace')}", "-c", f"{agents_ref[agent]}", "-t", f"{task}", "-m", "gpt-3.5-turbo-1106"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + process = subprocess.Popen(['python3', f'{SCRIPT_DIR}/../../opendevin/main.py', '-d', f"{os.path.join(agent_dir, 'workspace')}", '-c', f'{agents_ref[agent]}', '-t', f'{task}', '-m', 'gpt-3.5-turbo-1106'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) stdout, stderr = process.communicate() - logging.info(f"Stdout: {stdout}") - logging.error(f"Stderr: {stderr}") + logging.info(f'Stdout: {stdout}') + logging.error(f'Stderr: {stderr}') assert process.returncode == 0 return os.path.join(agent_dir, 'workspace') @@ -134,7 +134,7 @@ def pytest_configure(config): now = datetime.datetime.now() logging.basicConfig( level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", + format='%(asctime)s [%(levelname)s] %(message)s', handlers=[ logging.FileHandler(f"test_results_{now.strftime('%Y%m%d_%H%M%S')}.log"), logging.StreamHandler() diff --git a/evaluation/regression/run_tests.py b/evaluation/regression/run_tests.py index 9235705fb2..fb64aed1e5 100644 --- a/evaluation/regression/run_tests.py +++ b/evaluation/regression/run_tests.py @@ -18,6 +18,6 @@ if __name__ == '__main__': parser_args = parser.parse_args() config.config['OPENAI_API_KEY'] = parser_args.OPENAI_API_KEY - args = ['-v', 'evaluation/regression/cases',f"-o model={parser_args.model}"] + args = ['-v', 'evaluation/regression/cases',f'-o model={parser_args.model}'] - pytest.main(args) \ No newline at end of file + pytest.main(args) diff --git a/frontend/.env b/frontend/.env index ed87cc3840..6ad279902c 100644 --- a/frontend/.env +++ b/frontend/.env @@ -1,4 +1,4 @@ VITE_BACKEND_HOST="127.0.0.1:3000" VITE_USE_TLS="false" VITE_INSECURE_SKIP_VERIFY="false" -VITE_FRONTEND_PORT="3001" \ No newline at end of file +VITE_FRONTEND_PORT="3001" diff --git a/frontend/.husky/pre-commit b/frontend/.husky/pre-commit index a336d7e46e..552a158849 100755 --- a/frontend/.husky/pre-commit +++ b/frontend/.husky/pre-commit @@ -1,3 +1,3 @@ #!/bin/sh cd frontend -npx lint-staged \ No newline at end of file +npx lint-staged diff --git a/frontend/.prettierrc.json b/frontend/.prettierrc.json index 5d50a9cdd1..ffa1b71ab5 100644 --- a/frontend/.prettierrc.json +++ b/frontend/.prettierrc.json @@ -1,3 +1,3 @@ { "trailingComma": "all" -} \ No newline at end of file +} diff --git a/frontend/scripts/make-i18n-translations.cjs b/frontend/scripts/make-i18n-translations.cjs index e545ea83fa..948f2a0198 100644 --- a/frontend/scripts/make-i18n-translations.cjs +++ b/frontend/scripts/make-i18n-translations.cjs @@ -18,7 +18,7 @@ Object.entries(i18n).forEach(([key, transMap]) => { const localesPath = path.join(__dirname, "../public/locales"); if (fs.existsSync(localesPath)) { fs.rmSync(localesPath, { recursive: true }); -} +} // write translation files Object.entries(translationMap).forEach(([lang, transMap]) => { diff --git a/frontend/src/App.css b/frontend/src/App.css index 629e85a778..4c614c6618 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -2,5 +2,3 @@ @tailwind base; @tailwind components; @tailwind utilities; - - diff --git a/frontend/src/logo.svg b/frontend/src/logo.svg index 9dfc1c058c..7169476033 100644 --- a/frontend/src/logo.svg +++ b/frontend/src/logo.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/poetry.lock b/poetry.lock index 55dac00249..03a5725076 100644 --- a/poetry.lock +++ b/poetry.lock @@ -185,20 +185,6 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] -[[package]] -name = "autopep8" -version = "2.1.0" -description = "A tool that automatically formats Python code to conform to the PEP 8 style guide" -optional = false -python-versions = ">=3.8" -files = [ - {file = "autopep8-2.1.0-py2.py3-none-any.whl", hash = "sha256:2bb76888c5edbcafe6aabab3c47ba534f5a2c2d245c2eddced4a30c4b4946357"}, - {file = "autopep8-2.1.0.tar.gz", hash = "sha256:1fa8964e4618929488f4ec36795c7ff12924a68b8bf01366c094fc52f770b6e7"}, -] - -[package.dependencies] -pycodestyle = ">=2.11.0" - [[package]] name = "azure-core" version = "1.30.1" @@ -980,22 +966,6 @@ docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1 testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] typing = ["typing-extensions (>=4.8)"] -[[package]] -name = "flake8" -version = "7.0.0" -description = "the modular source code checker: pep8 pyflakes and co" -optional = false -python-versions = ">=3.8.1" -files = [ - {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, - {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, -] - -[package.dependencies] -mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" -pyflakes = ">=3.2.0,<3.3.0" - [[package]] name = "flatbuffers" version = "24.3.25" @@ -2474,17 +2444,6 @@ pillow = ">=8" pyparsing = ">=2.3.1" python-dateutil = ">=2.7" -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -optional = false -python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -3891,17 +3850,6 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.7.0" -[[package]] -name = "pycodestyle" -version = "2.11.1" -description = "Python style guide checker" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, -] - [[package]] name = "pycparser" version = "2.22" @@ -4040,17 +3988,6 @@ typing-extensions = "*" [package.extras] dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "pytest", "pytest-asyncio", "pytest-trio", "sphinx", "toml", "tox", "trio", "trio", "trio-typing", "twine", "twisted", "validate-pyproject[all]"] -[[package]] -name = "pyflakes" -version = "3.2.0" -description = "passive checker of Python programs" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, - {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, -] - [[package]] name = "pygments" version = "2.17.2" @@ -6002,4 +5939,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "5b452934e7edb572f6f0851d63b17268645eef631a117d645bfb556e6d231ebd" +content-hash = "4679d53a31af9421e11f36ff72977a5268d5d4e37abca91f4697102ed167e6d3" diff --git a/pyproject.toml b/pyproject.toml index 396520d854..15f393c082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,6 @@ llama-index-embeddings-azure-openai = "*" llama-index-embeddings-ollama = "*" [tool.poetry.group.dev.dependencies] -autopep8 = "v2.1.0" -flake8 = "7.0.0" ruff = "0.3.7" mypy = "1.9.0" pre-commit = "3.7.0"