Removed config from agent controller (#3038)

* Removed config from agent controller
* Fix tests
* Increase budget
* Update tests
* Update prompts
* Add missing prompt
* Fix mistaken deletions
* Fix browsing test
* Fixed browse tests
2025-12-26 05:48:36 +08:00 · 2024-07-22 13:42:57 -04:00 · 2024-07-22 13:42:57 -04:00 · 4099e48122
commit 4099e48122
parent c3d4f6495f
24 changed files with 856 additions and 113 deletions
--- a/opendevin/controller/agent_controller.py
+++ b/opendevin/controller/agent_controller.py
@ -5,7 +5,7 @@ from typing import Optional, Type
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State, TrafficControlState
 from opendevin.controller.stuck import StuckDetector
-from opendevin.core.config import config
+from opendevin.core.config import LLMConfig
 from opendevin.core.exceptions import (
    LLMMalformedActionError,
    LLMNoActionError,
@ -38,8 +38,6 @@ from opendevin.events.observation import (
 )
 from opendevin.llm.llm import LLM

-MAX_ITERATIONS = config.max_iterations
-MAX_BUDGET_PER_TASK = config.max_budget_per_task
 # note: RESUME is only available on web GUI
 TRAFFIC_CONTROL_REMINDER = (
    "Please click on resume button if you'd like to continue, or start a new task."
@ -53,6 +51,7 @@ class AgentController:
    event_stream: EventStream
    state: State
    confirmation_mode: bool
+    agent_to_llm_config: dict[str, LLMConfig]
    agent_task: Optional[asyncio.Task] = None
    parent: 'AgentController | None' = None
    delegate: 'AgentController | None' = None
@ -62,10 +61,11 @@ class AgentController:
        self,
        agent: Agent,
        event_stream: EventStream,
+        max_iterations: int,
+        max_budget_per_task: float | None = None,
+        agent_to_llm_config: dict[str, LLMConfig] | None = None,
        sid: str = 'default',
-        max_iterations: int | None = MAX_ITERATIONS,
        confirmation_mode: bool = False,
-        max_budget_per_task: float | None = MAX_BUDGET_PER_TASK,
        initial_state: State | None = None,
        is_delegate: bool = False,
        headless_mode: bool = True,
@ -75,9 +75,11 @@ class AgentController:
        Args:
            agent: The agent instance to control.
            event_stream: The event stream to publish events to.
-            sid: The session ID of the agent.
            max_iterations: The maximum number of iterations the agent can run.
            max_budget_per_task: The maximum budget (in USD) allowed per task, beyond which the agent will stop.
+            agent_to_llm_config: A dictionary mapping agent names to LLM configurations in the case that
+                we delegate to a different agent.
+            sid: The session ID of the agent.
            initial_state: The initial state of the controller.
            is_delegate: Whether this controller is a delegate.
            headless_mode: Whether the agent is run in headless mode.
@ -94,16 +96,13 @@ class AgentController:
        )

        # state from the previous session, state from a parent agent, or a fresh state
-        max_iterations = (
-            max_iterations if max_iterations is not None else MAX_ITERATIONS
-        )
        self.set_initial_state(
            state=initial_state,
            max_iterations=max_iterations,
            confirmation_mode=confirmation_mode,
        )
-
        self.max_budget_per_task = max_budget_per_task
+        self.agent_to_llm_config = agent_to_llm_config if agent_to_llm_config else {}

        # stuck helper
        self._stuck_detector = StuckDetector(self.state)
@ -253,7 +252,7 @@ class AgentController:

    async def start_delegate(self, action: AgentDelegateAction):
        agent_cls: Type[Agent] = Agent.get_cls(action.agent)
-        llm_config = config.get_llm_config_from_agent(action.agent)
+        llm_config = self.agent_to_llm_config.get(action.agent, self.agent.llm.config)
        llm = LLM(config=llm_config)
        delegate_agent = agent_cls(llm=llm)
        state = State(
@ -274,6 +273,7 @@ class AgentController:
            event_stream=self.event_stream,
            max_iterations=self.state.max_iterations,
            max_budget_per_task=self.max_budget_per_task,
+            agent_to_llm_config=self.agent_to_llm_config,
            initial_state=state,
            is_delegate=True,
        )
@ -423,7 +423,7 @@ class AgentController:
    def set_initial_state(
        self,
        state: State | None,
-        max_iterations: int = MAX_ITERATIONS,
+        max_iterations: int,
        confirmation_mode: bool = False,
    ):
        # state from the previous session, state from a parent agent, or a new state
--- a/opendevin/core/config.py
+++ b/opendevin/core/config.py
@ -275,6 +275,10 @@ class AppConfig(metaclass=Singleton):
    def set_agent_config(self, value: AgentConfig, name='agent'):
        self.agents[name] = value

+    def get_agent_to_llm_config_map(self) -> dict[str, LLMConfig]:
+        """Get a map of agent names to llm configs."""
+        return {name: self.get_llm_config_from_agent(name) for name in self.agents}
+
    def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
        agent_config: AgentConfig = self.get_agent_config(name)
        llm_config_name = agent_config.llm_config
--- a/opendevin/core/main.py
+++ b/opendevin/core/main.py
@ -33,8 +33,8 @@ def read_task_from_stdin() -> str:
 async def run_agent_controller(
    agent: Agent,
    task_str: str,
-    max_iterations: int | None = None,
-    max_budget_per_task: float | None = None,
+    max_iterations: int,
+    max_budget_per_task: float,
    exit_on_message: bool = False,
    fake_user_response_fn: Callable[[State | None], str] | None = None,
    sandbox: Sandbox | None = None,
@ -75,6 +75,7 @@ async def run_agent_controller(
        agent=agent,
        max_iterations=max_iterations,
        max_budget_per_task=max_budget_per_task,
+        agent_to_llm_config=config.get_agent_to_llm_config_map(),
        event_stream=event_stream,
        initial_state=initial_state,
        headless_mode=headless_mode,
--- a/opendevin/server/session/agent.py
+++ b/opendevin/server/session/agent.py
@ -4,7 +4,7 @@ from agenthub.codeact_agent.codeact_agent import CodeActAgent
 from opendevin.controller import AgentController
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.config import SandboxConfig
+from opendevin.core.config import LLMConfig, SandboxConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.stream import EventStream
 from opendevin.runtime import DockerSSHBox, get_runtime_cls
@ -37,6 +37,8 @@ class AgentSession:
        agent: Agent,
        confirmation_mode: bool,
        max_iterations: int,
+        max_budget_per_task: float | None = None,
+        agent_to_llm_config: dict[str, LLMConfig] | None = None,
    ):
        """Starts the agent session.

@ -48,7 +50,13 @@ class AgentSession:
                'Session already started. You need to close this session and start a new one.'
            )
        await self._create_runtime(runtime_name, sandbox_config)
-        await self._create_controller(agent, confirmation_mode, max_iterations)
+        await self._create_controller(
+            agent,
+            confirmation_mode,
+            max_iterations,
+            max_budget_per_task=max_budget_per_task,
+            agent_to_llm_config=agent_to_llm_config,
+        )

    async def close(self):
        if self._closed:
@ -74,7 +82,12 @@ class AgentSession:
        await self.runtime.ainit()

    async def _create_controller(
-        self, agent: Agent, confirmation_mode: bool, max_iterations: int
+        self,
+        agent: Agent,
+        confirmation_mode: bool,
+        max_iterations: int,
+        max_budget_per_task: float | None = None,
+        agent_to_llm_config: dict[str, LLMConfig] | None = None,
    ):
        """Creates an AgentController instance."""
        if self.controller is not None:
@ -100,6 +113,8 @@ class AgentSession:
            event_stream=self.event_stream,
            agent=agent,
            max_iterations=int(max_iterations),
+            max_budget_per_task=max_budget_per_task,
+            agent_to_llm_config=agent_to_llm_config,
            confirmation_mode=confirmation_mode,
            # AgentSession is designed to communicate with the frontend, so we don't want to
            # run the agent in headless mode.
@ -107,7 +122,9 @@ class AgentSession:
        )
        try:
            agent_state = State.restore_from_session(self.sid)
-            self.controller.set_initial_state(agent_state)
+            self.controller.set_initial_state(
+                agent_state, max_iterations, confirmation_mode
+            )
            logger.info(f'Restored agent state from session, sid: {self.sid}')
        except Exception as e:
            print('Error restoring state', e)
--- a/opendevin/server/session/session.py
+++ b/opendevin/server/session/session.py
@ -106,6 +106,8 @@ class Session:
                agent=agent,
                confirmation_mode=confirmation_mode,
                max_iterations=max_iterations,
+                max_budget_per_task=self.config.max_budget_per_task,
+                agent_to_llm_config=self.config.get_agent_to_llm_config_map(),
            )
        except Exception as e:
            logger.exception(f'Error creating controller: {e}')
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_002.log
@ -8,7 +8,7 @@ possible next action to accomplish your goal. Your answer will be interpreted
 and executed by a program, make sure to follow the formatting instructions.

 # Goal:
-Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
+Sure! Let me browse the server's homepage at http://localhost:8000 to find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"

 # Action Space

--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_003.log
@ -8,7 +8,7 @@ possible next action to accomplish your goal. Your answer will be interpreted
 and executed by a program, make sure to follow the formatting instructions.

 # Goal:
-Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
+Sure! Let me browse the server's homepage at http://localhost:8000 to find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"

 # Action Space

--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_004.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_004.log
@ -8,7 +8,7 @@ possible next action to accomplish your goal. Your answer will be interpreted
 and executed by a program, make sure to follow the formatting instructions.

 # Goal:
-Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
+Sure! Let me browse the server's homepage at http://localhost:8000 to find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"

 # Action Space

--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log
@ -405,12 +405,12 @@ Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me fo


 <execute_browse>
-Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
+Sure! Let me browse the server's homepage at http://localhost:8000 to find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"
 </execute_browse>

 ----------

 OBSERVATION:
-{'content': 'The ultimate answer to life, the universe, and everything is: OpenDevin is all you need!'}
+{'content': 'The answer to life, the universe, and everything is: OpenDevin is all you need!'}

 ENVIRONMENT REMINDER: You have 8 turns left to complete the task. When finished reply with <finish></finish>
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log
@ -1,4 +1,4 @@
-Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.
+Sure! Let me browse the server's homepage at http://localhost:8000 to find the ultimate answer to life.

 <execute_browse>
 Get the content on "http://localhost:8000"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log
@ -1,5 +1,2 @@
-To accomplish our goal of retrieving the ultimate answer to life from localhost:8000, I need to start by accessing the content on that page. Here's my thought process and action:
-
-In order to accomplish my goal, I need to navigate to the specified URL using the goto action.
-
+In order to accomplish my goal, I need to navigate to the server's homepage at http://localhost:8000 to find the ultimate answer to life.
 ```goto("http://localhost:8000"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log
@ -1,6 +1,2 @@
-To accomplish our goal of retrieving the ultimate answer to life, we need to click on the "Click me" button. The button has a bid of 10, so we should click on it.
-
-Here's the action I'll take:
-
-In order to accomplish my goal, I need to click on the button with bid 10
+In order to accomplish my goal, I need to click on the button with bid 10 to reveal the answer to life, the universe, and everything.
 ```click("10"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log
@ -1,6 +1,3 @@
-Based on the current Accessibility Tree and the previous actions taken, it appears that we have already successfully navigated to the webpage and clicked the button to reveal the answer. The answer is now visible on the page.
+In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text is already visible and states "The answer is OpenDevin is all you need!"

-To accomplish our goal of retrieving the ultimate answer to life, I will now send this information to the user:
-
-```
-send_msg_to_user("The ultimate answer to life, the universe, and everything is: OpenDevin is all you need!"
+```send_msg_to_user("The answer to life, the universe, and everything is: OpenDevin is all you need!"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log
@ -1,9 +1,6 @@
-Thank you for providing the content from localhost:8000. Based on the information retrieved, I can now answer your question about the ultimate answer to life.
+The content of the server's homepage at http://localhost:8000 is:

-The ultimate answer to life, according to the content on localhost:8000, is:
-
-"OpenDevin is all you need!"
-
-This appears to be a playful reference to the popular science fiction series "The Hitchhiker's Guide to the Galaxy" by Douglas Adams, where the answer to the ultimate question of life, the universe, and everything is famously "42". In this case, it's been adapted to promote OpenDevin as the ultimate solution.
+"The answer to life, the universe, and everything is: OpenDevin is all you need!"

+If you have any further questions or tasks, feel free to ask!
 <finish></finish>
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log
@ -613,26 +613,29 @@ OBSERVATION:

 ----------

-I will now correct the typos in the `bad.txt` file.
+Let's correct the typos in the `bad.txt` file.
+
+1. "stupid typoo" should be "stupid typo".
+2. "No mor typos!" should be "No more typos!".
+
+I will make these changes now.
 <execute_ipython>
 edit_file_by_replace(
    "bad.txt",
-    to_replace="This is a stupid typoo.",
-    new_content="This is a stupid typo."
+    to_replace="This is a stupid typoo.\nNo mor typos!",
+    new_content="This is a stupid typo.\nNo more typos!"
 )
 </execute_ipython>

 ----------

 OBSERVATION:
-[File: /workspace/bad.txt (4 lines total after edit)]
-(this is the beginning of the file)
-1|This is a stupid typo.
-2|Really?
-3|No mor typos!
-4|Enjoy!
-(this is the end of the file)
-[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+[No exact match found in bad.txt for
+```
+This is a stupid typoo.
+No mor typos!
+```
+]


 ENVIRONMENT REMINDER: You have 12 turns left to complete the task.
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_004.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_004.log
@ -613,7 +613,39 @@ OBSERVATION:

 ----------

-I will now correct the typos in the `bad.txt` file.
+Let's correct the typos in the `bad.txt` file.
+
+1. "stupid typoo" should be "stupid typo".
+2. "No mor typos!" should be "No more typos!".
+
+I will make these changes now.
+<execute_ipython>
+edit_file_by_replace(
+    "bad.txt",
+    to_replace="This is a stupid typoo.\nNo mor typos!",
+    new_content="This is a stupid typo.\nNo more typos!"
+)
+</execute_ipython>
+
+----------
+
+OBSERVATION:
+[No exact match found in bad.txt for
+```
+This is a stupid typoo.
+No mor typos!
+```
+]
+
+
+----------
+
+It seems there was an issue with the exact match. I will correct the typos line by line instead.
+
+1. "This is a stupid typoo." should be "This is a stupid typo."
+2. "No mor typos!" should be "No more typos!".
+
+Let's proceed with these changes.
 <execute_ipython>
 edit_file_by_replace(
    "bad.txt",
@ -635,28 +667,4 @@ OBSERVATION:
 [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]


----------
-
-I will now correct the remaining typo on line 3.
-<execute_ipython>
-edit_file_by_replace(
-    "bad.txt",
-    to_replace="No mor typos!",
-    new_content="No more typos!"
-)
-</execute_ipython>
-
----------
-
-OBSERVATION:
-[File: /workspace/bad.txt (4 lines total after edit)]
-(this is the beginning of the file)
-1|This is a stupid typo.
-2|Really?
-3|No more typos!
-4|Enjoy!
-(this is the end of the file)
-[File updated (edited at line 3). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
-
-
 ENVIRONMENT REMINDER: You have 11 turns left to complete the task.
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_005.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_005.log
@ -0,0 +1,694 @@
+
+
+----------
+
+A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
+For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+
+
+Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
+open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
+    Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
+    It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down`
+    to view the file if you want to see more.
+    Args:
+    path: str: The path to the file to open, preferred absolute path.
+    line_number: int | None = 1: The line number to move to. Defaults to 1.
+    context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100.
+
+goto_line(line_number: int) -> None:
+    Moves the window to show the specified line number.
+    Args:
+    line_number: int: The line number to move to.
+
+scroll_down() -> None:
+    Moves the window down by 100 lines.
+    Args:
+    None
+
+scroll_up() -> None:
+    Moves the window up by 100 lines.
+    Args:
+    None
+
+create_file(filename: str) -> None:
+    Creates and opens a new file with the given name.
+    Args:
+    filename: str: The name of the file to create.
+
+edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
+    Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`.
+    Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
+    Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
+    `edit_file_by_replace` will only replace the *first* matching occurrences.
+    For example, given a file "/workspace/example.txt" with the following content:
+    ```
+    line 1
+    line 2
+    line 2
+    line 3
+    ```
+    EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique:
+    edit_file_by_replace(
+    '/workspace/example.txt',
+    to_replace='line 2
+    line 3',
+    new_content='new line
+    line 3',
+    )
+    This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged.
+    The resulting file will be:
+    ```
+    line 1
+    line 2
+    new line
+    line 3
+    ```
+    REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string:
+    edit_file_by_replace(
+    '/workspace/example.txt',
+    to_replace='line 2
+    line 3',
+    new_content='',
+    )
+    Args:
+    file_name: str: The name of the file to edit.
+    to_replace: str: The content to search for and replace.
+    new_content: str: The new content to replace the old content with.
+
+insert_content_at_line(file_name: str, line_number: int, content: str) -> None:
+    Insert content at the given line number in a file.
+    This will NOT modify the content of the lines before OR after the given line number.
+    For example, if the file has the following content:
+    ```
+    line 1
+    line 2
+    line 3
+    ```
+    and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to:
+    ```
+    line 1
+    new line
+    line 2
+    line 3
+    ```
+    Args:
+    file_name: str: The name of the file to edit.
+    line_number: int: The line number (starting from 1) to insert the content after.
+    content: str: The content to insert.
+
+append_file(file_name: str, content: str) -> None:
+    Append content to the given file.
+    It appends text `content` to the end of the specified file.
+    Args:
+    file_name: str: The name of the file to edit.
+    line_number: int: The line number (starting from 1) to insert the content after.
+    content: str: The content to insert.
+
+search_dir(search_term: str, dir_path: str = './') -> None:
+    Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
+    Args:
+    search_term: str: The term to search for.
+    dir_path: Optional[str]: The path to the directory to search.
+
+search_file(search_term: str, file_path: Optional[str] = None) -> None:
+    Searches for search_term in file. If file is not provided, searches in the current open file.
+    Args:
+    search_term: str: The term to search for.
+    file_path: Optional[str]: The path to the file to search.
+
+find_file(file_name: str, dir_path: str = './') -> None:
+    Finds all files with the given name in the specified directory.
+    Args:
+    file_name: str: The name of the file to find.
+    dir_path: Optional[str]: The path to the directory to search.
+
+parse_pdf(file_path: str) -> None:
+    Parses the content of a PDF file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_docx(file_path: str) -> None:
+    Parses the content of a DOCX file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_latex(file_path: str) -> None:
+    Parses the content of a LaTex file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_pptx(file_path: str) -> None:
+    Parses the content of a pptx file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line '        print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+
+The assistant's response should be concise.
+The assistant should include ONLY ONE <execute_ipython> or <execute_bash> in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+
+
+----------
+
+Here is an example of how you can interact with the environment for task solving:
+
+--- START OF EXAMPLE ---
+
+USER: We are currently solving the following issue within our repository. Here is the issue text:
+--- BEGIN ISSUE ---
+TimeDelta serialization precision
+Hi there!
+
+I just found quite strange behaviour of `TimeDelta` field serialization
+
+```python3
+from marshmallow.fields import TimeDelta
+from datetime import timedelta
+
+td_field = TimeDelta(precision="milliseconds")
+
+obj = dict()
+obj["td_field"] = timedelta(milliseconds=345)
+
+print(td_field.serialize("td_field", obj))
+```
+
+Output of this snippet is `344`, but it seems that `345` is correct.
+
+Looks like a rounding issue here: https://github.com/marshmallow-code/marshmallow/blob/dev/src/marshmallow/fields.py#L1474
+--- END ISSUE ---
+
+[Current directory: /workspace/marshmallow-code__marshmallow]
+
+ASSISTANT:
+Let's first start by reproducing the results of the issue. The issue includes some example code for reproduction, which we can use. We'll create a new file called `reproduce.py` and paste the example code into it.
+<execute_ipython>
+create_file('/workspace/marshmallow-code__marshmallow/reproduce.py')
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/reproduce.py (1 lines total)]
+1|
+[File /workspace/marshmallow-code__marshmallow/reproduce.py created.]
+
+ASSISTANT:
+Now let's paste in the example code from the issue.
+<execute_ipython>
+EDITED_CODE = """from marshmallow.fields import TimeDelta
+from datetime import timedelta
+
+td_field = TimeDelta(precision="milliseconds")
+
+obj = dict()
+obj["td_field"] = timedelta(milliseconds=345)
+
+print(td_field.serialize("td_field", obj))
+"""
+edit_file(start=1, end=1, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/reproduce.py (9 lines total)]
+1|from marshmallow.fields import TimeDelta
+2|from datetime import timedelta
+3|
+4|td_field = TimeDelta(precision="milliseconds")
+5|
+6|obj = dict()
+7|obj["td_field"] = timedelta(milliseconds=345)
+8|
+9|print(td_field.serialize("td_field", obj))
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+Now let's run the code to see if we see the same output as the issue.
+<execute_bash>
+python reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+344
+
+ASSISTANT:
+We are indeed seeing the same output as the issue. The issue suggests that we should look at line 1474 of the `fields.py` file to see if there is a rounding issue. Let's open the `fields.py` file and navigate to line 1474 to see the relevant code. First we should find the `fields.py` file in the repository. It is likely to be in the `src/marshmallow` directory, but we should check to be sure.
+<execute_bash>
+ls -F
+</execute_bash>
+
+USER:
+OBSERVATION:
+AUTHORS.rst
+CHANGELOG.rst
+CODE_OF_CONDUCT.md
+CONTRIBUTING.rst
+LICENSE
+MANIFEST.in
+NOTICE
+README.rst
+RELEASING.md
+azure-pipelines.yml
+docs/
+examples/
+performance/
+pyproject.toml
+reproduce.py
+setup.cfg
+setup.py
+src/
+tests/
+tox.ini
+
+ASSISTANT:
+It looks like the `src` directory is present, which suggests that the `fields.py` file is likely to be in the `src` directory. Let's use find_file to see where it is.
+<execute_ipython>
+find_file("fields.py", "/workspace/marshmallow-code__marshmallow/src")
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[Found 1 matches for "fields.py" in /workspace/marshmallow-code__marshmallow/src]
+/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py
+
+ASSISTANT:
+It looks like the `fields.py` file is present in the `./src/marshmallow/` directory. The issue also points to a specific URL with line number 1474. We should navigate to that line in fields.py to see the relevant code for the `TimeDelta` serialization.
+<execute_ipython>
+open_file("/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py", 1474)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1458 more lines above)
+1459|            self.WEEKS,
+1460|        )
+1461|
+1462|        if precision not in units:
+1463|            msg = 'The precision must be {} or "{}".'.format(
+1464|                ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
+1465|            )
+1466|            raise ValueError(msg)
+1467|
+1468|        self.precision = precision
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(value.total_seconds() / base_unit.total_seconds())
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+1481|            raise self.make_error("invalid") from error
+1482|
+1483|        kwargs = {self.precision: value}
+1484|
+1485|        try:
+1486|            return dt.timedelta(**kwargs)
+1487|        except OverflowError as error:
+1488|            raise self.make_error("invalid") from error
+1489|
+1490|
+1491|class Mapping(Field):
+1492|    """An abstract class for objects with key-value pairs.
+1493|
+1494|    :param keys: A field class or instance for dict keys.
+1495|    :param values: A field class or instance for dict values.
+1496|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1497|
+1498|    .. note::
+1499|        When the structure of nested data is not known, you may omit the
+1500|        `keys` and `values` arguments to prevent content validation.
+1501|
+1502|    .. versionadded:: 3.0.0rc4
+1503|    """
+1504|
+1505|    mapping_type = dict
+1506|
+1507|    #: Default error messages.
+1508|    default_error_messages = {"invalid": "Not a valid mapping type."}
+1509|
+1510|    def __init__(
+1511|        self,
+1512|        keys: typing.Optional[typing.Union[Field, type]] = None,
+1513|        values: typing.Optional[typing.Union[Field, type]] = None,
+1514|        **kwargs,
+1515|    ):
+1516|        super().__init__(**kwargs)
+1517|        if keys is None:
+1518|            self.key_field = None
+1519|        else:
+1520|            try:
+1521|                self.key_field = resolve_field_instance(keys)
+1522|            except FieldInstanceResolutionError as error:
+1523|                raise ValueError(
+1524|                    '"keys" must be a subclass or instance of '
+1525|                    "marshmallow.base.FieldABC."
+1526|                ) from error
+1527|
+1528|        if values is None:
+1529|            self.value_field = None
+1530|        else:
+1531|            try:
+1532|                self.value_field = resolve_field_instance(values)
+1533|            except FieldInstanceResolutionError as error:
+1534|                raise ValueError(
+1535|                    '"values" must be a subclass or instance of '
+1536|                    "marshmallow.base.FieldABC."
+1537|                ) from error
+1538|            if isinstance(self.value_field, Nested):
+1539|                self.only = self.value_field.only
+1540|                self.exclude = self.value_field.exclude
+1541|
+1542|    def _bind_to_schema(self, field_name, schema):
+1543|        super()._bind_to_schema(field_name, schema)
+1544|        if self.value_field:
+1545|            self.value_field = copy.deepcopy(self.value_field)
+1546|            self.value_field._bind_to_schema(field_name, self)
+1547|        if isinstance(self.value_field, Nested):
+1548|            self.value_field.only = self.only
+1549|            self.value_field.exclude = self.exclude
+1550|        if self.key_field:
+1551|            self.key_field = copy.deepcopy(self.key_field)
+1552|            self.key_field._bind_to_schema(field_name, self)
+1553|
+1554|    def _serialize(self, value, attr, obj, **kwargs):
+1555|        if value is None:
+1556|            return None
+1557|        if not self.value_field and not self.key_field:
+1558|            return self.mapping_type(value)
+(439 more lines below)
+
+ASSISTANT:
+We are now looking at the relevant section of the `fields.py` file where the `TimeDelta` serialization occurs. The issue suggests that there is a rounding problem near line 1474. The current implementation uses integer division, which truncates the decimal part and can lead to incorrect rounding on line 1475. To fix this issue, we should ensure that the division results in a float, and then round the result to the nearest integer before converting it to an int. This should preserve the milliseconds precision as expected. Let's make the necessary edit to the code.
+<execute_ipython>
+edit_file(start=1475, end=1475, content="return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int")
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
+
+ERRORS:
+- E999 IndentationError: unexpected indent
+
+[This is how your edit would have looked if applied]
+-------------------------------------------------
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1468 more lines above)
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+(518 more lines below)
+-------------------------------------------------
+
+[This is the original code before your edit]
+-------------------------------------------------
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1469 more lines above)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(value.total_seconds() / base_unit.total_seconds())
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+(517 more lines below)
+-------------------------------------------------
+Your changes have NOT been applied. Please fix your edit command and try again.
+You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
+DO NOT re-run the same failed edit command. Running it again will lead to the same error.
+
+ASSISTANT:
+My edit command did not use the proper indentation, I will fix my syntax in this follow up edit command.
+<execute_ipython>
+EDITED_CODE="        return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int"
+edit_file(start=1475, end=1475, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1424 more lines above)
+1425|
+1426|    :param precision: Influences how the integer is interpreted during
+1427|        (de)serialization. Must be 'days', 'seconds', 'microseconds',
+1428|        'milliseconds', 'minutes', 'hours' or 'weeks'.
+1429|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1430|
+1431|    .. versionchanged:: 2.0.0
+1432|        Always serializes to an integer value to avoid rounding errors.
+1433|        Add `precision` parameter.
+1434|    """
+1435|
+1436|    DAYS = "days"
+1437|    SECONDS = "seconds"
+1438|    MICROSECONDS = "microseconds"
+1439|    MILLISECONDS = "milliseconds"
+1440|    MINUTES = "minutes"
+1441|    HOURS = "hours"
+1442|    WEEKS = "weeks"
+1443|
+1444|    #: Default error messages.
+1445|    default_error_messages = {
+1446|        "invalid": "Not a valid period of time.",
+1447|        "format": "{input!r} cannot be formatted as a timedelta.",
+1448|    }
+1449|
+1450|    def __init__(self, precision: str = SECONDS, **kwargs):
+1451|        precision = precision.lower()
+1452|        units = (
+1453|            self.DAYS,
+1454|            self.SECONDS,
+1455|            self.MICROSECONDS,
+1456|            self.MILLISECONDS,
+1457|            self.MINUTES,
+1458|            self.HOURS,
+1459|            self.WEEKS,
+1460|        )
+1461|
+1462|        if precision not in units:
+1463|            msg = 'The precision must be {} or "{}".'.format(
+1464|                ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
+1465|            )
+1466|            raise ValueError(msg)
+1467|
+1468|        self.precision = precision
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+1481|            raise self.make_error("invalid") from error
+1482|
+1483|        kwargs = {self.precision: value}
+1484|
+1485|        try:
+1486|            return dt.timedelta(**kwargs)
+1487|        except OverflowError as error:
+1488|            raise self.make_error("invalid") from error
+1489|
+1490|
+1491|class Mapping(Field):
+1492|    """An abstract class for objects with key-value pairs.
+1493|
+1494|    :param keys: A field class or instance for dict keys.
+1495|    :param values: A field class or instance for dict values.
+1496|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1497|
+1498|    .. note::
+1499|        When the structure of nested data is not known, you may omit the
+1500|        `keys` and `values` arguments to prevent content validation.
+1501|
+1502|    .. versionadded:: 3.0.0rc4
+1503|    """
+1504|
+1505|    mapping_type = dict
+1506|
+1507|    #: Default error messages.
+1508|    default_error_messages = {"invalid": "Not a valid mapping type."}
+1509|
+1510|    def __init__(
+1511|        self,
+1512|        keys: typing.Optional[typing.Union[Field, type]] = None,
+1513|        values: typing.Optional[typing.Union[Field, type]] = None,
+1514|        **kwargs,
+1515|    ):
+1516|        super().__init__(**kwargs)
+1517|        if keys is None:
+1518|            self.key_field = None
+1519|        else:
+1520|            try:
+1521|                self.key_field = resolve_field_instance(keys)
+1522|            except FieldInstanceResolutionError as error:
+1523|                raise ValueError(
+1524|                    '"keys" must be a subclass or instance of '
+(473 more lines below)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+The code has been updated to use the `round` function, which should fix the rounding issue. Before submitting the changes, it would be prudent to run the reproduce.py code again to ensure that this change has the desired effect. I see that "Current directory" is still `/marshmallow-code__marshmallow`, so we can just run `reproduce.py` directly.
+<execute_bash>
+python reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+345
+
+ASSISTANT:
+The output has changed from 344 to 345, which suggests that the rounding issue has been fixed. Let's remove the reproduce.py file since it is no longer needed.
+<execute_bash>
+rm reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+
+ASSISTANT:
+rm doesn't have any output when it deletes successfully, so that must have worked. Now that we have fixed the issue, we can exit the current task.
+<execute_bash>
+exit
+</execute_bash>
+
+--- END OF EXAMPLE ---
+
+
+NOW, LET'S START!
+
+----------
+
+Fix typos in bad.txt. Do not ask me for confirmation at any point.
+
+----------
+
+Let's start by opening the `bad.txt` file to identify and fix the typos.
+<execute_ipython>
+open_file("bad.txt")
+</execute_ipython>
+
+----------
+
+OBSERVATION:
+[File: /workspace/bad.txt (4 lines total)]
+(this is the beginning of the file)
+1|This is a stupid typoo.
+2|Really?
+3|No mor typos!
+4|Enjoy!
+(this is the end of the file)
+
+
+----------
+
+Let's correct the typos in the `bad.txt` file.
+
+1. "stupid typoo" should be "stupid typo".
+2. "No mor typos!" should be "No more typos!".
+
+I will make these changes now.
+<execute_ipython>
+edit_file_by_replace(
+    "bad.txt",
+    to_replace="This is a stupid typoo.\nNo mor typos!",
+    new_content="This is a stupid typo.\nNo more typos!"
+)
+</execute_ipython>
+
+----------
+
+OBSERVATION:
+[No exact match found in bad.txt for
+```
+This is a stupid typoo.
+No mor typos!
+```
+]
+
+
+----------
+
+It seems there was an issue with the exact match. I will correct the typos line by line instead.
+
+1. "This is a stupid typoo." should be "This is a stupid typo."
+2. "No mor typos!" should be "No more typos!".
+
+Let's proceed with these changes.
+<execute_ipython>
+edit_file_by_replace(
+    "bad.txt",
+    to_replace="This is a stupid typoo.",
+    new_content="This is a stupid typo."
+)
+</execute_ipython>
+
+----------
+
+OBSERVATION:
+[File: /workspace/bad.txt (4 lines total after edit)]
+(this is the beginning of the file)
+1|This is a stupid typo.
+2|Really?
+3|No mor typos!
+4|Enjoy!
+(this is the end of the file)
+[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+
+----------
+
+Now, let's correct the second typo: "No mor typos!" should be "No more typos!".
+<execute_ipython>
+edit_file_by_replace(
+    "bad.txt",
+    to_replace="No mor typos!",
+    new_content="No more typos!"
+)
+</execute_ipython>
+
+----------
+
+OBSERVATION:
+[File: /workspace/bad.txt (4 lines total after edit)]
+(this is the beginning of the file)
+1|This is a stupid typo.
+2|Really?
+3|No more typos!
+4|Enjoy!
+(this is the end of the file)
+[File updated (edited at line 3). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+
+ENVIRONMENT REMINDER: You have 10 turns left to complete the task.
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/response_002.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/response_002.log
@ -1,8 +1,12 @@
-I will now correct the typos in the `bad.txt` file.
+Let's correct the typos in the `bad.txt` file.

+1. "stupid typoo" should be "stupid typo".
+2. "No mor typos!" should be "No more typos!".
+
+I will make these changes now.
 <execute_ipython>
 edit_file_by_replace(
    "bad.txt",
-    to_replace="This is a stupid typoo.",
-    new_content="This is a stupid typo."
+    to_replace="This is a stupid typoo.\nNo mor typos!",
+    new_content="This is a stupid typo.\nNo more typos!"
 )
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/response_003.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/response_003.log
@ -1,7 +1,12 @@
-I will now correct the remaining typo on line 3.
+It seems there was an issue with the exact match. I will correct the typos line by line instead.
+
+1. "This is a stupid typoo." should be "This is a stupid typo."
+2. "No mor typos!" should be "No more typos!".
+
+Let's proceed with these changes.
 <execute_ipython>
 edit_file_by_replace(
    "bad.txt",
-    to_replace="No mor typos!",
-    new_content="No more typos!"
+    to_replace="This is a stupid typoo.",
+    new_content="This is a stupid typo."
 )
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/response_004.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/response_004.log
@ -1,10 +1,8 @@
-The typos in `bad.txt` have been corrected. The file now reads:
+Now, let's correct the second typo: "No mor typos!" should be "No more typos!".

-```
-This is a stupid typo.
-Really?
-No more typos!
-Enjoy!
-```
-
-If there are no further changes needed, the task is complete.
+<execute_ipython>
+edit_file_by_replace(
+    "bad.txt",
+    to_replace="No mor typos!",
+    new_content="No more typos!"
+)
--- a/tests/integration/mock/CodeActSWEAgent/test_edits/response_005.log
+++ b/tests/integration/mock/CodeActSWEAgent/test_edits/response_005.log
@ -0,0 +1,10 @@
+The typos in `bad.txt` have been corrected successfully. Here is the updated content:
+
+```
+This is a stupid typo.
+Really?
+No more typos!
+Enjoy!
+```
+
+If there are no further changes needed, we can consider this task complete.
--- a/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_002.log
+++ b/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_002.log
@ -1,8 +1,6 @@
-```json
 {
  "action": "run",
  "args": {
    "command": "git status"
  }
 }
-```
--- a/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_006.log
+++ b/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_006.log
@ -1,10 +1,8 @@
-```json
 {
  "action": "reject",
  "args": {
    "outputs": {
-      "reason": "CommitWriterAgent failed because the current directory is not a valid git repository. No other agents can resolve this issue."
+      "reason": "CommitWriterAgent failed because it is not a valid git repository. No other agents can resolve this issue."
    }
  }
 }
-```
--- a/tests/integration/test_agent.py
+++ b/tests/integration/test_agent.py
@ -21,6 +21,8 @@ from opendevin.llm.llm import LLM
 workspace_base = os.getenv('WORKSPACE_BASE')
 workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH')
 workspace_mount_path_in_sandbox = os.getenv('WORKSPACE_MOUNT_PATH_IN_SANDBOX')
+max_iterations = 15
+max_budget_per_task = 15

 print('\nPaths used:')
 print(f'workspace_base: {workspace_base}')
@ -30,7 +32,7 @@ print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')

 def get_number_of_prompts(test_name: str):
    mock_dir = os.path.join(
-        os.environ.get('SCRIPT_DIR'), 'mock', os.environ.get('DEFAULT_AGENT'), test_name
+        os.environ['SCRIPT_DIR'], 'mock', os.environ['DEFAULT_AGENT'], test_name
    )
    prompt_files = [file for file in os.listdir(mock_dir) if file.startswith('prompt_')]
    return len(prompt_files)
@ -70,7 +72,7 @@ def validate_final_state(final_state: State | None, test_name: str):
    os.getenv('DEFAULT_AGENT') == 'ManagerAgent',
    reason='Manager agent is not capable of finishing this in reasonable steps yet',
 )
-def test_write_simple_script(current_test_name) -> None:
+def test_write_simple_script(current_test_name: str) -> None:
    task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
    args = parse_arguments()

@ -78,7 +80,9 @@ def test_write_simple_script(current_test_name) -> None:
    agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig()))

    final_state: State | None = asyncio.run(
-        run_agent_controller(agent, task, exit_on_message=True)
+        run_agent_controller(
+            agent, task, max_iterations, max_budget_per_task, exit_on_message=True
+        )
    )
    validate_final_state(final_state, current_test_name)

@ -116,7 +120,7 @@ def test_write_simple_script(current_test_name) -> None:
    os.getenv('SANDBOX_BOX_TYPE') == 'local',
    reason='local sandbox shows environment-dependent absolute path for pwd command',
 )
-def test_edits(current_test_name):
+def test_edits(current_test_name: str):
    args = parse_arguments()
    # Copy workspace artifacts to workspace_base location
    source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
@ -133,7 +137,9 @@ def test_edits(current_test_name):
    # Execute the task
    task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
    final_state: State | None = asyncio.run(
-        run_agent_controller(agent, task, exit_on_message=True)
+        run_agent_controller(
+            agent, task, max_iterations, max_budget_per_task, exit_on_message=True
+        )
    )
    validate_final_state(final_state, current_test_name)

@ -157,7 +163,7 @@ Enjoy!
    os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
    reason='Currently, only ssh sandbox supports stateful tasks',
 )
-def test_ipython(current_test_name):
+def test_ipython(current_test_name: str):
    args = parse_arguments()

    # Create the agent
@ -166,7 +172,9 @@ def test_ipython(current_test_name):
    # Execute the task
    task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
    final_state: State | None = asyncio.run(
-        run_agent_controller(agent, task, exit_on_message=True)
+        run_agent_controller(
+            agent, task, max_iterations, max_budget_per_task, exit_on_message=True
+        )
    )
    validate_final_state(final_state, current_test_name)

@ -190,7 +198,7 @@ def test_ipython(current_test_name):
    os.getenv('SANDBOX_BOX_TYPE') == 'local',
    reason='FIXME: local sandbox does not capture stderr',
 )
-def test_simple_task_rejection(current_test_name):
+def test_simple_task_rejection(current_test_name: str):
    args = parse_arguments()

    # Create the agent
@ -199,7 +207,9 @@ def test_simple_task_rejection(current_test_name):
    # Give an impossible task to do: cannot write a commit message because
    # the workspace is not a git repo
    task = 'Write a git commit message for the current staging area. Do not ask me for confirmation at any point.'
-    final_state: State | None = asyncio.run(run_agent_controller(agent, task))
+    final_state: State | None = asyncio.run(
+        run_agent_controller(agent, task, max_iterations, max_budget_per_task)
+    )
    validate_final_state(final_state, current_test_name)
    assert isinstance(final_state.history.get_last_action(), AgentRejectAction)

@ -213,7 +223,7 @@ def test_simple_task_rejection(current_test_name):
    os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
    reason='Currently, only ssh sandbox supports stateful tasks',
 )
-def test_ipython_module(current_test_name):
+def test_ipython_module(current_test_name: str):
    args = parse_arguments()

    # Create the agent
@ -222,7 +232,9 @@ def test_ipython_module(current_test_name):
    # Execute the task
    task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
    final_state: State | None = asyncio.run(
-        run_agent_controller(agent, task, exit_on_message=True)
+        run_agent_controller(
+            agent, task, max_iterations, max_budget_per_task, exit_on_message=True
+        )
    )
    validate_final_state(final_state, current_test_name)

@ -252,7 +264,7 @@ def test_ipython_module(current_test_name):
    and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
    reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
 )
-def test_browse_internet(http_server, current_test_name):
+def test_browse_internet(http_server, current_test_name: str):
    args = parse_arguments()

    # Create the agent
@ -261,7 +273,9 @@ def test_browse_internet(http_server, current_test_name):
    # Execute the task
    task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
    final_state: State | None = asyncio.run(
-        run_agent_controller(agent, task, exit_on_message=True)
+        run_agent_controller(
+            agent, task, max_iterations, max_budget_per_task, exit_on_message=True
+        )
    )
    validate_final_state(final_state, current_test_name)